# 256 lines | 9.5 KiB | Python
import json
|
|
import os
|
|
|
|
def find_table_boundaries(lines):
    """
    Find the header boundaries and column dividers of a table built from LINE entities.

    The header is assumed to sit at the bottom of the table, so the lowest
    horizontal lines delimit it: the three lowest distinct Y levels for a
    two-row header, or the two lowest as a fallback for a single-row header.

    Args:
        lines: List of dicts with 'start' and 'end' entries, each indexable
            as [x, y, ...].

    Returns:
        tuple: ``(boundaries, vert_lines_x)``.
        ``boundaries`` is a dict with the keys ``x_min``, ``x_max``,
        ``y_min``, ``y_max``, ``header_total_height`` and ``data_start_y``
        (Y values are rounded to 2 decimals). ``vert_lines_x`` is the sorted
        list of distinct X coordinates (rounded to 2 decimals) of the
        vertical lines. ``(None, None)`` is returned when ``lines`` is empty
        or fewer than two horizontal levels are found.
    """
    if not lines:
        return None, None

    x_coords = [
        point[0]
        for line in lines
        for point in (line['start'], line['end'])
    ]
    min_x, max_x = min(x_coords), max(x_coords)

    # Distinct Y levels of the (near-)horizontal lines, lowest first.
    # Rounded exactly like the vertical X coordinates below, so floating-point
    # noise on one ruling is not mistaken for an additional header row.
    horiz_lines_y = sorted({
        round(line['start'][1], 2)
        for line in lines
        if abs(line['start'][1] - line['end'][1]) < 0.1
    })

    if len(horiz_lines_y) < 3:
        print("Warning: Could not clearly identify a 2-row header structure.")
        if len(horiz_lines_y) < 2:
            return None, None
        # Fallback for a single-row header: it spans the two lowest levels.
        y_top = horiz_lines_y[1]
    else:
        # Two-row header: bottom, middle and top rulings are the three lowest.
        y_top = horiz_lines_y[2]
    y_bottom = horiz_lines_y[0]

    # Absolute X coordinates of the vertical column dividers.
    vert_lines_x = sorted({
        round(line['start'][0], 2)
        for line in lines
        if abs(line['start'][0] - line['end'][0]) < 0.1
    })

    boundaries = {
        "x_min": min_x,
        "x_max": max_x,
        "y_min": y_bottom,
        "y_max": y_top,
        "header_total_height": y_top - y_bottom,
        # Data rows start where the header ends.
        "data_start_y": y_top,
    }

    return boundaries, vert_lines_x
|
|
|
|
def generate_header_template(data, bounds, col_x_coords_abs, *,
                             row_height=8.0,
                             template_name="标准物料清单-底部表头"):
    """
    Build the header part of the template from extracted entity data.

    All positions in the result are expressed relative to the header's
    bottom-left corner (``bounds['x_min']``, ``bounds['y_min']``) and rounded
    to 2 decimals.

    Args:
        data: Dict with optional 'lines' and 'texts' lists. Each line has
            'start'/'end' points; each text has 'insert_point', 'content',
            'height', 'style', 'layer', 'color' and optionally 'alignment'.
        bounds: Boundary dict providing 'x_min', 'y_min', 'y_max' and
            'header_total_height'. A falsy value aborts the generation.
        col_x_coords_abs: Absolute X coordinates of the column dividers.
        row_height: Value stored under "row_height" in the template.
        template_name: Value stored under "template_name" in the template.

    Returns:
        dict | None: The template dict, or None when ``bounds`` is falsy.
    """
    if not bounds:
        print("Could not determine table boundaries for header. Aborting.")
        return None

    table_base_x = bounds['x_min']
    table_base_y = bounds['y_min']
    y_low, y_high = bounds['y_min'], bounds['y_max']

    # --- Texts whose insertion point lies within the header's Y-span ---
    header_texts_data = [
        {
            "content": text['content'],
            "relative_pos": [
                round(text['insert_point'][0] - table_base_x, 2),
                round(text['insert_point'][1] - table_base_y, 2),
            ],
            "alignment": text.get("alignment", "BOTTOM_LEFT"),
            "height": text['height'],
            "style": text['style'],
            "layer": text['layer'],
            "color": text['color'],
        }
        for text in data.get("texts", [])
        if y_low <= text['insert_point'][1] <= y_high
    ]

    # --- Lines with BOTH endpoints roughly within the header's Y-span ---
    # The 0.1 slack keeps rulings that sit exactly on the header edges.
    line_low, line_high = y_low - 0.1, y_high + 0.1
    header_lines_data = []
    for line in data.get("lines", []):
        start_y = line['start'][1]
        end_y = line['end'][1]
        if not (line_low <= start_y <= line_high
                and line_low <= end_y <= line_high):
            continue
        header_lines_data.append({
            "start": [
                round(line['start'][0] - table_base_x, 2),
                round(start_y - table_base_y, 2),
            ],
            "end": [
                round(line['end'][0] - table_base_x, 2),
                round(end_y - table_base_y, 2),
            ],
        })

    # --- Build the final template structure ---
    col_boundaries_relative = [
        round(x - table_base_x, 2) for x in col_x_coords_abs
    ]

    # Texts are ordered top-to-bottom, then right-to-left (descending Y, X).
    ordered_texts = sorted(
        header_texts_data,
        key=lambda t: (t['relative_pos'][1], t['relative_pos'][0]),
        reverse=True,
    )

    return {
        "template_name": template_name,
        "row_height": row_height,
        "header_height": round(bounds['header_total_height'], 2),
        "column_boundaries": col_boundaries_relative,
        "header_definition": {
            "lines": header_lines_data,
            "texts": ordered_texts,
        },
        "column_definitions": {},
    }
|
|
|
|
|
|
def generate_column_definitions(data, bounds, col_x_coords_abs, header_template,
                                *, row_height=8.0):
    """
    Derive a per-column text layout from the header and the first data row.

    Columns are named after a header text of height 3.5 that does not contain
    'PARTS'; columns without such a text are skipped. For every named column
    the texts of the first data row (the band of ``row_height`` directly above
    ``bounds['data_start_y']``) are recorded with positions relative to the
    column's left edge and the row's bottom edge.

    Args:
        data: Dict with an optional 'texts' list; each text has
            'insert_point', 'height', 'style', 'layer', 'color' and
            optionally 'alignment'.
        bounds: Boundary dict providing 'x_min', 'x_max' and 'data_start_y'.
        col_x_coords_abs: Sorted absolute X coordinates of column dividers.
        header_template: Dict whose ["header_definition"]["texts"] entries
            carry 'relative_pos', 'height' and 'content'.
        row_height: Height of the first data row band to sample.

    Returns:
        list[dict]: One dict per named column with the keys 'name',
        'relative_x_start' and 'text_definitions'.
    """
    # Text heights are floats read from a drawing; compare with a tolerance
    # instead of exact equality so 3.4999999 still counts as 3.5.
    height_tol = 1e-3

    def has_height(text, target):
        return abs(text['height'] - target) <= height_tol

    texts = data.get("texts", [])
    table_base_x = bounds['x_min']
    header_texts = header_template["header_definition"]["texts"]

    # Bucket header texts by the column (pair of adjacent dividers) they fall in.
    header_texts_by_col = [[] for _ in col_x_coords_abs]
    column_spans = list(zip(col_x_coords_abs, col_x_coords_abs[1:]))
    for text in header_texts:
        text_x = text["relative_pos"][0] + table_base_x
        for i, (left, right) in enumerate(column_spans):
            if left <= text_x < right:
                header_texts_by_col[i].append(text)
                break

    # Name each column after its main header text (maps col_idx -> col_name).
    col_names = {}
    for i, col_texts in enumerate(header_texts_by_col):
        main_text = next(
            (t for t in col_texts
             if has_height(t, 3.5) and 'PARTS' not in t['content']),
            None,
        )
        if main_text:
            col_names[i] = main_text['content'].strip()

    # --- Find text patterns in the first data row ---
    row_bottom_y = bounds["data_start_y"]
    row_top_y = row_bottom_y + row_height
    data_row_texts = [
        t for t in texts
        if row_bottom_y < t['insert_point'][1] < row_top_y
    ]

    col_defs_list = []
    for col_idx, col_name in col_names.items():
        col_left_x_abs = col_x_coords_abs[col_idx]
        if col_idx + 1 < len(col_x_coords_abs):
            col_right_x_abs = col_x_coords_abs[col_idx + 1]
        else:
            col_right_x_abs = bounds['x_max']

        texts_in_col = [
            t for t in data_row_texts
            if col_left_x_abs <= t['insert_point'][0] < col_right_x_abs
        ]
        # A lone text is always the column's "main" value; only multi-text
        # cells are split into named parts by text height.
        multi_text_cell = len(texts_in_col) > 1

        text_defs_for_col = []
        for i, text in enumerate(texts_in_col):
            key = "main"
            if multi_text_cell:
                if has_height(text, 3.5):
                    key = "chinese_name"
                elif has_height(text, 2.0):
                    key = "english_name"
                elif has_height(text, 3.0) and i > 0:
                    key = "specification"

            text_defs_for_col.append({
                "data_key": key,
                "relative_pos": [
                    round(text['insert_point'][0] - col_left_x_abs, 2),
                    round(text['insert_point'][1] - row_bottom_y, 2),
                ],
                "alignment": text.get("alignment", "BOTTOM_LEFT"),
                "height": text['height'],
                "style": text['style'],
                "layer": text['layer'],
                "color": text['color'],
            })

        col_defs_list.append({
            "name": col_name,
            "relative_x_start": round(col_left_x_abs - table_base_x, 2),
            "text_definitions": text_defs_for_col,
        })

    return col_defs_list
|
|
|
|
|
|
def main():
    """
    Read the extracted entity JSON and write the header and columns templates.

    Reads ``Drawing1_entities.json`` from the ``03_Python_OpenSource_DXF``
    directory and writes ``header_template.json`` and
    ``columns_template.json`` next to it. Every failure is reported with a
    printed message and ends the run early; nothing is raised to the caller
    for a missing/unreadable source file or an unwritable output file.
    """
    base_dir = "03_Python_OpenSource_DXF"
    source_json_path = os.path.join(base_dir, "Drawing1_entities.json")
    header_template_path = os.path.join(base_dir, "header_template.json")
    columns_template_path = os.path.join(base_dir, "columns_template.json")

    if not os.path.exists(source_json_path):
        print(f"Error: Source JSON file not found at {source_json_path}")
        return

    print(f"Reading entity data from {source_json_path}...")
    try:
        with open(source_json_path, 'r', encoding='utf-8') as f:
            entity_data = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        print(f"Error reading source JSON file: {e}")
        return

    print("Generating templates...")
    # .get() mirrors how the generator functions read the same payload, so a
    # file without a "lines" entry is reported instead of raising KeyError.
    bounds, col_x_coords_abs = find_table_boundaries(entity_data.get("lines", []))
    if not (bounds and col_x_coords_abs):
        print("Error: Could not determine table boundaries. No templates generated.")
        return

    # 1. Generate and save the header template.
    header_template = generate_header_template(entity_data, bounds, col_x_coords_abs)
    if not header_template:
        # generate_header_template has already printed the reason.
        return
    _write_template_json(header_template_path, header_template, "header")

    # 2. Generate and save the columns template. The header texts are needed
    #    to name the columns correctly.
    column_definitions = generate_column_definitions(
        entity_data, bounds, col_x_coords_abs, header_template
    )
    if not column_definitions:
        print("Warning: No column definitions found. Columns template not written.")
        return

    columns_template = {
        "row_height": 8.0,
        "column_definitions": column_definitions,
    }
    _write_template_json(columns_template_path, columns_template, "columns")


def _write_template_json(path, payload, label):
    """Write ``payload`` as UTF-8 JSON to ``path`` and print the outcome for ``label``."""
    try:
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
        print(f"Successfully generated {label} template: {path}")
    except OSError as e:
        print(f"Error writing {label} template file: {e}")
|
|
# Run the template generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|