dxfedit/03_Python_OpenSource_DXF/analyze_dxf_tables.py
2025-09-09 18:42:30 +08:00

199 lines
7.3 KiB
Python

import ezdxf
from collections import defaultdict
import os
def group_close_coords(coords, tolerance=1.0):
    """Cluster nearly-equal coordinates and return one mean value per cluster.

    The values are sorted ascending and walked once. A value joins the
    current cluster when it is strictly less than ``tolerance`` away from the
    cluster's most recently added (largest) member; otherwise it starts a new
    cluster. Because the comparison is made against the previous value and
    not the cluster's first value, a chain of closely spaced values can end
    up in one cluster that is wider than ``tolerance``.

    Args:
        coords: Iterable of numbers (list, set, generator, ...). ``None`` is
            treated like an empty input. The argument is not modified.
        tolerance: Maximum gap (exclusive) between neighbouring sorted values
            for them to share a cluster.

    Returns:
        A list with the arithmetic mean of each cluster, in ascending order.
        Empty input yields an empty list.
    """
    if coords is None:
        return []

    # Sort first, then test for emptiness: an exhausted generator is truthy,
    # so checking the raw argument would let empty iterators slip through.
    ordered = sorted(coords)
    if not ordered:
        return []

    clusters = [[ordered[0]]]
    for value in ordered[1:]:
        if abs(value - clusters[-1][-1]) < tolerance:
            clusters[-1].append(value)
        else:
            clusters.append([value])

    return [sum(cluster) / len(cluster) for cluster in clusters]
def _find_interval(value, boundaries):
    """Return the index of the boundary pair that strictly encloses ``value``.

    ``boundaries`` is a sorted list (ascending or descending). Index ``i`` is
    returned for the first pair ``(boundaries[i], boundaries[i + 1])`` with
    ``value`` strictly between the two; ``-1`` is returned when no pair
    matches, which includes values lying exactly on a boundary.
    """
    for index, (first, second) in enumerate(zip(boundaries, boundaries[1:])):
        if min(first, second) < value < max(first, second):
            return index
    return -1


def analyze_dxf_tables(dxf_path, *, axis_tolerance=0.1, group_tolerance=1.0):
    """Extract table-like grid and text data from a DXF file's modelspace.

    LINE entities whose start/end Y (resp. X) differ by less than
    ``axis_tolerance`` are treated as horizontal (resp. vertical) grid lines.
    Their start coordinates are clustered with ``group_close_coords`` to get
    row and column boundaries, and every TEXT/MTEXT entity whose insert point
    lies strictly inside a cell is recorded in that cell.

    Args:
        dxf_path: Path of the DXF file to read.
        axis_tolerance: Maximum coordinate difference between a line's two
            endpoints for it to count as axis-aligned.
        group_tolerance: Tolerance handed to ``group_close_coords`` when
            merging nearly identical boundary coordinates.

    Returns:
        ``None`` (after printing a message) when the file cannot be read or
        no grid can be derived. Otherwise a dict with:

        * ``"grid"``: ``rows x cols`` nested lists; each cell is a list of
          ``{"text", "pos", "height"}`` dicts.
        * ``"row_coords"``: row boundary Y values, descending (top first).
        * ``"col_coords"``: column boundary X values, ascending.
    """
    try:
        doc = ezdxf.readfile(dxf_path)
        msp = doc.modelspace()
    except IOError:
        print(f"Cannot open DXF file: {dxf_path}")
        return None
    except ezdxf.DXFStructureError as e:
        print(f"Invalid or corrupted DXF file: {dxf_path}. Error: {e}")
        return None

    lines = msp.query('LINE')
    texts = msp.query('TEXT MTEXT')

    # One pass over the lines: collect the start coordinate of every
    # axis-aligned line. A set is empty exactly when no such line exists.
    y_coords = set()
    x_coords = set()
    for line in lines:
        start, end = line.dxf.start, line.dxf.end
        if abs(start.y - end.y) < axis_tolerance:
            y_coords.add(start.y)
        if abs(start.x - end.x) < axis_tolerance:
            x_coords.add(start.x)

    if not y_coords or not x_coords:
        print("No table structure (horizontal/vertical lines) found.")
        return None

    # Rows run top-to-bottom (descending Y); columns left-to-right.
    row_boundaries = sorted(
        group_close_coords(y_coords, tolerance=group_tolerance), reverse=True
    )
    col_boundaries = sorted(
        group_close_coords(x_coords, tolerance=group_tolerance)
    )

    num_rows = len(row_boundaries) - 1
    num_cols = len(col_boundaries) - 1
    if num_rows <= 0 or num_cols <= 0:
        print("Could not determine table grid.")
        return None

    # Each cell holds a list because several text entities may share a cell.
    table_grid = [[[] for _ in range(num_cols)] for _ in range(num_rows)]

    for text in texts:
        pos = text.dxf.insert
        row = _find_interval(pos.y, row_boundaries)
        col = _find_interval(pos.x, col_boundaries)
        if row == -1 or col == -1:
            # Outside the grid or exactly on a grid line: not assigned.
            continue

        # Entities exposing ``text`` / ``dxf.char_height`` are presumably
        # MTEXT; the fallbacks presumably cover TEXT -- confirm against ezdxf.
        content = text.text if hasattr(text, 'text') else text.dxf.text
        height = text.dxf.char_height if hasattr(text.dxf, 'char_height') else text.dxf.height
        table_grid[row][col].append({
            "text": content.strip(),
            "pos": (pos.x, pos.y),
            "height": height
        })

    return {
        "grid": table_grid,
        "row_coords": row_boundaries,
        "col_coords": col_boundaries
    }
def _split_on_empty_rows(table_grid):
    """Split grid rows into runs of consecutive non-empty rows.

    A row is empty when every one of its cells is an empty list. Returns a
    list of ``{"rows": [...], "start_row": index}`` dicts, one per run, where
    ``start_row`` is the index of the run's first row within ``table_grid``.
    """
    tables = []
    pending_rows = []
    pending_start = -1
    for r_idx, row in enumerate(table_grid):
        if any(row):
            if not pending_rows:
                pending_start = r_idx
            pending_rows.append(row)
        elif pending_rows:
            # An empty row closes the table collected so far.
            tables.append({"rows": pending_rows, "start_row": pending_start})
            pending_rows = []
            pending_start = -1
    if pending_rows:
        tables.append({"rows": pending_rows, "start_row": pending_start})
    return tables


def print_analysis_report(table_data):
    """Print a human-readable report for data from ``analyze_dxf_tables``.

    The report lists the grid coordinates and column widths, splits the grid
    into separate tables wherever a completely empty row occurs, and for each
    table prints its overall height plus the absolute and cell-relative
    position and height of every text item.

    Args:
        table_data: Dict with keys ``"grid"``, ``"row_coords"`` and
            ``"col_coords"``. A falsy value (e.g. ``None``) prints nothing.

    Returns:
        None. All output goes to standard output.
    """
    if not table_data:
        return

    table_grid = table_data["grid"]
    row_boundaries = table_data["row_coords"]
    col_boundaries = table_data["col_coords"]

    print("--- 1. Grid Information ---")
    print(f"Row Coordinates (Y): {[f'{y:.2f}' for y in row_boundaries]}")
    print(f"Column Coordinates (X): {[f'{x:.2f}' for x in col_boundaries]}")

    # Width of a column = distance between its two neighbouring boundaries.
    col_widths = [
        right - left for left, right in zip(col_boundaries, col_boundaries[1:])
    ]
    print("\n--- 2. Column Widths ---")
    for i, width in enumerate(col_widths):
        print(f" Column {i}: {width:.2f} units")
    print("-" * 25)

    tables = _split_on_empty_rows(table_grid)
    print(f"\nFound {len(tables)} table(s).\n")

    for i, tbl_data in enumerate(tables):
        tbl = tbl_data["rows"]
        start_row_idx = tbl_data["start_row"]
        # Boundary just below the table's last row.
        end_row_idx = start_row_idx + len(tbl)
        table_height = row_boundaries[start_row_idx] - row_boundaries[end_row_idx]

        print(f"--- Table {i+1} Analysis ---")
        print(f" Overall Height: {table_height:.2f} units")
        print(" --- Text Position Analysis (relative to cell top-left) ---")

        for r_local_idx, row in enumerate(tbl):
            # Top edge of this row in the full grid's coordinates.
            cell_top_y = row_boundaries[start_row_idx + r_local_idx]
            for c_idx, cell in enumerate(row):
                if not cell:
                    continue
                cell_left_x = col_boundaries[c_idx]
                for text_info in cell:
                    rel_x = text_info['pos'][0] - cell_left_x
                    # Row boundaries descend, so the offset is measured
                    # downward from the cell's top edge.
                    rel_y = cell_top_y - text_info['pos'][1]
                    print(f" Cell({r_local_idx}, {c_idx}): '{text_info['text']}'")
                    print(f" - Abs Pos: (X={text_info['pos'][0]:.2f}, Y={text_info['pos'][1]:.2f})")
                    print(f" - Rel Pos: (dX={rel_x:.2f}, dY={rel_y:.2f})")
                    print(f" - Height: {text_info['height']:.2f}")
        print("\n")
if __name__ == "__main__":
    # The test data lives in 04_Test_Files, a sibling of this script's
    # directory (03_Python_OpenSource_DXF), so resolve it relative to here.
    script_dir = os.path.dirname(__file__)
    dxf_file_path = os.path.abspath(os.path.join(script_dir, '..', '04_Test_Files', '料表.dxf'))
    # Machine-specific fallback, tried only when the relative path is missing
    # (e.g. the script was copied and run from somewhere else).
    abs_path = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"

    # Step 1: decide which file to analyze (None means neither exists).
    if os.path.exists(dxf_file_path):
        selected_path = dxf_file_path
    else:
        print(f"File not found at the expected path: {dxf_file_path}")
        if os.path.exists(abs_path):
            print("Found file at absolute path, running analysis...")
            selected_path = abs_path
        else:
            print(f"Also could not find file at absolute path: {abs_path}")
            selected_path = None

    # Step 2: run the analysis once and report only when data came back.
    if selected_path is not None:
        extracted_data = analyze_dxf_tables(selected_path)
        if extracted_data:
            print_analysis_report(extracted_data)