# Listing metadata carried over from the file viewer: 199 lines, 7.3 KiB, Python
import ezdxf
|
|
from collections import defaultdict
|
|
import os
|
|
|
|
def group_close_coords(coords, tolerance=1.0):
    """Collapse nearly-equal coordinates into one averaged value per cluster.

    The values are sorted and walked in ascending order. A value joins the
    current cluster when it is less than ``tolerance`` away from the cluster's
    most recent member; otherwise the cluster is closed and a new one starts.
    Because the comparison is made against the previous member (not the
    cluster mean), a chain of closely spaced values may span more than
    ``tolerance`` in total.

    Args:
        coords: Any iterable of numbers (list, set, generator, ...).
        tolerance: Exclusive upper bound on the gap between neighbouring
            sorted values for them to share a cluster.

    Returns:
        A list holding the arithmetic mean of each cluster, in ascending
        order. An empty input yields an empty list.
    """
    # Sort before testing for emptiness: a one-shot iterable such as a
    # generator is always truthy, so checking the raw argument is unreliable.
    ordered = sorted(coords)
    if not ordered:
        return []

    clusters = [[ordered[0]]]
    for value in ordered[1:]:
        if abs(value - clusters[-1][-1]) < tolerance:
            clusters[-1].append(value)
        else:
            clusters.append([value])

    return [sum(cluster) / len(cluster) for cluster in clusters]
|
|
|
|
def analyze_dxf_tables(dxf_path):
    """Extract a LINE-delimited table grid and its TEXT/MTEXT content from a DXF file.

    Modelspace LINE entities whose endpoints differ by less than 0.1 in Y are
    treated as horizontal rules, and those differing by less than 0.1 in X as
    vertical rules. The start-point Y of every horizontal rule and the
    start-point X of every vertical rule are clustered with
    ``group_close_coords`` to obtain the row and column boundaries. Each
    TEXT/MTEXT entity is then dropped into the cell whose open interval
    contains its insertion point; text lying exactly on a boundary or outside
    the grid is skipped.

    Diagnostic messages are printed when the file cannot be read or no grid
    can be derived.

    Args:
        dxf_path: Path of the DXF file to read.

    Returns:
        ``None`` on any failure, otherwise a dict with:
            ``"grid"``: ``grid[row][col]`` is a list of dicts with the keys
                ``"text"`` (stripped content), ``"pos"`` (``(x, y)`` of the
                insertion point) and ``"height"``.
            ``"row_coords"``: row boundaries (Y), sorted descending.
            ``"col_coords"``: column boundaries (X), sorted ascending.
    """
    try:
        doc = ezdxf.readfile(dxf_path)
        msp = doc.modelspace()
    except IOError:
        print(f"Cannot open DXF file: {dxf_path}")
        return None
    except ezdxf.DXFStructureError as e:
        print(f"Invalid or corrupted DXF file: {dxf_path}. Error: {e}")
        return None

    lines = msp.query('LINE')
    texts = msp.query('TEXT MTEXT')

    # Only axis-aligned lines define the grid; a set removes exact duplicates
    # before the tolerance-based clustering.
    horizontal_ys = {
        line.dxf.start.y
        for line in lines
        if abs(line.dxf.start.y - line.dxf.end.y) < 0.1
    }
    vertical_xs = {
        line.dxf.start.x
        for line in lines
        if abs(line.dxf.start.x - line.dxf.end.x) < 0.1
    }

    if not horizontal_ys or not vertical_xs:
        print("No table structure (horizontal/vertical lines) found.")
        return None

    # Rows run top-to-bottom (descending Y), columns left-to-right.
    row_boundaries = sorted(group_close_coords(horizontal_ys), reverse=True)
    col_boundaries = sorted(group_close_coords(vertical_xs))

    num_rows = len(row_boundaries) - 1
    num_cols = len(col_boundaries) - 1

    if num_rows <= 0 or num_cols <= 0:
        print("Could not determine table grid.")
        return None

    # Every cell is a list because several text entities may share one cell.
    table_grid = [[[] for _ in range(num_cols)] for _ in range(num_rows)]

    for text in texts:
        pos = text.dxf.insert

        row = _interval_index(pos.y, row_boundaries)
        col = _interval_index(pos.x, col_boundaries)
        if row == -1 or col == -1:
            continue

        # Pick the accessors by entity type rather than probing with
        # hasattr(): MTEXT carries its string in ``.text`` and its size in
        # ``dxf.char_height``; TEXT uses ``dxf.text`` and ``dxf.height``.
        if text.dxftype() == "MTEXT":
            content = text.text
            height = text.dxf.char_height
        else:
            content = text.dxf.text
            height = text.dxf.height

        table_grid[row][col].append({
            "text": content.strip(),
            "pos": (pos.x, pos.y),
            "height": height,
        })

    return {
        "grid": table_grid,
        "row_coords": row_boundaries,
        "col_coords": col_boundaries,
    }


def _interval_index(value, boundaries):
    """Return the index of the first open interval between neighbouring boundaries that contains ``value``, or -1."""
    for idx, (first, second) in enumerate(zip(boundaries, boundaries[1:])):
        # Works for ascending and descending boundary lists alike.
        if min(first, second) < value < max(first, second):
            return idx
    return -1
|
|
|
|
|
|
def print_analysis_report(table_data):
    """Print a human-readable report for the data returned by ``analyze_dxf_tables``.

    The report lists the grid boundaries and the column widths, then splits
    the grid into separate tables wherever a fully empty row occurs. For each
    table it prints the overall height and, for every text item, its absolute
    position, its offset from the top-left corner of its cell, and its height.

    Args:
        table_data: Dict with the keys ``"grid"``, ``"row_coords"`` and
            ``"col_coords"``. A falsy value (``None``, empty dict) makes the
            function return without printing anything.

    Returns:
        None. All output goes to standard output.
    """
    if not table_data:
        return

    table_grid = table_data["grid"]
    row_boundaries = table_data["row_coords"]
    col_boundaries = table_data["col_coords"]

    print("--- 1. Grid Information ---")
    print(f"Row Coordinates (Y): {[f'{y:.2f}' for y in row_boundaries]}")
    print(f"Column Coordinates (X): {[f'{x:.2f}' for x in col_boundaries]}")

    # A column's width is the distance between two neighbouring boundaries.
    print("\n--- 2. Column Widths ---")
    for i, (left, right) in enumerate(zip(col_boundaries, col_boundaries[1:])):
        print(f" Column {i}: {right - left:.2f} units")
    print("-" * 25)

    tables = _split_tables_on_empty_rows(table_grid)
    print(f"\nFound {len(tables)} table(s).\n")

    for i, (start_row_idx, tbl) in enumerate(tables):
        # The boundary list has one more entry than there are rows, so the
        # index just past the last row is the table's bottom edge.
        end_row_idx = start_row_idx + len(tbl)
        table_height = row_boundaries[start_row_idx] - row_boundaries[end_row_idx]

        print(f"--- Table {i+1} Analysis ---")
        print(f" Overall Height: {table_height:.2f} units")
        print(" --- Text Position Analysis (relative to cell top-left) ---")

        for r_local_idx, row in enumerate(tbl):
            # Top edge of every cell in this row (index into the full grid).
            cell_top_y = row_boundaries[start_row_idx + r_local_idx]
            for c_idx, cell in enumerate(row):
                if not cell:
                    continue
                cell_left_x = col_boundaries[c_idx]
                for text_info in cell:
                    abs_x, abs_y = text_info['pos'][0], text_info['pos'][1]
                    rel_x = abs_x - cell_left_x
                    # Y grows upwards in the drawing, so distance below the
                    # top edge is top minus position.
                    rel_y = cell_top_y - abs_y

                    print(f" Cell({r_local_idx}, {c_idx}): '{text_info['text']}'")
                    print(f" - Abs Pos: (X={abs_x:.2f}, Y={abs_y:.2f})")
                    print(f" - Rel Pos: (dX={rel_x:.2f}, dY={rel_y:.2f})")
                    print(f" - Height: {text_info['height']:.2f}")
        print("\n")


def _split_tables_on_empty_rows(table_grid):
    """Group consecutive non-empty grid rows into ``(start_row, rows)`` pairs; fully empty rows act as separators."""
    tables = []
    start_row, rows = -1, []
    for r_idx, row in enumerate(table_grid):
        # A row is empty when every one of its cells is an empty list.
        if any(row):
            if not rows:
                start_row = r_idx
            rows.append(row)
        elif rows:
            tables.append((start_row, rows))
            start_row, rows = -1, []
    if rows:
        tables.append((start_row, rows))
    return tables
|
|
|
|
|
|
if __name__ == "__main__":
    # The script is expected to live in 03_Python_OpenSource_DXF, with the
    # sample drawing in the sibling directory 04_Test_Files.
    here = os.path.dirname(__file__)
    relative_candidate = os.path.abspath(
        os.path.join(here, '..', '04_Test_Files', '料表.dxf')
    )

    target = None
    if os.path.exists(relative_candidate):
        target = relative_candidate
    else:
        print(f"File not found at the expected path: {relative_candidate}")
        # Fallback for runs started from another location: the absolute path
        # originally supplied by the user.
        absolute_candidate = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"
        if os.path.exists(absolute_candidate):
            print("Found file at absolute path, running analysis...")
            target = absolute_candidate
        else:
            print(f"Also could not find file at absolute path: {absolute_candidate}")

    # Both locations share the same analyse-then-report sequence.
    if target is not None:
        report_input = analyze_dxf_tables(target)
        if report_input:
            print_analysis_report(report_input)
|