"""Extract table-like grids drawn with LINE and TEXT/MTEXT entities from a DXF file.

The module exposes three public functions:

* ``group_close_coords`` - merge nearly-equal coordinates into averaged values.
* ``analyze_dxf_tables`` - read a DXF file and bucket its text into grid cells.
* ``print_analysis_report`` - print a human-readable report of that data.
"""
import os
import sys
from collections import defaultdict
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

import ezdxf

# Locations tried, in this order, when no path is passed on the command line.
# The relative one is resolved against the directory containing this script.
_DEFAULT_RELATIVE_PARTS = ('..', '04_Test_Files', '料表.dxf')
_FALLBACK_ABSOLUTE_PATH = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"


def group_close_coords(coords: Iterable[float], tolerance: float = 1.0) -> List[float]:
    """Collapse nearly-equal coordinates into one averaged value per cluster.

    The values are sorted and walked in ascending order. A value joins the
    current cluster when it is less than ``tolerance`` away from the most
    recently added member of that cluster; otherwise it starts a new cluster.

    Args:
        coords: Any iterable of numbers (list, set, generator, ...).
        tolerance: Exclusive upper bound on the gap between two neighbouring
            members of the same cluster.

    Returns:
        The arithmetic mean of every cluster, in ascending order. An empty
        input yields an empty list.
    """
    # Sort first and test emptiness afterwards so that one-shot iterables
    # (which are always truthy) are handled as well as lists and sets.
    ordered = sorted(coords)
    if not ordered:
        return []

    clusters = [[ordered[0]]]
    for value in ordered[1:]:
        if abs(value - clusters[-1][-1]) < tolerance:
            clusters[-1].append(value)
        else:
            clusters.append([value])
    return [sum(cluster) / len(cluster) for cluster in clusters]


def _find_interval(value: float, boundaries: Sequence[float]) -> int:
    """Return the index ``i`` with ``value`` strictly between ``boundaries[i]``
    and ``boundaries[i + 1]``, or ``-1`` when no such interval exists.

    Works for ascending and descending boundary lists alike. A value lying
    exactly on a boundary belongs to no interval.
    """
    for index, (first, second) in enumerate(zip(boundaries, boundaries[1:])):
        if min(first, second) < value < max(first, second):
            return index
    return -1


def _text_record(entity: Any) -> Dict[str, Any]:
    """Build the ``{"text", "pos", "height"}`` record stored in a grid cell."""
    pos = entity.dxf.insert
    # Entities exposing a ``text`` attribute / ``dxf.char_height`` (MTEXT in
    # ezdxf) are read through those; everything else falls back to the
    # ``dxf.text`` / ``dxf.height`` attributes used by TEXT.
    # NOTE(review): raw MTEXT content may still contain inline formatting
    # codes - confirm whether callers expect plain text.
    content = entity.text if hasattr(entity, 'text') else entity.dxf.text
    height = entity.dxf.char_height if hasattr(entity.dxf, 'char_height') else entity.dxf.height
    return {
        "text": content.strip(),
        "pos": (pos.x, pos.y),
        "height": height,
    }


def analyze_dxf_tables(
    dxf_path: str,
    *,
    axis_tolerance: float = 0.1,
    group_tolerance: float = 1.0,
) -> Optional[Dict[str, Any]]:
    """Read a DXF file and bucket its TEXT/MTEXT entities into a line grid.

    Horizontal and vertical LINE entities of the modelspace define the grid:
    the Y of every horizontal line's start point becomes a row boundary and
    the X of every vertical line's start point becomes a column boundary
    (nearby values are merged with ``group_close_coords``). Each text entity
    whose insertion point lies strictly inside a cell is recorded in it.

    Args:
        dxf_path: Path of the DXF file to read.
        axis_tolerance: A line counts as horizontal (vertical) when its start
            and end Y (X) differ by less than this amount.
        group_tolerance: Tolerance forwarded to ``group_close_coords`` when
            merging boundary coordinates.

    Returns:
        ``None`` when the file cannot be read or no usable grid is found (a
        message is printed in each case). Otherwise a dict with:

        * ``"grid"`` - ``rows x cols`` nested list; each cell is a list of
          ``{"text", "pos", "height"}`` dicts.
        * ``"row_coords"`` - row boundaries, descending Y (top first).
        * ``"col_coords"`` - column boundaries, ascending X.
    """
    try:
        doc = ezdxf.readfile(dxf_path)
        msp = doc.modelspace()
    except IOError:
        print(f"Cannot open DXF file: {dxf_path}")
        return None
    except ezdxf.DXFStructureError as e:
        print(f"Invalid or corrupted DXF file: {dxf_path}. Error: {e}")
        return None

    lines = msp.query('LINE')
    texts = msp.query('TEXT MTEXT')

    # Only axis-aligned lines take part in the grid; collect the coordinate
    # that stays constant along each of them.
    y_coords = {
        line.dxf.start.y
        for line in lines
        if abs(line.dxf.start.y - line.dxf.end.y) < axis_tolerance
    }
    x_coords = {
        line.dxf.start.x
        for line in lines
        if abs(line.dxf.start.x - line.dxf.end.x) < axis_tolerance
    }
    if not y_coords or not x_coords:
        print("No table structure (horizontal/vertical lines) found.")
        return None

    # Rows run top-down (descending Y), columns left-to-right (ascending X).
    row_boundaries = sorted(group_close_coords(y_coords, group_tolerance), reverse=True)
    col_boundaries = sorted(group_close_coords(x_coords, group_tolerance))

    num_rows = len(row_boundaries) - 1
    num_cols = len(col_boundaries) - 1
    if num_rows <= 0 or num_cols <= 0:
        print("Could not determine table grid.")
        return None

    # Every cell holds a list because several text entities may share a cell.
    table_grid = [[[] for _ in range(num_cols)] for _ in range(num_rows)]

    for text in texts:
        pos = text.dxf.insert
        row = _find_interval(pos.y, row_boundaries)
        col = _find_interval(pos.x, col_boundaries)
        if row != -1 and col != -1:
            table_grid[row][col].append(_text_record(text))

    return {
        "grid": table_grid,
        "row_coords": row_boundaries,
        "col_coords": col_boundaries,
    }


def _split_on_empty_rows(table_grid: Sequence[Sequence[list]]) -> List[Tuple[int, List[Sequence[list]]]]:
    """Split grid rows into runs of consecutive rows that contain text.

    A row whose cells are all empty lists terminates the current run and is
    itself discarded.

    Returns:
        A list of ``(start_row, rows)`` tuples, where ``start_row`` is the
        index of the run's first row within ``table_grid``.
    """
    runs = []
    run_start = -1
    run_rows = []
    for r_idx, row in enumerate(table_grid):
        if any(row):
            if not run_rows:
                run_start = r_idx
            run_rows.append(row)
        elif run_rows:
            runs.append((run_start, run_rows))
            run_start, run_rows = -1, []
    if run_rows:
        runs.append((run_start, run_rows))
    return runs


def print_analysis_report(table_data: Optional[Dict[str, Any]]) -> None:
    """Print grid coordinates, column widths and per-table text positions.

    Args:
        table_data: The dict returned by ``analyze_dxf_tables`` (keys
            ``"grid"``, ``"row_coords"``, ``"col_coords"``). A falsy value
            makes the function return without printing anything.
    """
    if not table_data:
        return

    table_grid = table_data["grid"]
    row_boundaries = table_data["row_coords"]
    col_boundaries = table_data["col_coords"]

    print("--- 1. Grid Information ---")
    print(f"Row Coordinates (Y): {[f'{y:.2f}' for y in row_boundaries]}")
    print(f"Column Coordinates (X): {[f'{x:.2f}' for x in col_boundaries]}")

    col_widths = [right - left for left, right in zip(col_boundaries, col_boundaries[1:])]
    print("\n--- 2. Column Widths ---")
    for i, width in enumerate(col_widths):
        print(f" Column {i}: {width:.2f} units")
    print("-" * 25)

    # Fully empty grid rows act as separators between independent tables.
    tables = _split_on_empty_rows(table_grid)
    print(f"\nFound {len(tables)} table(s).\n")

    for i, (start_row_idx, tbl) in enumerate(tables):
        end_row_idx = start_row_idx + len(tbl)
        table_height = row_boundaries[start_row_idx] - row_boundaries[end_row_idx]

        print(f"--- Table {i+1} Analysis ---")
        print(f" Overall Height: {table_height:.2f} units")
        print(" --- Text Position Analysis (relative to cell top-left) ---")

        for r_local_idx, row in enumerate(tbl):
            cell_top_y = row_boundaries[start_row_idx + r_local_idx]
            for c_idx, cell in enumerate(row):
                cell_left_x = col_boundaries[c_idx]
                for text_info in cell:
                    abs_x, abs_y = text_info['pos']
                    rel_x = abs_x - cell_left_x
                    # Rows are ordered top-down, so dY is measured downwards
                    # from the top edge of the cell.
                    rel_y = cell_top_y - abs_y

                    print(f" Cell({r_local_idx}, {c_idx}): '{text_info['text']}'")
                    print(f" - Abs Pos: (X={abs_x:.2f}, Y={abs_y:.2f})")
                    print(f" - Rel Pos: (dX={rel_x:.2f}, dY={rel_y:.2f})")
                    print(f" - Height: {text_info['height']:.2f}")
        print("\n")


def _analyze_and_report(dxf_path: str) -> None:
    """Run ``analyze_dxf_tables`` on ``dxf_path`` and print the report if data was found."""
    extracted_data = analyze_dxf_tables(dxf_path)
    if extracted_data:
        print_analysis_report(extracted_data)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Script entry point.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. When a first argument is present it is used as
            the DXF path. Without arguments the script looks for the test
            file next to the script directory and then at a fixed absolute
            fallback location.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if args:
        dxf_path = args[0]
        if os.path.exists(dxf_path):
            _analyze_and_report(dxf_path)
        else:
            print(f"File not found: {dxf_path}")
        return

    # Script is in 03_Python_OpenSource_DXF, data is in 04_Test_Files.
    script_dir = os.path.dirname(__file__)
    dxf_file_path = os.path.abspath(os.path.join(script_dir, *_DEFAULT_RELATIVE_PARTS))
    if os.path.exists(dxf_file_path):
        _analyze_and_report(dxf_file_path)
        return

    print(f"File not found at the expected path: {dxf_file_path}")
    # Fallback for runs where the relative layout does not apply.
    abs_path = _FALLBACK_ABSOLUTE_PATH
    if os.path.exists(abs_path):
        print("Found file at absolute path, running analysis...")
        _analyze_and_report(abs_path)
    else:
        print(f"Also could not find file at absolute path: {abs_path}")


if __name__ == "__main__":
    main()