import os
from collections import Counter

from analyze_dxf_tables import analyze_dxf_tables

# Number of trailing rows of each table that are treated as its header.
_HEADER_ROW_COUNT = 2
# Number of leading content rows sampled when collecting text style patterns.
_CONTENT_SAMPLE_SIZE = 5
# Machine-specific location tried only when the path relative to this script
# does not exist.
_FALLBACK_DXF_PATH = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"


def _print_row_height_summary(row_coords):
    """Print the most frequent row height, grouping heights to one decimal."""
    # Height of row i is the gap between consecutive row coordinates.
    row_heights = [upper - lower for upper, lower in zip(row_coords, row_coords[1:])]
    if not row_heights:
        return
    # Round so that tiny floating-point variations fall into the same bucket.
    rounded_counts = Counter(round(height, 1) for height in row_heights)
    # Ties resolve to the height encountered first.
    most_common_rounded = rounded_counts.most_common(1)[0][0]
    print(f"Most common row height is approximately: {most_common_rounded:.2f} units.")


def _split_into_tables(grid):
    """Group runs of consecutive non-empty grid rows into tables.

    A row is non-empty when at least one of its cells is truthy; any empty
    row closes the table currently being collected.

    Returns:
        A list of dicts, each with "rows" (the rows of the run) and
        "start_row" (the grid index of the run's first row).
    """
    tables = []
    current_table = {"rows": [], "start_row": -1}
    for r_idx, row in enumerate(grid):
        if any(row):
            if not current_table["rows"]:
                current_table["start_row"] = r_idx
            current_table["rows"].append(row)
        elif current_table["rows"]:
            tables.append(current_table)
            current_table = {"rows": [], "start_row": -1}
    if current_table["rows"]:
        tables.append(current_table)
    return tables


def _relative_position(text, r_global, c_idx, row_coords, col_coords):
    """Return (dX, dY) of a text entry relative to its cell's row/column coordinates."""
    rel_x = text['pos'][0] - col_coords[c_idx]
    rel_y = row_coords[r_global] - text['pos'][1]
    return rel_x, rel_y


def _print_header_analysis(rows, start_row_idx, header_start, row_coords, col_coords):
    """Print height and per-text relative positions for a table's header rows."""
    print("\n [Header Analysis]")
    for r_local, header_row in enumerate(rows[header_start:]):
        # header_start is clamped at 0 by the caller, so this index stays
        # inside the table even when it has fewer rows than the header size.
        r_global = start_row_idx + header_start + r_local
        # NOTE(review): assumes row_coords has one more entry than the grid
        # has rows (row boundaries) -- confirm against analyze_dxf_tables.
        row_height = row_coords[r_global] - row_coords[r_global + 1]
        print(f" Header Row {r_local+1} (Height: {row_height:.2f}):")
        for c_idx, cell in enumerate(header_row):
            if not cell:
                continue
            print(f" - Cell({c_idx}):")
            for text in cell:
                rel_x, rel_y = _relative_position(text, r_global, c_idx, row_coords, col_coords)
                print(f" - Text: '{text['text']}' (H: {text['height']:.2f}) -> Rel Pos (dX: {rel_x:.2f}, dY: {rel_y:.2f})")


def _print_content_styles(content_rows, start_row_idx, row_coords, col_coords):
    """Print how often each (height, dX, dY) text style occurs in sampled content rows."""
    print("\n [Content Text Style Patterns]")
    styles = Counter()  # (height, rounded dX, rounded dY) -> occurrences
    for r_local, content_row in enumerate(content_rows[:_CONTENT_SAMPLE_SIZE]):
        r_global = start_row_idx + r_local
        for c_idx, cell in enumerate(content_row):
            if not cell:
                continue
            for text in cell:
                rel_x, rel_y = _relative_position(text, r_global, c_idx, row_coords, col_coords)
                # Snap offsets to the nearest 0.5 so similar placements share a key.
                style_key = (
                    text['height'],
                    round(rel_x / 0.5) * 0.5,
                    round(rel_y / 0.5) * 0.5,
                )
                styles[style_key] += 1

    print(" (Based on a sample of rows)")
    # most_common() orders by count descending, keeping first-seen order on ties.
    for style, count in styles.most_common():
        print(f" - Style (Height: {style[0]:.2f}, dX: ~{style[1]:.2f}, dY: ~{style[2]:.2f}) found {count} times.")


def generate_report(table_data):
    """
    Analyzes extracted table data and prints a summarized report on patterns.

    Args:
        table_data: Mapping with the keys "grid", "row_coords" and
            "col_coords". Each grid row is a sequence of cells; a non-empty
            cell is a sequence of text entries exposing 'text', 'height' and
            'pos' (indexable, x at 0 and y at 1). A falsy value prints a
            notice and returns immediately.

    Returns:
        None. The report is written to standard output.

    Raises:
        KeyError: If one of the required keys is missing from table_data.
    """
    if not table_data:
        print("No table data to analyze.")
        return

    grid = table_data["grid"]
    row_coords = table_data["row_coords"]
    col_coords = table_data["col_coords"]

    # --- 1. Overall Patterns ---
    print("--- 1. Overall Patterns ---")
    _print_row_height_summary(row_coords)

    print("\n--- 2. Header and Content Patterns ---")
    for i, table_info in enumerate(_split_into_tables(grid)):
        print(f"\n--- Table {i+1} ---")
        rows = table_info["rows"]
        start_row_idx = table_info["start_row"]

        # Assume the last rows are the header. Clamp at 0 so that a table
        # shorter than the header size still maps to its own grid rows
        # instead of the row preceding the table.
        header_start = max(len(rows) - _HEADER_ROW_COUNT, 0)

        _print_header_analysis(rows, start_row_idx, header_start, row_coords, col_coords)
        _print_content_styles(rows[:header_start], start_row_idx, row_coords, col_coords)


def main():
    """Run the report on the test DXF file, trying the fallback path if needed."""
    script_dir = os.path.dirname(__file__)
    dxf_file_path = os.path.abspath(os.path.join(script_dir, '..', '04_Test_Files', '料表.dxf'))

    if os.path.exists(dxf_file_path):
        generate_report(analyze_dxf_tables(dxf_file_path))
        return

    print(f"File not found: {dxf_file_path}")
    if os.path.exists(_FALLBACK_DXF_PATH):
        print("Found file at absolute path, running analysis...")
        generate_report(analyze_dxf_tables(_FALLBACK_DXF_PATH))
    else:
        print(f"Also could not find file at absolute path: {_FALLBACK_DXF_PATH}")


if __name__ == "__main__":
    main()