dxfedit/03_Python_OpenSource_DXF/generate_pattern_report.py
2025-09-09 18:42:30 +08:00

114 lines
4.7 KiB
Python

import os
from analyze_dxf_tables import analyze_dxf_tables
def _split_tables(grid):
    """Group consecutive non-empty grid rows into tables.

    A row counts as empty when none of its cells is truthy; every run of
    non-empty rows becomes one table.

    Returns a list of dicts with the keys "rows" (the rows of the run) and
    "start_row" (the index in ``grid`` of the run's first row).
    """
    tables = []
    current = {"rows": [], "start_row": -1}
    for r_idx, row in enumerate(grid):
        if any(row):
            if not current["rows"]:
                current["start_row"] = r_idx
            current["rows"].append(row)
        elif current["rows"]:
            # An empty row closes the table that was being collected.
            tables.append(current)
            current = {"rows": [], "start_row": -1}
    if current["rows"]:
        tables.append(current)
    return tables


def generate_report(table_data):
    """Analyze extracted table data and print a summarized pattern report.

    Args:
        table_data: Mapping with the keys "grid", "row_coords" and
            "col_coords". "grid" is a sequence of rows, each row a sequence
            of cells, each cell a (possibly empty) collection of text dicts
            carrying 'text', 'pos' (x, y) and 'height'. Row ``r`` is measured
            between ``row_coords[r]`` and ``row_coords[r + 1]``; column ``c``
            is measured from ``col_coords[c]``. A falsy value prints a notice
            and returns immediately.

    Returns:
        None. The report is written to standard output.

    Notes:
        The last two rows of every table are assumed to be its header. A
        table with fewer than two rows is treated as header-only, so the
        header row indices never fall outside the table.
    """
    if not table_data:
        print("No table data to analyze.")
        return

    grid = table_data["grid"]
    row_coords = table_data["row_coords"]
    col_coords = table_data["col_coords"]

    # --- 1. Overall Patterns ---
    print("--- 1. Overall Patterns ---")

    # Tally row heights rounded to one decimal so tiny variations are grouped.
    rounded_counts = {}
    for upper, lower in zip(row_coords, row_coords[1:]):
        rh = round(upper - lower, 1)
        rounded_counts[rh] = rounded_counts.get(rh, 0) + 1

    if rounded_counts:
        most_common_rounded = max(rounded_counts, key=rounded_counts.get)
        print(f"Most common row height is approximately: {most_common_rounded:.2f} units.")

    print("\n--- 2. Header and Content Patterns ---")

    for i, table_info in enumerate(_split_tables(grid)):
        print(f"\n--- Table {i+1} ---")

        rows = table_info["rows"]
        start_row_idx = table_info["start_row"]

        # Assume the last 2 rows are the header. Clamp at 0 so a one-row
        # table does not yield an index in front of its own first row.
        header_start = max(len(rows) - 2, 0)
        header_rows = rows[header_start:]
        content_rows = rows[:header_start]

        print("\n [Header Analysis]")
        for r_local, header_row in enumerate(header_rows):
            r_global = start_row_idx + header_start + r_local
            row_height = row_coords[r_global] - row_coords[r_global + 1]
            print(f" Header Row {r_local+1} (Height: {row_height:.2f}):")
            for c_idx, cell in enumerate(header_row):
                if not cell:
                    continue
                print(f" - Cell({c_idx}):")
                for text in cell:
                    # Offsets of the text relative to the cell's reference corner.
                    rel_x = text['pos'][0] - col_coords[c_idx]
                    rel_y = row_coords[r_global] - text['pos'][1]
                    print(f" - Text: '{text['text']}' (H: {text['height']:.2f}) -> Rel Pos (dX: {rel_x:.2f}, dY: {rel_y:.2f})")

        # Analyze a sample of content rows to find text style patterns.
        print("\n [Content Text Style Patterns]")
        styles = {}  # (height, dX, dY) -> number of occurrences
        for r_local, content_row in enumerate(content_rows[:5]):  # Sample first 5 rows
            r_global = start_row_idx + r_local
            for c_idx, cell in enumerate(content_row):
                if not cell:
                    continue
                for text in cell:
                    rel_x = text['pos'][0] - col_coords[c_idx]
                    rel_y = row_coords[r_global] - text['pos'][1]
                    # Round offsets to the nearest 0.5 to group similar styles.
                    style_key = (
                        text['height'],
                        round(rel_x / 0.5) * 0.5,
                        round(rel_y / 0.5) * 0.5,
                    )
                    styles[style_key] = styles.get(style_key, 0) + 1

        print(" (Based on a sample of rows)")
        sorted_styles = sorted(styles.items(), key=lambda item: item[1], reverse=True)
        for style, count in sorted_styles:
            print(f" - Style (Height: {style[0]:.2f}, dX: ~{style[1]:.2f}, dY: ~{style[2]:.2f}) found {count} times.")
if __name__ == "__main__":
    # Primary location: the sample drawing in the sibling 04_Test_Files
    # folder, resolved relative to this script.
    script_dir = os.path.dirname(__file__)
    dxf_file_path = os.path.abspath(
        os.path.join(script_dir, '..', '04_Test_Files', '料表.dxf')
    )
    # Fallback location: a fixed absolute path, consulted only when the
    # primary location does not exist.
    abs_path = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"

    if os.path.exists(dxf_file_path):
        generate_report(analyze_dxf_tables(dxf_file_path))
    else:
        print(f"File not found: {dxf_file_path}")
        if not os.path.exists(abs_path):
            print(f"Also could not find file at absolute path: {abs_path}")
        else:
            print("Found file at absolute path, running analysis...")
            generate_report(analyze_dxf_tables(abs_path))