"""Print a summary report of layout patterns in tables extracted from a DXF file."""
import os
from collections import Counter

from analyze_dxf_tables import analyze_dxf_tables
|
|
|
|
# Number of trailing rows of each table that are treated as its header.
_HEADER_ROW_COUNT = 2
# Number of leading content rows sampled when collecting text style patterns.
_CONTENT_SAMPLE_SIZE = 5


def _split_into_tables(grid):
    """Group consecutive non-empty grid rows into tables.

    A row counts as non-empty when at least one of its cells is truthy.
    Each run of non-empty rows becomes one table; empty rows only act as
    separators and are not stored.

    Returns:
        A list of dicts, each with "rows" (the rows of the run, in grid
        order) and "start_row" (index in ``grid`` of the run's first row).
    """
    tables = []
    current = None
    for r_idx, row in enumerate(grid):
        if any(row):
            if current is None:
                current = {"rows": [], "start_row": r_idx}
                tables.append(current)
            current["rows"].append(row)
        else:
            # An empty row closes the table that was being collected.
            current = None
    return tables


def generate_report(table_data):
    """
    Analyzes extracted table data and prints a summarized report on patterns.

    Args:
        table_data: Mapping with the keys "grid", "row_coords" and
            "col_coords". ``grid`` is a sequence of rows; each row is a
            sequence of cells; each cell is either falsy (empty) or an
            iterable of text dicts with the keys 'text', 'pos' (indexable,
            x at index 0 and y at index 1) and 'height'. ``row_coords`` is
            indexed by row index and row index + 1, ``col_coords`` by
            column index. A falsy ``table_data`` prints a notice and
            returns.

    Returns:
        None. The report is written to standard output.
    """
    if not table_data:
        print("No table data to analyze.")
        return

    grid = table_data["grid"]
    row_coords = table_data["row_coords"]
    col_coords = table_data["col_coords"]

    # --- 1. Overall Patterns ---
    print("--- 1. Overall Patterns ---")

    # Height of a row is the difference between its coordinate and the next
    # one (positive when the coordinates are listed in descending order).
    row_heights = [upper - lower for upper, lower in zip(row_coords, row_coords[1:])]

    if row_heights:
        # Round to one decimal so very small variations land in one bucket.
        rounded_counts = Counter(round(h, 1) for h in row_heights)
        most_common_rounded = rounded_counts.most_common(1)[0][0]
        print(f"Most common row height is approximately: {most_common_rounded:.2f} units.")

    print("\n--- 2. Header and Content Patterns ---")

    for table_no, table_info in enumerate(_split_into_tables(grid), start=1):
        print(f"\n--- Table {table_no} ---")

        rows = table_info["rows"]
        start_row_idx = table_info["start_row"]

        # Assume the last rows of the table are the header. A table may have
        # fewer rows than _HEADER_ROW_COUNT, so the global index of the first
        # header row is derived from the number of header rows actually
        # present rather than from the constant.
        header_rows = rows[-_HEADER_ROW_COUNT:]
        header_start = start_row_idx + len(rows) - len(header_rows)

        print("\n [Header Analysis]")
        for r_local, header_row in enumerate(header_rows):
            r_global = header_start + r_local
            row_height = row_coords[r_global] - row_coords[r_global + 1]
            print(f" Header Row {r_local+1} (Height: {row_height:.2f}):")
            for c_idx, cell in enumerate(header_row):
                if not cell:
                    continue
                print(f" - Cell({c_idx}):")
                for text in cell:
                    # Offsets are measured from the cell's column coordinate
                    # and from the row coordinate at index r_global.
                    rel_x = text['pos'][0] - col_coords[c_idx]
                    rel_y = row_coords[r_global] - text['pos'][1]
                    print(f" - Text: '{text['text']}' (H: {text['height']:.2f}) -> Rel Pos (dX: {rel_x:.2f}, dY: {rel_y:.2f})")

        # Analyze a sample of content rows to find text style patterns.
        content_rows = rows[:len(rows) - len(header_rows)]
        print("\n [Content Text Style Patterns]")

        styles = Counter()  # (height, rounded dX, rounded dY) -> count
        for r_local, content_row in enumerate(content_rows[:_CONTENT_SAMPLE_SIZE]):
            r_global = start_row_idx + r_local
            for c_idx, cell in enumerate(content_row):
                if not cell:
                    continue
                for text in cell:
                    rel_x = text['pos'][0] - col_coords[c_idx]
                    rel_y = row_coords[r_global] - text['pos'][1]
                    # Snap offsets to the nearest 0.5 to group similar styles.
                    style_key = (
                        text['height'],
                        round(rel_x / 0.5) * 0.5,
                        round(rel_y / 0.5) * 0.5,
                    )
                    styles[style_key] += 1

        print(" (Based on a sample of rows)")
        # most_common() sorts by count, descending; ties keep insertion order.
        for style, count in styles.most_common():
            print(f" - Style (Height: {style[0]:.2f}, dX: ~{style[1]:.2f}, dY: ~{style[2]:.2f}) found {count} times.")
|
|
|
|
|
|
if __name__ == "__main__":
    # Primary location: the test file, resolved relative to this script.
    base_dir = os.path.dirname(__file__)
    dxf_file_path = os.path.abspath(os.path.join(base_dir, '..', '04_Test_Files', '料表.dxf'))

    if not os.path.exists(dxf_file_path):
        print(f"File not found: {dxf_file_path}")
        # Fallback: a hard-coded absolute path to the same test file.
        abs_path = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"
        if not os.path.exists(abs_path):
            print(f"Also could not find file at absolute path: {abs_path}")
        else:
            print("Found file at absolute path, running analysis...")
            generate_report(analyze_dxf_tables(abs_path))
    else:
        generate_report(analyze_dxf_tables(dxf_file_path))
|