# dxfedit/03_Python_OpenSource_DXF/generate_pattern_report.py

import os
from analyze_dxf_tables import analyze_dxf_tables

def _split_into_tables(grid):
    """Group consecutive non-empty grid rows into separate tables.

    A row is treated as empty when none of its cells is truthy; a run of
    empty rows ends the table currently being collected.

    Returns:
        A list of dicts, each with "rows" (the table's rows in grid order)
        and "start_row" (index in ``grid`` of the table's first row).
    """
    tables = []
    rows, start_row = [], -1
    for r_idx, row in enumerate(grid):
        if any(row):
            if not rows:
                start_row = r_idx
            rows.append(row)
        elif rows:
            tables.append({"rows": rows, "start_row": start_row})
            rows, start_row = [], -1
    if rows:
        tables.append({"rows": rows, "start_row": start_row})
    return tables


def _relative_offsets(text, r_global, c_idx, row_coords, col_coords):
    """Return (dX, dY) of a text item relative to its cell's coordinates.

    dX is the text's x minus ``col_coords[c_idx]``; dY is
    ``row_coords[r_global]`` minus the text's y (presumably the cell's
    top-left corner, with row coordinates decreasing downwards -- verify
    against the producer of ``table_data``).
    """
    rel_x = text['pos'][0] - col_coords[c_idx]
    rel_y = row_coords[r_global] - text['pos'][1]
    return rel_x, rel_y


def generate_report(table_data, *, header_row_count=2, sample_row_count=5):
    """
    Analyzes extracted table data and prints a summarized report on patterns.

    The report has two parts: the most common row height (rounded to one
    decimal), and, for every table found in the grid, the text placement in
    its header rows plus the recurring text styles of a sample of its
    content rows.

    Args:
        table_data: Mapping with the keys "grid" (rows of cells, each cell an
            iterable of text dicts with 'text', 'height' and 'pos'),
            "row_coords" and "col_coords". A falsy value prints a notice and
            returns.
        header_row_count: How many trailing rows of each table are treated as
            the header. Tables with fewer rows are treated as all-header.
        sample_row_count: How many leading content rows of each table are
            sampled for the style statistics.

    Returns:
        None. All results are written to standard output.
    """
    from collections import Counter

    if not table_data:
        print("No table data to analyze.")
        return

    grid = table_data["grid"]
    row_coords = table_data["row_coords"]
    col_coords = table_data["col_coords"]

    # --- 1. Overall Patterns ---
    print("--- 1. Overall Patterns ---")

    # Height of a row is the difference between consecutive row coordinates.
    row_heights = [upper - lower for upper, lower in zip(row_coords, row_coords[1:])]
    if row_heights:
        # Round before counting so tiny float variations fall in one bucket.
        # most_common keeps first-seen order on ties.
        rounded_counts = Counter(round(h, 1) for h in row_heights)
        most_common_rounded = rounded_counts.most_common(1)[0][0]
        print(f"Most common row height is approximately: {most_common_rounded:.2f} units.")

    print("\n--- 2. Header and Content Patterns ---")

    for i, table_info in enumerate(_split_into_tables(grid)):
        print(f"\n--- Table {i+1} ---")

        rows = table_info["rows"]
        start_row_idx = table_info["start_row"]

        # Assume the last rows are the header. Clamp at 0 so that a table
        # shorter than the header size does not produce an index that points
        # before the table (or wraps around to the end of row_coords).
        header_start = max(len(rows) - header_row_count, 0)
        header_rows = rows[header_start:]
        content_rows = rows[:header_start]

        print("\n  [Header Analysis]")
        for r_local, header_row in enumerate(header_rows):
            r_global = start_row_idx + header_start + r_local
            row_height = row_coords[r_global] - row_coords[r_global+1]
            print(f"  Header Row {r_local+1} (Height: {row_height:.2f}):")
            for c_idx, cell in enumerate(header_row):
                if not cell:
                    continue
                print(f"    - Cell({c_idx}):")
                for text in cell:
                    rel_x, rel_y = _relative_offsets(text, r_global, c_idx, row_coords, col_coords)
                    print(f"      - Text: '{text['text']}' (H: {text['height']:.2f}) -> Rel Pos (dX: {rel_x:.2f}, dY: {rel_y:.2f})")

        # Analyze a sample of content rows to find text style patterns
        print("\n  [Content Text Style Patterns]")

        styles = {}  # (height, dX, dY) -> number of occurrences
        for r_local, content_row in enumerate(content_rows[:sample_row_count]):
            r_global = start_row_idx + r_local
            for c_idx, cell in enumerate(content_row):
                if not cell:
                    continue
                for text in cell:
                    rel_x, rel_y = _relative_offsets(text, r_global, c_idx, row_coords, col_coords)
                    # Snap offsets to the nearest 0.5 so similar styles group together.
                    style_key = (
                        text['height'],
                        round(rel_x / 0.5) * 0.5,
                        round(rel_y / 0.5) * 0.5,
                    )
                    styles[style_key] = styles.get(style_key, 0) + 1

        print("  (Based on a sample of rows)")
        sorted_styles = sorted(styles.items(), key=lambda item: item[1], reverse=True)
        for style, count in sorted_styles:
            print(f"  - Style (Height: {style[0]:.2f}, dX: ~{style[1]:.2f}, dY: ~{style[2]:.2f}) found {count} times.")


if __name__ == "__main__":
    # Locate the sample DXF: first relative to this script, then at a
    # machine-specific absolute fallback location.
    here = os.path.dirname(__file__)
    relative_candidate = os.path.abspath(os.path.join(here, '..', '04_Test_Files', '料表.dxf'))
    fallback_candidate = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"

    chosen_path = None
    if os.path.exists(relative_candidate):
        chosen_path = relative_candidate
    else:
        print(f"File not found: {relative_candidate}")
        if not os.path.exists(fallback_candidate):
            print(f"Also could not find file at absolute path: {fallback_candidate}")
        else:
            print("Found file at absolute path, running analysis...")
            chosen_path = fallback_candidate

    # Run the analysis once, on whichever location was found.
    if chosen_path is not None:
        extracted = analyze_dxf_tables(chosen_path)
        generate_report(extracted)