import ezdxf
from collections import defaultdict
import os

def group_close_coords(coords, tolerance=1.0):
    """Collapse nearly-equal coordinates into one representative value each.

    The values are sorted ascending and walked in order. A value joins the
    current cluster when it differs from the cluster's most recent member by
    less than ``tolerance``; otherwise it opens a new cluster.

    Args:
        coords: Collection of numeric coordinates (any order).
        tolerance: Maximum gap (exclusive) between neighbouring values that
            still counts as "the same" coordinate.

    Returns:
        A list with the arithmetic mean of every cluster, in ascending
        order. An empty input yields an empty list.
    """
    if not coords:
        return []

    ordered = sorted(coords)

    # Each cluster is a list of raw values; the last cluster is the open one.
    clusters = [[ordered[0]]]
    for value in ordered[1:]:
        open_cluster = clusters[-1]
        if abs(value - open_cluster[-1]) < tolerance:
            open_cluster.append(value)
        else:
            clusters.append([value])

    return [sum(members) / len(members) for members in clusters]

def _interval_index(value, boundaries):
    """Return the index of the interval that strictly contains ``value``.

    Interval ``i`` spans ``boundaries[i]`` .. ``boundaries[i + 1]``; the list
    may be sorted ascending or descending. A value lying exactly on a
    boundary, or outside every interval, yields ``-1``.
    """
    for index in range(len(boundaries) - 1):
        first, second = boundaries[index], boundaries[index + 1]
        if min(first, second) < value < max(first, second):
            return index
    return -1


def analyze_dxf_tables(dxf_path, *, line_tolerance=0.1, group_tolerance=1.0):
    """
    Analyzes a DXF file to find tables composed of LINE and TEXT/MTEXT entities.
    This function extracts raw grid and text data and returns it.

    Horizontal and vertical LINE entities in the modelspace define the grid:
    the Y of each horizontal line's start point becomes a row boundary and
    the X of each vertical line's start point becomes a column boundary.
    Nearby boundaries are merged with ``group_close_coords``. Each TEXT/MTEXT
    entity is then assigned to the cell that strictly contains its insertion
    point; text lying exactly on a boundary or outside the grid is skipped.

    Args:
        dxf_path: Path of the DXF file to read.
        line_tolerance: Maximum difference between a line's start and end
            coordinate for it to count as horizontal (Y) or vertical (X).
        group_tolerance: Tolerance passed to ``group_close_coords`` when
            merging nearly identical boundary coordinates.

    Returns:
        ``None`` when the file cannot be read or no grid can be derived
        (a message is printed in that case). Otherwise a dict with:

        * ``"grid"``: ``grid[row][col]`` is a list of dicts with the keys
          ``"text"``, ``"pos"`` (``(x, y)`` tuple) and ``"height"``.
        * ``"row_coords"``: row boundaries (Y), sorted descending.
        * ``"col_coords"``: column boundaries (X), sorted ascending.
    """
    try:
        doc = ezdxf.readfile(dxf_path)
        msp = doc.modelspace()
    except IOError:
        print(f"Cannot open DXF file: {dxf_path}")
        return None
    except ezdxf.DXFStructureError as e:
        print(f"Invalid or corrupted DXF file: {dxf_path}. Error: {e}")
        return None

    lines = msp.query('LINE')
    texts = msp.query('TEXT MTEXT')

    # Filter for horizontal and vertical lines to define grid
    horizontal_lines = [
        line for line in lines
        if abs(line.dxf.start.y - line.dxf.end.y) < line_tolerance
    ]
    vertical_lines = [
        line for line in lines
        if abs(line.dxf.start.x - line.dxf.end.x) < line_tolerance
    ]

    if not horizontal_lines or not vertical_lines:
        print("No table structure (horizontal/vertical lines) found.")
        return None

    # Get all unique X and Y coordinates to define grid boundaries
    y_coords = {line.dxf.start.y for line in horizontal_lines}
    x_coords = {line.dxf.start.x for line in vertical_lines}

    # Rows run top-to-bottom (descending Y); columns left-to-right.
    row_boundaries = sorted(
        group_close_coords(list(y_coords), tolerance=group_tolerance),
        reverse=True,
    )
    col_boundaries = sorted(
        group_close_coords(list(x_coords), tolerance=group_tolerance)
    )

    num_rows = len(row_boundaries) - 1
    num_cols = len(col_boundaries) - 1

    if num_rows <= 0 or num_cols <= 0:
        print("Could not determine table grid.")
        return None

    # Create a grid of cells, where each cell can hold multiple text entities
    table_grid = [[[] for _ in range(num_cols)] for _ in range(num_rows)]

    # Place text into cells
    for entity in texts:
        pos = entity.dxf.insert

        row = _interval_index(pos.y, row_boundaries)
        col = _interval_index(pos.x, col_boundaries)
        if row == -1 or col == -1:
            continue

        # Dispatch on the entity type instead of probing with hasattr().
        # NOTE(review): ezdxf appears to report an unsupported DXF attribute
        # with its own DXFAttributeError, which hasattr() does not swallow,
        # so probing ``entity.dxf`` for 'char_height' on a TEXT entity may
        # raise instead of returning False - confirm against the installed
        # ezdxf version.
        if entity.dxftype() == 'MTEXT':
            content = entity.text
            height = entity.dxf.char_height
        else:
            content = entity.dxf.text
            height = entity.dxf.height

        table_grid[row][col].append({
            "text": content.strip(),
            "pos": (pos.x, pos.y),
            "height": height
        })

    return {
        "grid": table_grid,
        "row_coords": row_boundaries,
        "col_coords": col_boundaries
    }


def print_analysis_report(table_data):
    """
    Print a human-readable report for extracted table data.

    Args:
        table_data: Dict with the keys ``"grid"`` (rows of cells, each cell a
            list of text-info dicts with ``"text"``, ``"pos"`` and
            ``"height"``), ``"row_coords"`` and ``"col_coords"``. A falsy
            value makes the function return without printing anything.

    The report lists the grid coordinates and column widths, then splits the
    grid into separate tables wherever a completely empty row occurs and, for
    every table, prints its height and each text's position relative to the
    top-left corner of its cell.
    """
    if not table_data:
        return

    grid = table_data["grid"]
    ys = table_data["row_coords"]
    xs = table_data["col_coords"]

    y_labels = [f"{y:.2f}" for y in ys]
    x_labels = [f"{x:.2f}" for x in xs]
    print("--- 1. Grid Information ---")
    print(f"Row Coordinates (Y): {y_labels}")
    print(f"Column Coordinates (X): {x_labels}")

    # Width of a column is the distance between neighbouring boundaries.
    widths = [right - left for left, right in zip(xs, xs[1:])]
    print("\n--- 2. Column Widths ---")
    for number, width in enumerate(widths):
        print(f"  Column {number}: {width:.2f} units")
    print("-" * 25)

    # Collect runs of consecutive non-empty rows; a row whose cells are all
    # empty lists terminates the current run.
    segments = []  # (index of first row in the grid, list of rows)
    run_start, run_rows = -1, []
    for grid_idx, cells in enumerate(grid):
        if not any(cells):
            if run_rows:
                segments.append((run_start, run_rows))
                run_start, run_rows = -1, []
            continue
        if not run_rows:
            run_start = grid_idx
        run_rows.append(cells)
    if run_rows:
        segments.append((run_start, run_rows))

    print(f"\nFound {len(segments)} table(s).\n")

    for number, (first_row, rows) in enumerate(segments, start=1):
        # Boundary above the first row minus boundary below the last row.
        height = ys[first_row] - ys[first_row + len(rows)]

        print(f"--- Table {number} Analysis ---")
        print(f"  Overall Height: {height:.2f} units")
        print("  --- Text Position Analysis (relative to cell top-left) ---")

        for local_row, cells in enumerate(rows):
            for col, cell in enumerate(cells):
                for info in cell:
                    top = ys[first_row + local_row]
                    left = xs[col]

                    position = info['pos']
                    abs_x = position[0]
                    abs_y = position[1]
                    offset_x = abs_x - left
                    # Y grows upwards, so the downward offset is top - y.
                    offset_y = top - abs_y

                    print(f"  Cell({local_row}, {col}): '{info['text']}'")
                    print(f"    - Abs Pos:  (X={abs_x:.2f}, Y={abs_y:.2f})")
                    print(f"    - Rel Pos:  (dX={offset_x:.2f}, dY={offset_y:.2f})")
                    print(f"    - Height:   {info['height']:.2f}")
        print("\n")


if __name__ == "__main__":
    # The sample drawing is looked up relative to this script first
    # (script lives in 03_Python_OpenSource_DXF, data in 04_Test_Files).
    here = os.path.dirname(__file__)
    primary_path = os.path.abspath(os.path.join(here, '..', '04_Test_Files', '料表.dxf'))
    # Hard-coded fallback location, used only when the relative path does
    # not exist (e.g. the script is run from somewhere else).
    fallback_path = r"C:\Users\83500\久翌\CAD编辑同步excel\测试文件区\04_Test_Files\料表.dxf"

    # Step 1: decide which file (if any) to analyze.
    if os.path.exists(primary_path):
        chosen_path = primary_path
    else:
        print(f"File not found at the expected path: {primary_path}")
        if os.path.exists(fallback_path):
            print("Found file at absolute path, running analysis...")
            chosen_path = fallback_path
        else:
            print(f"Also could not find file at absolute path: {fallback_path}")
            chosen_path = None

    # Step 2: run the analysis and print the report when data came back.
    if chosen_path is not None:
        report_data = analyze_dxf_tables(chosen_path)
        if report_data:
            print_analysis_report(report_data)