FA3-Datafetch/backend/scripts/backfill_stock_code.py

55 lines
1.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import sys
import os
import re
# Add the directory above this script's folder to sys.path so the `app` package can be imported
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import select, update
from app.database import engine, SessionLocal
from app.models import LLMUsageLog
# Matches "当前分析对象:<name> (<code>, <market>)". Group 1 is the stock code,
# group 2 is the market (currently unused).
# NOTE(review): confirm the colon and parentheses here match the stored
# prompts byte-for-byte; full-width variants would not match this pattern.
_STOCK_CODE_PATTERN = re.compile(r"当前分析对象:.*? \((.*?),\s*(.*?)\)")


def _extract_stock_code(prompt):
    """Return the stock code embedded in *prompt*, or None if none is found."""
    # A log row may have no prompt at all; searching None would raise TypeError.
    if not prompt:
        return None
    match = _STOCK_CODE_PATTERN.search(prompt)
    if match is None:
        return None
    # An empty capture such as "(, HK)" is not a usable code; report it as
    # "not found" rather than overwriting NULL with an empty string.
    return match.group(1).strip() or None


async def backfill_stock_codes():
    """Fill in ``stock_code`` on LLMUsageLog rows where it is NULL.

    Selects every LLMUsageLog whose ``stock_code`` is NULL, extracts the code
    from the row's ``prompt`` text with ``_STOCK_CODE_PATTERN``, assigns it to
    the row, and commits all assignments in a single transaction. Rows whose
    prompt is missing, does not match, or yields an empty code are left
    unchanged and reported on stdout.

    A failure during commit is printed and the session is rolled back; the
    exception is not re-raised.
    """
    async with SessionLocal() as session:
        # Fetch logs where stock_code is NULL.
        result = await session.execute(
            select(LLMUsageLog).where(LLMUsageLog.stock_code.is_(None))
        )
        logs = result.scalars().all()
        print(f"Found {len(logs)} logs to process.")

        updates_count = 0
        for log in logs:
            stock_code = _extract_stock_code(log.prompt)
            if stock_code is None:
                print(f"Log ID {log.id}: No match found in prompt.")
                continue

            print(f"Log ID {log.id}: Found code '{stock_code}' in prompt.")
            # Mutating the loaded object is enough; the session tracks the
            # change and writes it out on commit.
            log.stock_code = stock_code
            updates_count += 1

        if updates_count == 0:
            print("No updates made.")
            return

        try:
            await session.commit()
        except Exception as e:
            print(f"Error committing changes: {e}")
            await session.rollback()
        else:
            print(f"Successfully backfilled {updates_count} logs.")
if __name__ == "__main__":
    # Script entry point: build the backfill coroutine and run it to
    # completion with asyncio.run.
    backfill_coro = backfill_stock_codes()
    asyncio.run(backfill_coro)