# FA3-Datafetch/backend/app/api/routes.py
# 2026-01-08 21:01:55 +08:00

# 534 lines
# 20 KiB
# Python

from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from fastapi.responses import HTMLResponse, FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from app.database import get_db
from app.schemas import StockSearchRequest, StockSearchResponse, AnalysisRequest, ReportResponse, AnalysisStatus, ConfigUpdateRequest
from app.models import Report, Setting
from app.services import analysis_service
import os
import markdown
from weasyprint import HTML
import io
import tempfile
from urllib.parse import quote
from bs4 import BeautifulSoup
# Router on which every endpoint in this module is registered via decorators.
router = APIRouter()
@router.get("/health")
def health_check():
    """Return a fixed payload indicating the service is up."""
    payload = {"status": "healthy"}
    return payload
@router.post("/search", response_model=list[StockSearchResponse])
async def search_stock(request: StockSearchRequest, db: AsyncSession = Depends(get_db)):
    """Look up stocks matching ``request.query`` through the analysis service.

    The API key is taken from the ``GEMINI_API_KEY`` settings row, or from the
    environment variable of the same name when no such row exists. The model
    is taken from the ``AI_MODEL`` settings row and defaults to
    ``"gemini-2.0-flash"``.

    Returns:
        Whatever ``analysis_service.search_stock`` returns on success.

    Raises:
        HTTPException: 500 when no API key is configured; 400 when the
            service returns a dict containing an ``"error"`` key.
    """
    setting = await db.get(Setting, "GEMINI_API_KEY")
    api_key = setting.value if setting else os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise HTTPException(status_code=500, detail="API Key not configured")

    model_setting = await db.get(Setting, "AI_MODEL")
    model = model_setting.value if model_setting else "gemini-2.0-flash"

    result = await analysis_service.search_stock(request.query, api_key, model)

    # An error is signalled by a dict carrying an "error" key. The original
    # code also tested `isinstance(result, str)` inside this branch, which can
    # never be true for a dict, so that dead check has been dropped.
    if isinstance(result, dict) and "error" in result:
        raise HTTPException(status_code=400, detail=str(result))
    return result
@router.post("/analyze", response_model=ReportResponse)
async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)):
    """Persist a pending report and schedule its analysis as a background task.

    The model named in the request wins; otherwise the ``AI_MODEL`` settings
    row is used, falling back to ``"gemini-2.0-flash"``. The report row is
    committed before the API key is checked, so a missing key leaves a report
    marked as failed.

    Raises:
        HTTPException: 500 when no API key is configured.
    """
    if not request.model:
        stored_model = await db.get(Setting, "AI_MODEL")
        model = stored_model.value if stored_model else "gemini-2.0-flash"
    else:
        model = request.model

    report = Report(
        market=request.market,
        symbol=request.symbol,
        company_name=request.company_name,
        status=AnalysisStatus.PENDING,
        ai_model=model,
    )
    db.add(report)
    await db.commit()
    await db.refresh(report)

    key_row = await db.get(Setting, "GEMINI_API_KEY")
    api_key = key_row.value if key_row else os.getenv("GEMINI_API_KEY")
    if not api_key:
        report.status = AnalysisStatus.FAILED
        await db.commit()
        raise HTTPException(status_code=500, detail="API Key not configured")

    # Hand the long-running work to the background task queue.
    task_args = (
        report.id,
        request.market,
        request.symbol,
        api_key,
        request.data_source,
    )
    background_tasks.add_task(analysis_service.run_analysis_task, *task_args)

    # Load the row again with its sections eagerly fetched so that response
    # serialization does not trigger a lazy load.
    query = select(Report).options(selectinload(Report.sections)).where(Report.id == report.id)
    rows = await db.execute(query)
    return rows.scalar_one()
@router.get("/reports", response_model=list[ReportResponse])
async def get_reports(db: AsyncSession = Depends(get_db)):
    """Return every report with its sections, newest first by ``created_at``."""
    query = (
        select(Report)
        .options(selectinload(Report.sections))
        .order_by(Report.created_at.desc())
    )
    rows = await db.execute(query)
    return rows.scalars().all()
@router.get("/reports/{report_id}", response_model=ReportResponse)
async def get_report(report_id: int, db: AsyncSession = Depends(get_db)):
    """Return the report with the given id, sections included.

    Raises:
        HTTPException: 404 when no report has that id.
    """
    query = select(Report).options(selectinload(Report.sections)).where(Report.id == report_id)
    found = (await db.execute(query)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Report not found")
@router.get("/reports/{report_id}/html", response_class=HTMLResponse)
async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
    """Serve the financial-chart HTML of a report wrapped in a standalone page.

    The chart markup is read from ``data/<market>/<symbol>/report.html`` under
    the directory three levels above this file. When the exact symbol
    directory is missing, the first sibling directory whose name starts with
    the symbol is used instead. While ``report.html`` does not exist yet, a
    placeholder is served together with a two-second auto-refresh.

    The page posts its height to the parent window so an embedding frame can
    resize itself, and it is sent with caching disabled.

    Raises:
        HTTPException: 404 when no report has that id.
    """
    from html import escape

    result = await db.execute(select(Report).options(selectinload(Report.sections)).where(Report.id == report_id))
    report = result.scalar_one_or_none()
    if not report:
        raise HTTPException(status_code=404, detail="Report not found")

    # NOTE(review): market and symbol are joined into a filesystem path as-is;
    # confirm they are validated before being stored on the report.
    root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
    base_dir = os.path.join(root_dir, "data", report.market)
    symbol_dir = os.path.join(base_dir, report.symbol)

    financial_html = ""
    # Explicit flag instead of searching the final HTML for the placeholder
    # text, so a real report that happens to contain that phrase does not get
    # stuck in an endless refresh loop.
    charts_pending = False
    try:
        # Fuzzy match: fall back to a directory that merely starts with the symbol.
        if not os.path.exists(symbol_dir) and os.path.exists(base_dir):
            candidates = [d for d in os.listdir(base_dir) if d.startswith(report.symbol) and os.path.isdir(os.path.join(base_dir, d))]
            if candidates:
                symbol_dir = os.path.join(base_dir, candidates[0])

        start_html_path = os.path.join(symbol_dir, "report.html")
        if os.path.exists(start_html_path):
            with open(start_html_path, 'r', encoding='utf-8') as f:
                financial_html = f.read()
        else:
            charts_pending = True
            financial_html = "<p>财务图表尚未生成,数据获取可能仍在进行中。</p>"
    except Exception as e:
        # Escape the message: it can contain path fragments derived from user input.
        financial_html = f"<p>加载财务图表时出错: {escape(str(e))}</p>"

    meta_refresh = '<meta http-equiv="refresh" content="2">' if charts_pending else ""

    # The company name is user-supplied, so it must not reach the page unescaped.
    page_title = escape(str(report.company_name))

    # Only the financial charts are returned here, not the analysis sections.
    final_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
{meta_refresh}
<title>{page_title} - 财务数据</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; padding: 20px; line-height: 1.6; max-width: 1200px; margin: 0 auto; }}
table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
th, td {{ border: 1px solid #ddd; padding: 12px; }}
th {{ background-color: #f5f5f5; }}
img {{ max-width: 100%; }}
</style>
</head>
<body>
{financial_html}
<script>
// Send height to parent window
function sendHeight() {{
const height = Math.max(
document.body.scrollHeight,
document.body.offsetHeight,
document.documentElement.clientHeight,
document.documentElement.scrollHeight,
document.documentElement.offsetHeight
);
window.parent.postMessage({{ type: 'resize', height: height }}, '*');
}}
// Send height on load and when window resizes
window.addEventListener('load', sendHeight);
window.addEventListener('resize', sendHeight);
// Also send immediately in case load already fired
sendHeight();
</script>
</body>
</html>
"""
    return HTMLResponse(content=final_html, headers={"Cache-Control": "no-store, no-cache, must-revalidate", "Pragma": "no-cache", "Expires": "0"})
@router.get("/config")
async def get_config(db: AsyncSession = Depends(get_db)):
    """Return all stored settings as a dict, with the Gemini API key masked.

    The key is shown as eight asterisks followed by its last four characters.
    A stored key takes precedence over the ``GEMINI_API_KEY`` environment
    variable; when neither is present the entry is an empty string.
    """
    rows = (await db.execute(select(Setting))).scalars().all()
    config_map = {}
    for row in rows:
        config_map[row.key] = row.value

    key_name = "GEMINI_API_KEY"
    if key_name in config_map:
        masked = "********" + config_map[key_name][-4:]
    else:
        env_value = os.getenv(key_name)
        masked = "********" + env_value[-4:] if env_value else ""
    config_map[key_name] = masked
    return config_map
@router.post("/config")
async def update_config(request: ConfigUpdateRequest, db: AsyncSession = Depends(get_db)):
    """Create or overwrite the settings row named by ``request.key``.

    Returns:
        A dict with ``status`` set to ``"updated"`` and the affected ``key``.
    """
    existing = await db.get(Setting, request.key)
    if existing:
        existing.value = request.value
    else:
        db.add(Setting(key=request.key, value=request.value))
    await db.commit()
    return {"status": "updated", "key": request.key}
def _pdf_clean_financial_html(raw_html):
    """Strip page-level backgrounds and containers from the chart HTML for printing."""
    soup = BeautifulSoup(raw_html, 'html.parser')

    # Drop the CSS variable declarations for backgrounds and neutralize their uses.
    background_vars = ('--bg:', '--card-bg:', '--header-bg:', '--section-bg:')
    for style_tag in soup.find_all('style'):
        css = style_tag.string
        if not css:
            continue
        css = '\n'.join(
            line for line in css.split('\n')
            if not any(var in line.lower() for var in background_vars)
        )
        css = css.replace('background: var(--bg);', 'background: white;')
        css = css.replace('background: var(--card-bg);', '')
        css = css.replace('background: var(--header-bg);', '')
        style_tag.string = css

    # Remove the container wrappers but keep what they contain.
    for div in soup.find_all('div', class_='report-container'):
        div.unwrap()

    # Inline styles are removed from divs only; td/th keep theirs,
    # including their background colors.
    for div in soup.find_all('div'):
        if div.get('style'):
            del div['style']

    # Keep only the first six cells of every table row.
    for table in soup.find_all('table'):
        for row in table.find_all('tr'):
            for cell in row.find_all(['th', 'td'])[6:]:
                cell.decompose()

    return str(soup)


def _pdf_normalize_markdown_lists(content):
    """Reflow markdown so list items start on their own line, set off by blank lines."""
    import re

    # Numbered items clumped on one line ("1. xxx 2. yyy") are split by
    # turning the whitespace in front of each "N. " marker into a newline.
    content = re.sub(r'(\s)(\d+\.\s)', r'\n\2', content)

    fixed_lines = []
    in_list = False
    for line in content.split('\n'):
        stripped = line.strip()
        is_list_item = stripped.startswith(('* ', '- ')) or re.match(r'^\d+\.\s', stripped)
        if is_list_item:
            # Blank line before the first item of a list.
            if not in_list and fixed_lines and fixed_lines[-1].strip():
                fixed_lines.append('')
            in_list = True
        elif in_list and stripped:
            # Blank line after the last item of a list.
            fixed_lines.append('')
            in_list = False
        fixed_lines.append(line)
    return '\n'.join(fixed_lines)


def _pdf_remove_file(path):
    """Delete ``path``, tolerating the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@router.get("/reports/{report_id}/pdf")
async def download_report_pdf(report_id: int, db: AsyncSession = Depends(get_db)):
    """Render a report (cover, financial charts, analysis sections) as a PDF download.

    The chart HTML is read from ``data/<market>/<symbol>/report.html`` under
    the directory three levels above this file, falling back to the first
    sibling directory whose name starts with the symbol. It is cleaned for
    print, followed by each analysis section converted from markdown, and the
    whole document is rendered with WeasyPrint into a temporary file.

    The temporary file is deleted once the response has been sent, and also
    when PDF generation fails.

    Raises:
        HTTPException: 404 when no report has that id; 500 when PDF
            generation fails.
    """
    from html import escape

    result = await db.execute(select(Report).options(selectinload(Report.sections)).where(Report.id == report_id))
    report = result.scalar_one_or_none()
    if not report:
        raise HTTPException(status_code=404, detail="Report not found")

    # NOTE(review): market and symbol are joined into a filesystem path as-is;
    # confirm they are validated before being stored on the report.
    root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
    base_dir = os.path.join(root_dir, "data", report.market)
    symbol_dir = os.path.join(base_dir, report.symbol)

    financial_html = ""
    try:
        # Fuzzy match: fall back to a directory that merely starts with the symbol.
        if not os.path.exists(symbol_dir) and os.path.exists(base_dir):
            candidates = [d for d in os.listdir(base_dir) if d.startswith(report.symbol) and os.path.isdir(os.path.join(base_dir, d))]
            if candidates:
                symbol_dir = os.path.join(base_dir, candidates[0])

        start_html_path = os.path.join(symbol_dir, "report.html")
        if os.path.exists(start_html_path):
            with open(start_html_path, 'r', encoding='utf-8') as f:
                financial_html = _pdf_clean_financial_html(f.read())
        else:
            financial_html = "<p>财务图表尚未生成,数据获取可能仍在进行中。</p>"
    except Exception as e:
        # Escape the message: it can contain path fragments derived from user input.
        financial_html = f"<p>加载财务图表时出错: {escape(str(e))}</p>"

    # Only the key order of this mapping is used, to sort the sections;
    # names not listed here sort last.
    section_names = {
        'company_profile': '公司简介',
        'fundamental_analysis': '基本面分析',
        'insider_analysis': '内部人士分析',
        'bullish_analysis': '看涨分析',
        'bearish_analysis': '看跌分析'
    }
    section_rank = {name: rank for rank, name in enumerate(section_names)}
    ordered_sections = sorted(report.sections, key=lambda s: section_rank.get(s.section_name, 999))

    section_blocks = []
    for section in ordered_sections:
        # A section without content renders as an empty block instead of crashing.
        content = _pdf_normalize_markdown_lists(section.content or "")
        section_content = markdown.markdown(content, extensions=['tables', 'fenced_code'])
        section_blocks.append(f"""
<div class="section">
<div class="section-content">
{section_content}
</div>
</div>
""")
    sections_html = "".join(section_blocks)

    # These report fields are user-supplied, so escape them before embedding.
    company_name = escape(str(report.company_name))
    market = escape(str(report.market))
    symbol = escape(str(report.symbol))
    date_html = (
        f'<div class="meta">分析日期: {report.created_at.strftime("%Y年%m月%d日")}</div>'
        if report.created_at else ''
    )
    ai_model_html = (
        f'<div class="meta">AI模型: {escape(str(report.ai_model))}</div>'
        if report.ai_model else ''
    )

    complete_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{company_name} - 分析报告</title>
<style>
@page {{
size: A4;
margin: 2.5cm 1cm;
}}
body {{
font-family: "PingFang SC", "Microsoft YaHei", "SimHei", sans-serif;
line-height: 1.6;
color: #333;
font-size: 11pt;
background-color: white;
margin: 0;
padding: 0;
}}
.cover {{
page-break-after: always;
text-align: center;
padding-top: 30%;
}}
.cover h1 {{
font-size: 32pt;
margin-bottom: 20px;
color: #1a1a1a;
}}
.cover .meta {{
font-size: 14pt;
color: #666;
margin: 10px 0;
}}
h1 {{
font-size: 20pt;
color: #1a1a1a;
margin-top: 20px;
margin-bottom: 12px;
border-bottom: 2px solid #4a90e2;
padding-bottom: 8px;
}}
h2 {{
font-size: 16pt;
color: #2c3e50;
margin-top: 16px;
margin-bottom: 8px;
}}
h3 {{
font-size: 13pt;
color: #34495e;
margin-top: 12px;
margin-bottom: 6px;
}}
p {{
margin: 4px 0;
text-align: justify;
font-size: 10pt;
}}
/* Prevent double spacing in lists */
li p {{
margin: 0;
}}
table {{
border-collapse: collapse;
width: 100%;
max-width: 100%;
margin: 10px 0;
font-size: 6pt;
table-layout: fixed;
line-height: 1.2;
}}
th, td {{
border: 1px solid #ddd;
padding: 2px 2px;
text-align: left;
word-wrap: break-word;
overflow-wrap: break-word;
hyphens: auto;
white-space: normal;
overflow: hidden;
text-overflow: ellipsis;
}}
/* First column for tables starting from the 2nd table (财务指标表) - narrower */
table:nth-of-type(n+2) th:first-child,
table:nth-of-type(n+2) td:first-child {{
width: 150px;
min-width: 150px;
max-width: 150px;
}}
/* PE and PB columns in the first table (Basic Info) - narrower */
table:first-of-type th:nth-child(4),
table:first-of-type td:nth-child(4),
table:first-of-type th:nth-child(5),
table:first-of-type td:nth-child(5) {{
width: 90px;
}}
th {{
background-color: #f3f4f6;
color: #374151;
font-weight: bold;
font-size: 6pt;
padding: 5px 2px;
}}
tr:nth-child(even) {{
background-color: #f9f9f9;
}}
/* Handle tables with many columns */
.table-wrapper {{
overflow: hidden;
max-width: 100%;
}}
img {{
max-width: 100%;
height: auto;
display: block;
margin: 10px auto;
}}
.section {{
page-break-before: always;
}}
.section:first-of-type {{
page-break-before: auto;
}}
.section-title {{
font-size: 22pt;
color: #1a1a1a;
margin-bottom: 15px;
border-bottom: 3px solid #4a90e2;
padding-bottom: 8px;
}}
.section-content {{
margin-top: 10px;
}}
ul, ol {{
margin: 0 0 6px 0;
padding-left: 30px;
line-height: 1.8;
}}
li {{
margin: 2px 0;
font-size: 10pt;
list-style-position: outside;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: "Courier New", monospace;
font-size: 10pt;
}}
pre {{
background-color: #f4f4f4;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
font-size: 9pt;
}}
blockquote {{
border-left: 4px solid #4a90e2;
padding-left: 15px;
margin: 15px 0;
color: #555;
font-style: italic;
}}
</style>
</head>
<body>
<div class="cover">
<h1>{company_name}</h1>
<div class="meta">{market} {symbol}</div>
{date_html}
{ai_model_html}
</div>
<div class="section">
<div class="section-content">
{financial_html}
</div>
</div>
{sections_html}
</body>
</html>
"""

    pdf_path = None
    try:
        # Reserve a path, close the handle, then let WeasyPrint write to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            pdf_path = tmp_file.name
        HTML(string=complete_html).write_pdf(pdf_path)
    except Exception as e:
        # Do not leave a half-written temporary file behind on failure.
        if pdf_path is not None:
            _pdf_remove_file(pdf_path)
        raise HTTPException(status_code=500, detail=f"生成PDF失败: {str(e)}") from e

    filename = f"{report.company_name}_{report.symbol}_分析报告.pdf"
    # Use RFC 5987 encoding for non-ASCII filenames
    filename_encoded = quote(filename)

    # The file was created with delete=False, so schedule its removal to run
    # after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_pdf_remove_file, pdf_path)
    return FileResponse(
        path=pdf_path,
        media_type='application/pdf',
        headers={
            'Content-Disposition': f"attachment; filename*=UTF-8''{filename_encoded}"
        },
        background=cleanup,
    )