更改数据源配置

This commit is contained in:
xucheng 2026-01-08 21:01:55 +08:00
parent 548ee242ba
commit b9c8f90cbc
29 changed files with 2198 additions and 2132 deletions

View File

@ -42,7 +42,10 @@ async def search_stock(request: StockSearchRequest, db: AsyncSession = Depends(g
@router.post("/analyze", response_model=ReportResponse) @router.post("/analyze", response_model=ReportResponse)
async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)): async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)):
# Get AI model setting # Get AI model
if request.model:
model = request.model
else:
model_setting = await db.get(Setting, "AI_MODEL") model_setting = await db.get(Setting, "AI_MODEL")
model = model_setting.value if model_setting else "gemini-2.0-flash" model = model_setting.value if model_setting else "gemini-2.0-flash"
@ -71,7 +74,8 @@ async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundT
new_report.id, new_report.id,
request.market, request.market,
request.symbol, request.symbol,
api_key api_key,
request.data_source
) )
# Re-fetch with selectinload to avoid lazy loading issues # Re-fetch with selectinload to avoid lazy loading issues
@ -122,12 +126,18 @@ async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
except Exception as e: except Exception as e:
financial_html = f"<p>加载财务图表时出错: {str(e)}</p>" financial_html = f"<p>加载财务图表时出错: {str(e)}</p>"
# If content is not ready, add auto-refresh meta tag
meta_refresh = ""
if "财务图表尚未生成" in financial_html:
meta_refresh = '<meta http-equiv="refresh" content="2">'
# Only return financial charts, no analysis sections # Only return financial charts, no analysis sections
final_html = f""" final_html = f"""
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
{meta_refresh}
<title>{report.company_name} - 财务数据</title> <title>{report.company_name} - 财务数据</title>
<style> <style>
body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; padding: 20px; line-height: 1.6; max-width: 1200px; margin: 0 auto; }} body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; padding: 20px; line-height: 1.6; max-width: 1200px; margin: 0 auto; }}
@ -162,7 +172,7 @@ async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
</body> </body>
</html> </html>
""" """
return final_html return HTMLResponse(content=final_html, headers={"Cache-Control": "no-store, no-cache, must-revalidate", "Pragma": "no-cache", "Expires": "0"})
@router.get("/config") @router.get("/config")
async def get_config(db: AsyncSession = Depends(get_db)): async def get_config(db: AsyncSession = Depends(get_db)):

View File

@ -21,6 +21,8 @@ class AnalysisRequest(BaseModel):
market: str market: str
symbol: str symbol: str
company_name: str company_name: str
model: Optional[str] = None
data_source: Optional[str] = None
class ReportSectionSchema(BaseModel): class ReportSectionSchema(BaseModel):
section_name: str section_name: str

View File

@ -113,12 +113,12 @@ async def search_stock(query: str, api_key: str, model: str = "gemini-2.0-flash"
print(f"Search error: {e}") print(f"Search error: {e}")
return {"error": f"搜索失败: {str(e)}"} return {"error": f"搜索失败: {str(e)}"}
async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: str): async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: str, data_source: str = None):
""" """
Background task to run the full analysis pipeline. Background task to run the full analysis pipeline.
Creates its own DB session. Creates its own DB session.
""" """
print(f"Starting analysis for report {report_id}: {market} {symbol}") print(f"Starting analysis for report {report_id}: {market} {symbol} (Source: {data_source})")
# Create new session # Create new session
from app.database import AsyncSessionLocal from app.database import AsyncSessionLocal
@ -138,6 +138,8 @@ async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: s
# 2. Run Main Data Fetching Script (run_fetcher.py) # 2. Run Main Data Fetching Script (run_fetcher.py)
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
cmd = [sys.executable, "run_fetcher.py", market, symbol] cmd = [sys.executable, "run_fetcher.py", market, symbol]
if data_source:
cmd.extend(["--data-source", data_source])
print(f"Executing data fetch command: {cmd} in {root_dir}") print(f"Executing data fetch command: {cmd} in {root_dir}")
process = await asyncio.create_subprocess_exec( process = await asyncio.create_subprocess_exec(

View File

@ -2,7 +2,7 @@
import { useEffect, useState, use, useRef } from "react" import { useEffect, useState, use, useRef } from "react"
import { getReport } from "@/lib/api" import { getReport } from "@/lib/api"
import { Badge } from "@/components/ui/badge"
import { Card, CardContent } from "@/components/ui/card" import { Card, CardContent } from "@/components/ui/card"
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs" import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"
import { MarkdownRenderer } from "@/components/markdown-renderer" import { MarkdownRenderer } from "@/components/markdown-renderer"
@ -101,19 +101,24 @@ export default function AnalysisPage({ params }: { params: Promise<{ id: string
</p> </p>
</div> </div>
<div className="flex items-center gap-4"> <div className="flex items-center gap-4">
<Badge variant={ <Button
variant={
report.status === "completed" ? "default" : report.status === "completed" ? "default" :
report.status === "in_progress" ? "secondary" : report.status === "in_progress" ? "secondary" :
report.status === "failed" ? "destructive" : "outline" report.status === "failed" ? "destructive" : "outline"
}> }
size="sm"
className="pointer-events-none w-32"
>
{report.status === "in_progress" ? ( {report.status === "in_progress" ? (
<div className="flex items-center gap-2"> <>
<Loader2 className="h-3 w-3 animate-spin" /> <Loader2 className="h-4 w-4 mr-2 animate-spin" />
</div>
</>
) : report.status === "completed" ? "已完成" : report.status === "failed" ? "失败" : report.status === "pending" ? "待处理" : report.status} ) : report.status === "completed" ? "已完成" : report.status === "failed" ? "失败" : report.status === "pending" ? "待处理" : report.status}
</Badge> </Button>
{report.status === "completed" && ( {report.status === "completed" && (
<Button onClick={handleDownloadPDF} variant="outline" size="sm"> <Button onClick={handleDownloadPDF} variant="outline" size="sm" className="w-32">
<Download className="h-4 w-4 mr-2" /> <Download className="h-4 w-4 mr-2" />
PDF PDF
</Button> </Button>

View File

@ -1,18 +1,54 @@
"use client" "use client"
import { useState } from "react" import { useState, useEffect } from "react"
import { searchStock, startAnalysis } from "@/lib/api" import { searchStock, startAnalysis, getConfig } from "@/lib/api"
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card" import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/components/ui/card"
import { Input } from "@/components/ui/input" import { Input } from "@/components/ui/input"
import { Button } from "@/components/ui/button" import { Button } from "@/components/ui/button"
import { Search, Loader2 } from "lucide-react" import { Search, Loader2, Database, Bot } from "lucide-react"
import { useRouter } from "next/navigation" import { useRouter } from "next/navigation"
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select"
import { Label } from "@/components/ui/label"
import { Badge } from "@/components/ui/badge"
export function SearchStock() { export function SearchStock() {
const [query, setQuery] = useState("") const [query, setQuery] = useState("")
const [results, setResults] = useState<{ market: string; symbol: string; company_name: string }[]>([]) const [results, setResults] = useState<{ market: string; symbol: string; company_name: string }[]>([])
const [loading, setLoading] = useState(false) const [loading, setLoading] = useState(false)
const [error, setError] = useState("") const [error, setError] = useState("")
const [activeIndex, setActiveIndex] = useState<number | null>(null)
// Global Configuration State
const [selectedModel, setSelectedModel] = useState("gemini-2.0-flash")
const [dataSourcePrefs, setDataSourcePrefs] = useState<Record<string, string>>({
'CN': 'Tushare',
'HK': 'iFinD',
'US': 'Alpha Vantage',
'JP': 'iFinD',
'VN': 'iFinD'
})
// Fetch initial config
useEffect(() => {
const fetchConfig = async () => {
try {
const config = await getConfig()
if (config.AI_MODEL) {
setSelectedModel(config.AI_MODEL)
}
} catch (e) {
console.error("Failed to load config:", e)
}
}
fetchConfig()
}, [])
const router = useRouter() const router = useRouter()
const handleSearch = async () => { const handleSearch = async () => {
@ -20,10 +56,12 @@ export function SearchStock() {
setLoading(true) setLoading(true)
setError("") setError("")
setResults([]) setResults([])
setActiveIndex(null)
try { try {
const data = await searchStock(query) const data = await searchStock(query)
setResults(data) setResults(data)
// Auto-select the first result if exists? Or keep null? User asked for "click to select". Keeping null is safer.
} catch (err: any) { } catch (err: any) {
setError(err.message || "搜索失败") setError(err.message || "搜索失败")
} finally { } finally {
@ -34,7 +72,9 @@ export function SearchStock() {
const handleAnalyze = async (result: { market: string; symbol: string; company_name: string }) => { const handleAnalyze = async (result: { market: string; symbol: string; company_name: string }) => {
setLoading(true) setLoading(true)
try { try {
const report = await startAnalysis(result.market, result.symbol, result.company_name) // Use global model selection
const dataSource = dataSourcePrefs[result.market]
const report = await startAnalysis(result.market, result.symbol, result.company_name, selectedModel, dataSource)
router.push(`/analysis/${report.id}`) router.push(`/analysis/${report.id}`)
} catch (err: any) { } catch (err: any) {
setError(err.message || "启动分析失败") setError(err.message || "启动分析失败")
@ -43,12 +83,25 @@ export function SearchStock() {
} }
} }
// Dynamic Data Source Options (Mocking availability)
const dataSourceOptions: Record<string, string[]> = {
'CN': ['Tushare'],
'HK': ['iFinD'],
'US': ['Alpha Vantage', 'iFinD'],
'JP': ['iFinD'],
'VN': ['iFinD']
}
return ( return (
<Card className="w-full max-w-2xl"> <div className="grid grid-cols-1 lg:grid-cols-3 gap-6 w-full max-w-6xl">
{/* Left Column: Search & Results */}
<div className="lg:col-span-2 space-y-6">
<Card className="h-full flex flex-col">
<CardHeader> <CardHeader>
<CardTitle></CardTitle> <CardTitle></CardTitle>
<CardDescription></CardDescription>
</CardHeader> </CardHeader>
<CardContent className="space-y-4"> <CardContent className="space-y-4 flex-grow">
<div className="flex gap-2"> <div className="flex gap-2">
<Input <Input
placeholder="输入公司名称例如腾讯或代码例如700" placeholder="输入公司名称例如腾讯或代码例如700"
@ -56,27 +109,48 @@ export function SearchStock() {
onChange={(e) => setQuery(e.target.value)} onChange={(e) => setQuery(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleSearch()} onKeyDown={(e) => e.key === "Enter" && handleSearch()}
/> />
<Button onClick={handleSearch} disabled={loading}> <Button onClick={handleSearch} disabled={loading} size="default" className="px-4">
{loading ? <Loader2 className="animate-spin" /> : <Search />} {loading ? <Loader2 className="animate-spin" /> : <Search className="h-4 w-4" />}
</Button> </Button>
</div> </div>
{error && <div className="text-red-500 text-sm">{error}</div>} {error && <div className="text-red-500 text-sm">{error}</div>}
{results.length > 0 && ( {results.length > 0 && (
<div className="space-y-2"> <div className="mt-6 space-y-3">
{results.length > 1 && ( <div className="text-sm font-medium text-muted-foreground flex items-center justify-between">
<div className="text-sm text-muted-foreground"> {results.length} </div> <span> {results.length} </span>
)} <span className="text-xs"></span>
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
{results.map((result, index) => (
<div key={index} className="bg-muted p-3 rounded-md space-y-2 border hover:border-primary transition-colors">
<div className="font-medium">{result.company_name}</div>
<div className="text-xs text-muted-foreground">
{result.market} | {result.symbol}
</div> </div>
<Button onClick={() => handleAnalyze(result)} disabled={loading} className="w-full" size="sm"> <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
{loading ? "正在启动分析..." : "运行分析"} {results.map((result, index) => (
<div
key={index}
className={`group relative flex flex-col justify-between p-4 rounded-lg border transition-all cursor-pointer shadow-sm hover:shadow active:scale-95 duration-200 ${activeIndex === index ? 'border-primary bg-primary/5 ring-1 ring-primary' : 'bg-card hover:bg-accent hover:border-primary/50'}`}
onClick={() => setActiveIndex(index)}
>
<div className="space-y-2 mb-4">
<div className="font-semibold text-base line-clamp-2 leading-tight" title={result.company_name}>
{result.company_name}
</div>
<div className="flex items-center gap-2 text-xs text-muted-foreground">
<Badge variant="secondary" className="px-1.5 py-0 text-[10px] h-5">{result.market}</Badge>
<span className="font-mono">{result.symbol}</span>
</div>
</div>
<Button
onClick={(e) => {
e.stopPropagation();
handleAnalyze(result);
}}
disabled={loading}
size="sm"
variant={activeIndex === index ? "default" : "secondary"}
className="w-full mt-auto"
>
{loading ? <Loader2 className="animate-spin h-3 w-3 mr-2" /> : null}
</Button> </Button>
</div> </div>
))} ))}
@ -85,5 +159,95 @@ export function SearchStock() {
)} )}
</CardContent> </CardContent>
</Card> </Card>
</div>
{/* Right Column: Configuration */}
<div className="lg:col-span-1 space-y-6">
<Card className="h-full border-dashed shadow-sm bg-muted/30 flex flex-col">
<CardHeader>
<CardTitle className="flex items-center gap-2 text-base">
<Bot className="h-5 w-5 text-primary" />
</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Section 1: AI Model */}
<div className="space-y-3">
<Label className="text-sm font-medium"> AI </Label>
<Select
value={selectedModel}
onValueChange={setSelectedModel}
>
<SelectTrigger className="w-full bg-background">
<SelectValue placeholder="Select model" />
</SelectTrigger>
<SelectContent>
<SelectItem value="gemini-2.0-flash">Gemini 2.0 Flash</SelectItem>
<SelectItem value="gemini-2.5-flash">Gemini 2.5 Flash</SelectItem>
<SelectItem value="gemini-3-flash-preview">Gemini 3 Flash Preview</SelectItem>
<SelectItem value="gemini-3-pro-preview">Gemini 3 Pro Preview</SelectItem>
{/* If the current selected model is custom and not in the list above, show it */}
{selectedModel &&
!["gemini-2.0-flash", "gemini-2.5-flash", "gemini-3-flash-preview", "gemini-3-pro-preview"].includes(selectedModel) && (
<SelectItem value={selectedModel}>{selectedModel} (Custom)</SelectItem>
)}
</SelectContent>
</Select>
<p className="text-xs text-muted-foreground">
</p>
</div>
{/* Section 2: Data Sources - Only show when results are available and for relevant markets */}
{results.length > 0 && (
<>
<div className="h-[1px] bg-border w-full my-4" />
<div className="space-y-3">
<div className="flex items-center gap-2">
<Database className="h-4 w-4 text-primary" />
<Label className="text-sm font-medium"></Label>
</div>
<div className="space-y-4">
{(activeIndex !== null && results[activeIndex] ? [results[activeIndex].market] : Array.from(new Set(results.map(r => r.market)))).map((market) => (
<div key={market} className="space-y-2 animate-in fade-in slide-in-from-right-4 duration-300">
<div className="flex items-center gap-2">
<Badge variant="outline" className="h-5 px-1.5 text-[10px] uppercase">{market}</Badge>
<span className="text-xs text-muted-foreground">:</span>
</div>
<div className="grid grid-cols-2 gap-2">
{(dataSourceOptions[market] || ['Default']).map((opt) => {
const isSelected = dataSourcePrefs[market] === opt;
return (
<div
key={opt}
onClick={() => setDataSourcePrefs(prev => ({ ...prev, [market]: opt }))}
className={`
cursor-pointer relative flex flex-col items-center justify-center p-2 rounded-md border text-xs font-medium transition-all
${isSelected
? 'border-primary bg-primary/10 text-primary ring-1 ring-primary/20'
: 'bg-background hover:bg-accent hover:border-primary/30 text-muted-foreground'}
`}
>
{opt}
{isSelected && <div className="absolute top-1 right-1 w-1.5 h-1.5 rounded-full bg-primary" />}
</div>
)
})}
</div>
</div>
))}
</div>
<p className="text-xs text-muted-foreground mt-2">
</p>
</div>
</>
)}
</CardContent>
</Card>
</div>
</div>
) )
} }

View File

@ -10,11 +10,11 @@ export async function searchStock(query: string) {
return res.json() as Promise<{ market: string; symbol: string; company_name: string }[]>; return res.json() as Promise<{ market: string; symbol: string; company_name: string }[]>;
} }
export async function startAnalysis(market: string, symbol: string, company_name: string) { export async function startAnalysis(market: string, symbol: string, company_name: string, model?: string, data_source?: string) {
const res = await fetch(`${API_BASE}/analyze`, { const res = await fetch(`${API_BASE}/analyze`, {
method: "POST", method: "POST",
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
body: JSON.stringify({ market, symbol, company_name }), body: JSON.stringify({ market, symbol, company_name, model, data_source }),
}); });
if (!res.ok) { if (!res.ok) {
const error = await res.json(); const error = await res.json();

View File

@ -9,14 +9,14 @@ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
# from strategies.hk_strategy import HK_Strategy # from strategies.hk_strategy import HK_Strategy
# from strategies.jp_strategy import JP_Strategy # from strategies.jp_strategy import JP_Strategy
def get_strategy(market, stock_code, tushare_token=None, av_key=None): def get_strategy(market, stock_code, tushare_token=None, av_key=None, data_source=None):
market = market.upper() market = market.upper()
if market == 'CN': if market == 'CN':
from strategies.cn_strategy import CN_Strategy from strategies.cn_strategy import CN_Strategy
return CN_Strategy(stock_code, tushare_token) return CN_Strategy(stock_code, tushare_token)
elif market == 'US': elif market == 'US':
from strategies.us_strategy import US_Strategy from strategies.us_strategy import US_Strategy
return US_Strategy(stock_code, av_key) return US_Strategy(stock_code, av_key, data_source)
elif market == 'HK': elif market == 'HK':
from strategies.hk_strategy import HK_Strategy from strategies.hk_strategy import HK_Strategy
ifind_token = os.getenv('IFIND_REFRESH_TOKEN') ifind_token = os.getenv('IFIND_REFRESH_TOKEN')
@ -37,10 +37,19 @@ def main():
tushare_token = os.getenv('TUSHARE_TOKEN') tushare_token = os.getenv('TUSHARE_TOKEN')
av_key = os.getenv('ALPHA_VANTAGE_KEY') av_key = os.getenv('ALPHA_VANTAGE_KEY')
if len(sys.argv) > 2: import argparse
market = sys.argv[1] parser = argparse.ArgumentParser(description='Run Stock Analysis Data Fetcher')
symbol = sys.argv[2] parser.add_argument('market', help='Market (CN, US, HK, JP, VN)')
strategy = get_strategy(market, symbol, tushare_token, av_key) parser.add_argument('symbol', help='Stock Symbol')
parser.add_argument('--data-source', help='Data Source Preference', default=None)
if len(sys.argv) > 1:
args = parser.parse_args()
market = args.market
symbol = args.symbol
data_source = args.data_source
strategy = get_strategy(market, symbol, tushare_token, av_key, data_source)
strategy.execute() strategy.execute()
else: else:
print("Usage: python run_fetcher.py <MARKET> <SYMBOL>") print("Usage: python run_fetcher.py <MARKET> <SYMBOL>")

BIN
server.log Normal file

Binary file not shown.

View File

@ -0,0 +1,172 @@
import requests
import pandas as pd
import time
from storage.file_io import DataStorage
class AlphaVantageUsClient:
    """Fetch and normalize US-market fundamentals from the Alpha Vantage REST API.

    Every raw API payload is persisted through ``DataStorage`` before
    normalization so the original data can be audited later.  Column names
    are mapped to the internal snake_case schema shared by the other
    market clients (``date``, ``revenue``, ``net_income``, ...).
    """

    BASE_URL = "https://www.alphavantage.co/query"

    # Seconds to sleep before every request — free-tier rate limiting.
    # TODO confirm the required spacing against the current API plan.
    REQUEST_DELAY = 15

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.storage = DataStorage()

    def _save_raw_data(self, data, symbol: str, name: str):
        """Persist a raw API payload (list of records or single dict) as a snapshot.

        Silently skips ``None`` and payloads that produce an empty DataFrame.
        """
        if data is None:
            return
        df = pd.DataFrame()
        if isinstance(data, list):
            df = pd.DataFrame(data)
        elif isinstance(data, dict):
            # A single-record JSON object becomes a one-row DataFrame.
            df = pd.DataFrame([data])
        if not df.empty:
            self.storage.save_data(df, 'US', symbol, f"raw_{name}")

    @staticmethod
    def _coerce_numeric(df: pd.DataFrame, columns) -> pd.DataFrame:
        """Convert the given columns (when present) to numeric, NaN on failure."""
        for col in columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        return df

    def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
        """Call an Alpha Vantage statement endpoint and return up to 5 annual reports.

        Parameters
        ----------
        function : str
            Alpha Vantage function name (e.g. ``INCOME_STATEMENT``).
        symbol : str
            US ticker symbol.

        Returns
        -------
        pd.DataFrame
            The ``annualReports`` records (most recent 5 fiscal years), or an
            empty DataFrame on request failure / error response.
        """
        params = {
            "function": function,
            "symbol": symbol,
            "apikey": self.api_key
        }
        try:
            time.sleep(self.REQUEST_DELAY)
            response = requests.get(self.BASE_URL, params=params)
            data = response.json()
        except Exception as e:
            print(f"Error requesting {function}: {e}")
            return pd.DataFrame()

        # BUGFIX: an empty/None JSON body previously fell through to
        # `return df_annual` with the variable unbound (UnboundLocalError).
        # Error payloads (rate-limit notes, bad symbols) also lack
        # "annualReports" and are reported and treated as empty.
        if not data or not data.get("annualReports"):
            print(f"Error fetching {function} for {symbol}: {data}")
            return pd.DataFrame()

        self._save_raw_data(data.get("annualReports"), symbol, f"{function.lower()}_annual")

        df_annual = pd.DataFrame(data["annualReports"])
        if "fiscalDateEnding" in df_annual.columns:
            # Keep only the 5 most recent fiscal years.
            df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
            df_annual = df_annual.head(5)
        return df_annual

    def get_market_metrics(self, symbol: str) -> dict:
        """Return current valuation metrics for *symbol* from the OVERVIEW endpoint.

        Keys: ``price`` (derived as market_cap / shares_outstanding, since
        OVERVIEW carries no quote), ``name``, ``fiscal_year_end``,
        ``dividend_yield``, ``market_cap``, ``pe``, ``pb``, ``employee_count``,
        ``total_share_holders`` (always 0 — not provided by the basic
        OVERVIEW endpoint).  Missing/failed fields default to 0 or None.
        """
        overview_data = {}
        try:
            time.sleep(self.REQUEST_DELAY)
            params = {"function": "OVERVIEW", "symbol": symbol, "apikey": self.api_key}
            r = requests.get(self.BASE_URL, params=params)
            overview_data = r.json()
            # The API encodes missing values as the literal string 'None';
            # normalize them to real None before numeric conversion below.
            if isinstance(overview_data, dict):
                for key, value in overview_data.items():
                    if value == 'None':
                        overview_data[key] = None
            self._save_raw_data(overview_data, symbol, "market_metrics_overview")
        except Exception as e:
            print(f"Error fetching OVERVIEW for {symbol}: {e}")
        # Guard against non-dict bodies (e.g. error arrays) so .get() below is safe.
        if not isinstance(overview_data, dict):
            overview_data = {}

        market_cap = float(overview_data.get("MarketCapitalization") or 0)
        shares_outstanding = float(overview_data.get("SharesOutstanding") or 0)
        price = market_cap / shares_outstanding if shares_outstanding > 0 else 0

        return {
            "price": price,
            "name": overview_data.get("Name"),
            "fiscal_year_end": overview_data.get("FiscalYearEnd"),
            "dividend_yield": float(overview_data.get("DividendYield") or 0),
            "market_cap": market_cap,
            "pe": float(overview_data.get("PERatio") or 0),
            "pb": float(overview_data.get("PriceToBookRatio") or 0),
            "employee_count": int(float(overview_data.get("FullTimeEmployees") or 0)),
            "total_share_holders": 0  # Not typically provided in basic AV Overview
        }

    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        """Return annual income statements mapped to the internal schema."""
        df = self._fetch_data("INCOME_STATEMENT", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "totalRevenue": "revenue",
            "netIncome": "net_income",
            "grossProfit": "gross_profit",
            "costOfRevenue": "cogs",
            "researchAndDevelopment": "rd_exp",
            "sellingGeneralAndAdministrative": "sga_exp",
            "interestExpense": "fin_exp",
            "incomeBeforeTax": "total_profit",
            "incomeTaxExpense": "income_tax",
            "ebit": "ebit"
        }
        df = df.rename(columns=cols_map)
        numeric_cols = [
            "revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
            "fin_exp", "total_profit", "income_tax", "ebit",
            "depreciation", "depreciationAndAmortization"
        ]
        return self._coerce_numeric(df, numeric_cols)

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        """Return annual balance sheets mapped to the internal schema."""
        df = self._fetch_data("BALANCE_SHEET", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "totalShareholderEquity": "total_equity",
            "totalLiabilities": "total_liabilities",
            "totalCurrentAssets": "current_assets",
            "totalCurrentLiabilities": "current_liabilities",
            "cashAndCashEquivalentsAtCarryingValue": "cash",
            "currentNetReceivables": "receivables",
            "inventory": "inventory",
            "propertyPlantEquipment": "fixed_assets",
            "totalAssets": "total_assets",
            "goodwill": "goodwill",
            "longTermInvestments": "lt_invest",
            "shortTermDebt": "short_term_debt",
            "currentLongTermDebt": "short_term_debt_part",
            "longTermDebt": "long_term_debt",
            "currentAccountsPayable": "accounts_payable",
            "otherCurrentAssets": "prepayment",
            "otherNonCurrentAssets": "other_assets",
            "deferredRevenue": "adv_receipts"
        }
        df = df.rename(columns=cols_map)
        numeric_cols = [
            "total_equity", "total_liabilities", "current_assets", "current_liabilities",
            "cash", "receivables", "inventory", "fixed_assets", "total_assets",
            "goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
            "long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts"
        ]
        return self._coerce_numeric(df, numeric_cols)

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        """Return annual cash-flow statements mapped to the internal schema."""
        df = self._fetch_data("CASH_FLOW", symbol)
        cols_map = {
            "fiscalDateEnding": "date",
            "operatingCashflow": "ocf",
            "capitalExpenditures": "capex",
            "dividendPayout": "dividends",
            "depreciationDepletionAndAmortization": "depreciation"
        }
        df = df.rename(columns=cols_map)
        return self._coerce_numeric(df, ["ocf", "capex", "dividends", "depreciation"])

View File

@ -1,243 +1,39 @@
import tushare as ts
import pandas as pd import pandas as pd
from .base import DataFetcher from .base import DataFetcher
import time
from storage.file_io import DataStorage
class CnFetcher(DataFetcher): class CnFetcher(DataFetcher):
def __init__(self, api_key: str): def __init__(self, api_key: str, data_source: str = 'Tushare'):
super().__init__(api_key) super().__init__(api_key)
ts.set_token(self.api_key) self.data_source = data_source
self.pro = ts.pro_api()
self.storage = DataStorage()
def _save_raw_data(self, df: pd.DataFrame, symbol: str, name: str): if self.data_source == 'Tushare':
if df is None or df.empty: from .tushare_cn_client import TushareCnClient
return self.client = TushareCnClient(api_key)
market = 'CN' else:
self.storage.save_data(df, market, symbol, f"raw_{name}") # Default to Tushare if unknown, or raise error.
# For robustness, we can default to Tushare or handle Akshare later.
def _get_ts_code(self, symbol: str) -> str: if self.data_source == 'Akshare':
return symbol raise NotImplementedError("Akshare client not yet implemented")
from .tushare_cn_client import TushareCnClient
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame: self.client = TushareCnClient(api_key)
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
is_annual = df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def get_income_statement(self, symbol: str) -> pd.DataFrame: def get_income_statement(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol) return self.client.get_income_statement(symbol)
df = self.pro.income(ts_code=ts_code)
self._save_raw_data(df, ts_code, "income_statement")
rename_map = {
'end_date': 'date',
'revenue': 'revenue',
'n_income_attr_p': 'net_income'
}
df = self._filter_data(df)
df = df.rename(columns=rename_map)
return df
def get_balance_sheet(self, symbol: str) -> pd.DataFrame: def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol) return self.client.get_balance_sheet(symbol)
df = self.pro.balancesheet(ts_code=ts_code)
self._save_raw_data(df, ts_code, "balance_sheet")
rename_map = {
'end_date': 'date',
'total_hldr_eqy_exc_min_int': 'total_equity',
'total_liab': 'total_liabilities',
'total_cur_assets': 'current_assets',
'total_cur_liab': 'current_liabilities'
}
df = self._filter_data(df)
df = df.rename(columns=rename_map)
return df
def get_cash_flow(self, symbol: str) -> pd.DataFrame: def get_cash_flow(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol) return self.client.get_cash_flow(symbol)
df = self.pro.cashflow(ts_code=ts_code)
self._save_raw_data(df, ts_code, "cash_flow")
df = self._filter_data(df)
df = df.rename(columns={
'end_date': 'date',
'n_cashflow_act': 'net_cash_flow',
'depr_fa_coga_dpba': 'depreciation'
})
return df
def get_market_metrics(self, symbol: str) -> dict: def get_market_metrics(self, symbol: str) -> dict:
ts_code = self._get_ts_code(symbol) return self.client.get_market_metrics(symbol)
metrics = {
"price": 0.0,
"market_cap": 0.0,
"pe": 0.0,
"pb": 0.0,
"total_share_holders": 0,
"employee_count": 0
}
try:
df_daily = self.pro.daily_basic(ts_code=ts_code, limit=1)
self._save_raw_data(df_daily, ts_code, "market_metrics_daily_basic")
if not df_daily.empty:
row = df_daily.iloc[0]
metrics["price"] = row.get('close', 0.0)
metrics["pe"] = row.get('pe', 0.0)
metrics["pb"] = row.get('pb', 0.0)
metrics["market_cap"] = row.get('total_mv', 0.0) * 10000
metrics["dividend_yield"] = row.get('dv_ttm', 0.0)
df_basic = self.pro.stock_basic(ts_code=ts_code, fields='name,list_date')
self._save_raw_data(df_basic, ts_code, "market_metrics_stock_basic")
if not df_basic.empty:
metrics['name'] = df_basic.iloc[0]['name']
metrics['list_date'] = df_basic.iloc[0]['list_date']
df_comp = self.pro.stock_company(ts_code=ts_code)
if not df_comp.empty:
metrics["employee_count"] = int(df_comp.iloc[0].get('employees', 0) or 0)
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, limit=1)
self._save_raw_data(df_holder, ts_code, "market_metrics_shareholder_number")
if not df_holder.empty:
metrics["total_share_holders"] = int(df_holder.iloc[0].get('holder_num', 0) or 0)
except Exception as e:
print(f"Error fetching market metrics for {symbol}: {e}")
return metrics
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame: def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol) return self.client.get_historical_metrics(symbol, dates)
results = []
if not dates:
return pd.DataFrame()
unique_dates = sorted(list(set([str(d).replace('-', '') for d in dates])), reverse=True)
try:
import datetime
min_date = min(unique_dates)
max_date = max(unique_dates)
df_daily = self.pro.daily_basic(ts_code=ts_code, start_date=min_date, end_date=max_date)
self._save_raw_data(df_daily, ts_code, "historical_metrics_daily_basic")
if not df_daily.empty:
df_daily = df_daily.sort_values('trade_date', ascending=False)
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, start_date=min_date, end_date=max_date)
self._save_raw_data(df_holder, ts_code, "historical_metrics_shareholder_number")
if not df_holder.empty:
df_holder = df_holder.sort_values('end_date', ascending=False)
for date_str in unique_dates:
metrics = {'date_str': date_str}
if not df_daily.empty:
closest_daily = df_daily[df_daily['trade_date'] <= date_str]
if not closest_daily.empty:
row = closest_daily.iloc[0]
metrics['Price'] = row.get('close')
metrics['PE'] = row.get('pe')
metrics['PB'] = row.get('pb')
metrics['MarketCap'] = row.get('total_mv', 0) * 10000
if not df_holder.empty:
closest_holder = df_holder[df_holder['end_date'] <= date_str]
if not closest_holder.empty:
metrics['Shareholders'] = closest_holder.iloc[0].get('holder_num')
results.append(metrics)
except Exception as e:
print(f"Error fetching historical metrics for {symbol}: {e}")
return pd.DataFrame(results)
def get_dividends(self, symbol: str) -> pd.DataFrame:
    """Aggregate implemented cash dividends by fiscal year.

    Returns a DataFrame with columns ``['date_str', 'dividends']`` where
    ``date_str`` is 'YYYY1231' and ``dividends`` is the total cash payout,
    or an empty DataFrame when no qualifying dividend exists.
    """
    ts_code = self._get_ts_code(symbol)
    df_div = self.pro.dividend(ts_code=ts_code, fields='end_date,ex_date,div_proc,cash_div')
    self._save_raw_data(df_div, ts_code, "dividends_raw")
    if df_div.empty:
        return pd.DataFrame()
    # Keep only implemented ('实施') cash dividends with a positive per-share amount.
    df_div = df_div[(df_div['div_proc'] == '实施') & (df_div['cash_div'] > 0)]
    if df_div.empty:
        return pd.DataFrame()
    df_div['total_cash_div'] = 0.0
    # Convert per-share dividends into total payouts using the share count
    # outstanding on each ex-date.
    for idx, record in df_div.iterrows():
        ex_date = record['ex_date']
        if not ex_date or pd.isna(ex_date):
            continue
        try:
            # Throttle to stay under the API rate limit.
            time.sleep(0.2)
            df_daily = self.pro.daily_basic(ts_code=ts_code, trade_date=ex_date, fields='total_share')
            if not df_daily.empty and not df_daily['total_share'].empty:
                # total_share is in 万股 (10k shares) — hence the 10000 factor.
                shares_wan = df_daily.iloc[0]['total_share']
                df_div.loc[idx, 'total_cash_div'] = record['cash_div'] * shares_wan * 10000
        except Exception as e:
            print(f"Could not fetch daily basic for {ts_code} on {ex_date}: {e}")
    df_div['year'] = pd.to_datetime(df_div['end_date']).dt.year
    dividends_by_year = df_div.groupby('year')['total_cash_div'].sum().reset_index()
    dividends_by_year['date_str'] = dividends_by_year['year'].astype(str) + '1231'
    dividends_by_year = dividends_by_year.rename(columns={'total_cash_div': 'dividends'})
    return dividends_by_year[['date_str', 'dividends']]
def get_repurchases(self, symbol: str) -> pd.DataFrame:
    """Aggregate share-repurchase amounts by announcement year.

    Returns a DataFrame with columns ``['date_str', 'repurchases']`` where
    ``date_str`` is 'YYYY1231'; empty DataFrame when no positive-amount
    repurchase records are available.
    """
    ts_code = self._get_ts_code(symbol)
    df = self.pro.repurchase(ts_code=ts_code)
    self._save_raw_data(df, ts_code, "repurchases")
    if df.empty or 'ann_date' not in df.columns or 'amount' not in df.columns:
        return pd.DataFrame()
    # Only rows with a real repurchase amount are meaningful.
    df = df[df['amount'] > 0]
    if df.empty:
        return pd.DataFrame()
    # Group by announcement year; per user feedback the API amount appears
    # to already be in yuan, so no unit conversion is applied.
    df['year'] = pd.to_datetime(df['ann_date']).dt.year
    yearly = df.groupby('year')['amount'].sum().reset_index()
    yearly['date_str'] = yearly['year'].astype(str) + '1231'
    yearly = yearly.rename(columns={'amount': 'repurchases'})
    return yearly[['date_str', 'repurchases']]

View File

@ -1,6 +1,6 @@
class FetcherFactory: class FetcherFactory:
@staticmethod @staticmethod
def get_fetcher(market: str, tushare_token: str = None, av_key: str = None, **kwargs): def get_fetcher(market: str, tushare_token: str = None, av_key: str = None, data_source: str = None, **kwargs):
from .base import DataFetcher from .base import DataFetcher
market = market.upper() market = market.upper()
if market == 'CN': if market == 'CN':
@ -18,10 +18,24 @@ class FetcherFactory:
from .hk_fetcher import HkFetcher from .hk_fetcher import HkFetcher
return HkFetcher(ifind_token) return HkFetcher(ifind_token)
elif market == 'US': elif market == 'US':
# Default to Alpha Vantage if not specified or explicit
if data_source == 'iFinD':
ifind_token = kwargs.get('ifind_refresh_token')
if not ifind_token:
import os
ifind_token = os.getenv('IFIND_REFRESH_TOKEN')
if not ifind_token:
# Fallback or error? Let's error if specifically requested
raise ValueError("iFinD Refresh Token is required for US market when iFinD is selected")
from .us_fetcher import UsFetcher
# We need to update UsFetcher to accept data_source or handle it internally
# For now, let's assume UsFetcher handles switching if we pass data_source
return UsFetcher(ifind_token, data_source='iFinD')
if not av_key: if not av_key:
raise ValueError("Alpha Vantage key is required for US market") raise ValueError("Alpha Vantage key is required for US market")
from .us_fetcher import UsFetcher from .us_fetcher import UsFetcher
return UsFetcher(av_key) return UsFetcher(av_key, data_source='Alpha Vantage')
elif market == 'JP': elif market == 'JP':
ifind_token = kwargs.get('ifind_refresh_token') or kwargs.get('jquants_refresh_token') ifind_token = kwargs.get('ifind_refresh_token') or kwargs.get('jquants_refresh_token')
if not ifind_token: if not ifind_token:

View File

@ -1,746 +1,61 @@
import pandas as pd import pandas as pd
import time
from .base import DataFetcher from .base import DataFetcher
from .ifind_client import IFindClient from .ifind_hk_client import IFindHKClient
from storage.file_io import DataStorage from storage.file_io import DataStorage
class HkFetcher(DataFetcher):
    """Hong Kong market data fetcher.

    Thin facade: symbol normalization happens here, while all iFinD API
    access and parsing is delegated to ``IFindHKClient``.
    """

    def __init__(self, api_key: str):
        # api_key is the iFinD Refresh Token.
        super().__init__(api_key)
        self.data_source = 'iFinD'
        self.client = IFindHKClient(api_key)
        self.storage = DataStorage()

    def _get_ifind_code(self, symbol: str) -> str:
        """Normalize a HK symbol to iFinD's 4-digit form, e.g. '0700.HK'.

        Accepts '700', '0700', '00700' and suffixed forms like '00700.HK';
        non-numeric symbols are passed through unchanged.
        """
        # Strip an existing .HK/.hk suffix so inputs like '00700.HK' work.
        clean = symbol.replace('.HK', '').replace('.hk', '')
        if clean.isdigit():
            # int() drops leading zeros, so '00700' -> '0700.HK' and
            # '01651' -> '1651.HK' (plain zfill(4) would leave 5 digits).
            return f"{int(clean):04d}.HK"
        return symbol

    def _fetch_basic_info(self, symbol):
        # Company name / accounting year-end / IPO date, cached by the client.
        code = self._get_ifind_code(symbol)
        return self.client._fetch_basic_info(symbol, code)

    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_income_statement(symbol, code)

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_balance_sheet(symbol, code)

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_cash_flow(symbol, code)

    def get_market_metrics(self, symbol: str) -> dict:
        code = self._get_ifind_code(symbol)
        return self.client.get_market_metrics(symbol, code)

    def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_historical_metrics(symbol, code, dates)

    def get_dividends(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_dividends(symbol, code)

    def get_repurchases(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_repurchases(symbol, code)

    def get_employee_count(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_employee_count(symbol, code)

    def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
        code = self._get_ifind_code(symbol)
        return self.client.get_financial_ratios(symbol, code)

View File

@ -0,0 +1,699 @@
import pandas as pd
import time
from .ifind_client import IFindClient
from storage.file_io import DataStorage
class IFindHKClient:
    """iFinD data client dedicated to the Hong Kong market.

    Wraps the low-level ``IFindClient`` HTTP interface: parses its 'tables'
    payloads into DataFrames, persists every raw payload via ``DataStorage``,
    and caches per-code basic company info.
    """

    def __init__(self, api_key: str):
        # api_key is the iFinD Refresh Token.
        self.cli = IFindClient(refresh_token=api_key)
        self.storage = DataStorage()
        self.market = 'HK'
        # Basic-info dicts keyed by iFinD code, to avoid repeated API calls.
        self._basic_info_cache = {}

    def _save_raw_data(self, data, symbol: str, name: str):
        """Persist a raw API payload under ``raw_<name>``.

        ``data`` may be a DataFrame or a dict (wrapped into a one-row frame);
        ``None`` is silently ignored.
        """
        if data is None:
            return
        if isinstance(data, dict):
            df = pd.DataFrame([data])
        else:
            df = data
        self.storage.save_data(df, self.market, symbol, f"raw_{name}")

    def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
        """Convert an iFinD response's first 'tables' entry to a DataFrame.

        Adds an ``end_date`` column in YYYYMMDD form derived from the
        response's 'time' axis, falling back to a date-like column.
        Returns an empty DataFrame for error responses or missing data.
        """
        if not res:
            return pd.DataFrame()
        # Default to 0 when absent so lenient mocks still parse.
        error_code = res.get("errorcode", 0)
        if error_code != 0:
            print(f"iFinD API Error: {res.get('errmsg')} (code: {error_code})")
            return pd.DataFrame()
        tables = res.get("tables", [])
        if not tables:
            return pd.DataFrame()
        table_info = tables[0]
        table_data = table_info.get("table", {})
        times = table_info.get("time", [])
        if not table_data:
            return pd.DataFrame()
        # Scalars must become single-element lists for DataFrame construction.
        processed_table_data = {}
        for k, v in table_data.items():
            if not isinstance(v, list):
                processed_table_data[k] = [v]
            else:
                processed_table_data[k] = v
        df = pd.DataFrame(processed_table_data)
        if times and len(times) == len(df):
            df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
        elif times and len(df) == 1:
            df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
        if 'end_date' not in df.columns:
            # Fallback: derive end_date from the first recognizable date column.
            for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
                if col in df.columns:
                    df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
                    break
        return df

    def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Sort report rows newest-first and drop duplicate ``end_date`` rows,
        ensuring the latest period and its year-ago counterpart survive."""
        if df.empty or 'end_date' not in df.columns:
            return df
        df = df.sort_values(by='end_date', ascending=False)
        df = df.drop_duplicates(subset=['end_date'], keep='first')
        if df.empty:
            return df
        latest_record = df.iloc[[0]]
        try:
            latest_date_str = str(latest_record['end_date'].values[0])
            # Same day one year earlier via YYYYMMDD integer arithmetic.
            last_year_date_str = str(int(latest_date_str) - 10000)
            comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
        except Exception:
            # Non-numeric end_date: skip the year-ago lookup.
            comparable_record = pd.DataFrame()
        if comparable_record.empty:
            dfs_to_concat = [latest_record, df]
        else:
            dfs_to_concat = [latest_record, comparable_record, df]
        combined = pd.concat(dfs_to_concat)
        combined = combined.drop_duplicates(subset=['end_date'])
        combined = combined.sort_values(by='end_date', ascending=False)
        return combined

    def _fetch_basic_info(self, symbol: str, code: str) -> dict:
        """Fetch (and cache) company name, accounting year-end and IPO date."""
        if code in self._basic_info_cache:
            return self._basic_info_cache[code]
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "corp_cn_name", "indiparams": []},
                {"indicator": "accounting_date", "indiparams": []},
                {"indicator": "ipo_date", "indiparams": []}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            self._save_raw_data(df, symbol, "basic_info_raw")
        info = {
            "name": "",
            "accounting_date": "1231",
            "ipo_date": ""
        }
        if not df.empty:
            row = df.iloc[0]
            info["name"] = str(row.get("corp_cn_name", ""))
            # Raw accounting date is kept under 'acc_date'; the HK pipeline
            # normalizes 'accounting_date' itself to 12-31.
            info["acc_date"] = str(row.get("accounting_date", "1231"))
            info["accounting_date"] = "1231"
            info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
        self._basic_info_cache[code] = info
        return info
def _fetch_financial_data_annual(self, symbol: str, code: str, indicator_configs: list) -> pd.DataFrame:
    """Fetch up to five annual (Dec-31) snapshots of the given indicators.

    Probes backwards from the current year to find the most recent fiscal
    year that has reported data, then pulls that year and the four
    preceding ones.

    Args:
        symbol: Local symbol, used for raw-data file naming by callers.
        code: iFinD instrument code passed to the API.
        indicator_configs: list of {"indicator": ..., "type": ...} dicts;
            "type" defaults to "1" when absent.

    Returns:
        Concatenated DataFrame with one row per year and an 'end_date'
        column (YYYYMMDD), or an empty DataFrame when no year has data.
    """
    current_year = int(time.strftime("%Y"))
    last_valid_year = None
    # Probe the last three year-ends using only the first indicator to find
    # the most recent year that actually has a non-null, non-zero value.
    for offset in range(3):
        test_year = current_year - offset
        test_date = f"{test_year}1231"
        first_indicator = indicator_configs[0]
        params = {
            "codes": code,
            "indipara": [
                {"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
            if pd.notna(valid_val) and valid_val != 0:
                last_valid_year = test_year
                break
    if last_valid_year is None:
        # No probe succeeded; fall back to the current year and let the
        # per-year emptiness checks below weed out missing data.
        last_valid_year = current_year
    all_dfs = []
    # Pull the five most recent annual snapshots, newest first.
    for i in range(5):
        target_year = last_valid_year - i
        target_date = f"{target_year}1231"
        params = {
            "codes": code,
            "indipara": [
                {"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
                for item in indicator_configs
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            # Accept the year only if at least one value column has data.
            valid_cols = [c for c in df.columns if c not in ['end_date', 'date']]
            if not df[valid_cols].isnull().all().all():
                df['end_date'] = target_date
                df = df.dropna(axis=1, how='all')
                all_dfs.append(df)
    if not all_dfs:
        return pd.DataFrame()
    return pd.concat(all_dfs, ignore_index=True)
def get_income_statement(self, symbol: str, code: str) -> pd.DataFrame:
    """Annual income statement for the last five fiscal years.

    Requests the full iFinD income-statement indicator set, saves the raw
    result, renames a subset of columns to the internal schema (revenue,
    gross_profit, net_income, ...) and coerces values to numeric.
    Columns without a mapping keep their iFinD names.
    """
    indicators = [
        {"indicator": "total_oi"},
        {"indicator": "prime_oi"},
        {"indicator": "other_oi"},
        {"indicator": "operating_cost"},
        {"indicator": "operating_expense"},
        {"indicator": "operating_fee"},
        {"indicator": "p_depreciation_and_amortization"},
        {"indicator": "gross_profit"},
        {"indicator": "sales_ad_and_ga"},
        {"indicator": "rad_cost"},
        {"indicator": "sales_fee"},
        {"indicator": "financial_expense"},
        {"indicator": "sales_income"},
        {"indicator": "sales_cost"},
        {"indicator": "other_income"},
        {"indicator": "manage_fee"},
        {"indicator": "deprec_and_amorti"},
        {"indicator": "total_other_opearting_expense"},
        {"indicator": "p_total_cost"},
        {"indicator": "operating_profit"},
        {"indicator": "total_gal"},
        {"indicator": "interest_income"},
        {"indicator": "interest_net_pay"},
        {"indicator": "interest_expense"},
        {"indicator": "income_from_asso_and_joint"},
        {"indicator": "other_gal_effct_profit_pre_tax"},
        {"indicator": "conti_op_before_tax"},
        {"indicator": "profit_before_noncurrent_items"},
        {"indicator": "profit_and_loss_of_noncurrent_items"},
        {"indicator": "profit_before_tax"},
        {"indicator": "income_tax"},
        {"indicator": "profit_after_tax"},
        {"indicator": "minoritygal"},
        {"indicator": "continue_operate_net_profit"},
        {"indicator": "noncontinue_operate_net_profit"},
        {"indicator": "other_special_items"},
        {"indicator": "ni_attr_to_cs"},
        {"indicator": "np_atms"},
        {"indicator": "preferred_divid_and_other_adjust"},
        {"indicator": "oci"},
        {"indicator": "total_oci"},
        {"indicator": "oci_from_parent"},
        {"indicator": "oci_from_minority"},
        {"indicator": "invest_property_fv_chg"},
        {"indicator": "operating_amt"},
        {"indicator": "oi_si"},
        {"indicator": "operating_premium_profit_si"},
        {"indicator": "to_toallied_corp_perf"},
        {"indicator": "to_joint_control_entity_perf"},
        {"indicator": "pre_tax_profit_si"},
        {"indicator": "after_tax_profit_si"},
        {"indicator": "profit_attrbt_to_nonholders"},
        {"indicator": "total_income_atncs"}
    ]
    df = self._fetch_financial_data_annual(symbol, code, indicators)
    if df.empty: return df
    self._save_raw_data(df, symbol, "income_statement_raw")
    # NOTE(review): three source columns all map to 'depreciation'; if more
    # than one of them is present this yields duplicate labels — confirm the
    # API returns at most one per statement.
    rename_map = {
        'total_oi': 'revenue',
        'operating_amt': 'turnover',
        'gross_profit': 'gross_profit',
        'sales_ad_and_ga': 'sga_exp',
        'sales_fee': 'selling_marketing_exp',
        'manage_fee': 'ga_exp',
        'rad_cost': 'rd_exp',
        'income_tax': 'income_tax',
        'ni_attr_to_cs': 'net_income',
        'operating_profit': 'operating_profit',
        'depreciation': 'depreciation',
        'deprec_and_amorti': 'depreciation',
        'p_depreciation_and_amortization': 'depreciation'
    }
    df_filtered = df.rename(columns=rename_map)
    # Fall back to operating profit as EBIT when no explicit EBIT exists.
    if 'ebit' not in df_filtered.columns and 'operating_profit' in df_filtered.columns:
        df_filtered['ebit'] = df_filtered['operating_profit']
    for col in df_filtered.columns:
        if col not in ['date', 'end_date']:
            df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
    return self._filter_data(df_filtered)
def get_balance_sheet(self, symbol: str, code: str) -> pd.DataFrame:
    """Annual balance sheet for the last five fiscal years.

    Requests the full iFinD balance-sheet indicator set, saves the raw
    result, renames a subset of columns to the internal schema and derives
    total_liabilities when the API does not supply it directly.
    """
    indicators = [
        {"indicator": "cce"},
        {"indicator": "st_investment"},
        {"indicator": "total_cash"},
        {"indicator": "account_receivable"},
        {"indicator": "tradable_fnncl_asset"},
        {"indicator": "derivative_fnncl_assets"},
        {"indicator": "restriv_fund"},
        {"indicator": "other_short_term_investment"},
        {"indicator": "ar_nr"},
        {"indicator": "total_ar"},
        {"indicator": "or"},
        {"indicator": "inventory"},
        {"indicator": "flow_assets_dit"},
        {"indicator": "pre_payment"},
        {"indicator": "other_cunrrent_assets_si"},
        {"indicator": "other_ca"},
        {"indicator": "total_ca"},
        {"indicator": "receivables_from_allied_corp"},
        {"indicator": "current_assets_si"},
        {"indicator": "prepay_deposits_etc"},
        {"indicator": "receivables_from_jce"},
        {"indicator": "receivables_from_ac"},
        {"indicator": "recoverable_tax"},
        {"indicator": "total_fixed_assets"},
        {"indicator": "depreciation"},
        {"indicator": "equity_and_lt_invest"},
        {"indicator": "net_fixed_assets"},
        {"indicator": "invest_property"},
        {"indicator": "equity_investment"},
        {"indicator": "investment_in_associate"},
        {"indicator": "investment_in_joints"},
        {"indicator": "held_to_maturity_invest"},
        {"indicator": "goodwill_and_intangible_asset"},
        {"indicator": "intangible_assets"},
        {"indicator": "accum_amortized"},
        {"indicator": "noncurrent_assets_dit"},
        {"indicator": "other_noncurrent_assets_si"},
        {"indicator": "dt_assets"},
        {"indicator": "total_noncurrent_assets"},
        {"indicator": "total_assets"},
        {"indicator": "ac_equity"},
        {"indicator": "lease_prepay"},
        {"indicator": "noncurrent_assets_si"},
        {"indicator": "st_lt_current_loan"},
        {"indicator": "trade_financial_lia"},
        {"indicator": "derivative_financial_lia"},
        {"indicator": "ap_np"},
        {"indicator": "accounts_payable"},
        {"indicator": "advance_payment"},
        {"indicator": "st_debt"},
        {"indicator": "contra_liab"},
        {"indicator": "tax_payable"},
        {"indicator": "accrued_liab"},
        {"indicator": "flow_debt_deferred_income"},
        {"indicator": "other_cl"},
        {"indicator": "other_cunrrent_liab_si"},
        {"indicator": "total_cl"},
        {"indicator": "accrued_expenses_etc"},
        {"indicator": "money_payable_toac"},
        {"indicator": "joint_control_entity_payable"},
        {"indicator": "payable_to_associated_corp"},
        {"indicator": "lt_debt"},
        {"indicator": "long_term_loan"},
        {"indicator": "other_noncurrent_liabi"},
        {"indicator": "deferred_tax_liability"},
        {"indicator": "ncl_deferred_income"},
        {"indicator": "other_noncurrent_liab_si"},
        {"indicator": "noncurrent_liab_si"},
        {"indicator": "total_noncurrent_liab"},
        {"indicator": "total_liab"},
        {"indicator": "common_shares"},
        {"indicator": "capital_reserve"},
        {"indicator": "equity_premium"},
        {"indicator": "treasury_stock"},
        {"indicator": "accumgal"},
        {"indicator": "equity_atsopc_sbi"},
        {"indicator": "preferred_stock"},
        {"indicator": "perpetual_debt"},
        {"indicator": "reserve"},
        {"indicator": "other_reserves"},
        {"indicator": "retained_earnings"},
        {"indicator": "oci_bs"},
        {"indicator": "total_common_equity"},
        {"indicator": "equity_belong_to_parent"},
        {"indicator": "minority_interests"},
        {"indicator": "other_equity_si"},
        {"indicator": "total_equity"},
        {"indicator": "total_lib_and_equity"},
        {"indicator": "equity_si"},
        {"indicator": "equity_atncs"}
    ]
    df = self._fetch_financial_data_annual(symbol, code, indicators)
    if df.empty: return df
    self._save_raw_data(df, symbol, "balance_sheet_raw")
    rename_map = {
        'cce': 'cash',
        'ar_nr': 'receivables',
        'inventory': 'inventory',
        'net_fixed_assets': 'fixed_assets',
        'equity_and_lt_invest': 'long_term_investments',
        'goodwill_and_intangible_asset': 'goodwill',
        'st_debt': 'short_term_debt',
        'st_lt_current_loan': 'short_term_borrowings',
        'ap_np': 'accounts_payable',
        'contra_liab': 'contract_liabilities',
        'advance_payment': 'advances_from_customers',
        'flow_debt_deferred_income': 'deferred_revenue',
        'lt_debt': 'long_term_debt',
        'long_term_loan': 'long_term_borrowings',
        'total_assets': 'total_assets',
        'equity_belong_to_parent': 'total_equity',
        'pre_payment': 'prepayment'
    }
    df_filtered = df.rename(columns=rename_map)
    # Renaming can collide with pre-existing columns (e.g. 'accounts_payable'
    # is both a source column and a rename target); keep the first label.
    df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
    if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
        if 'total_liab' in df_filtered.columns:
            df_filtered['total_liabilities'] = df_filtered['total_liab']
        elif 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
            # Derive liabilities from the accounting identity A = L + E.
            df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
    df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
    for col in df_filtered.columns:
        if col not in ['date', 'end_date']:
            df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
    return self._filter_data(df_filtered)
def get_cash_flow(self, symbol: str, code: str) -> pd.DataFrame:
    """Annual cash-flow statement mapped to the internal column names.

    'capex' is stored as a positive magnitude even though the API reports
    it as an outflow.
    """
    indicator_configs = [
        {"indicator": "ni"},
        {"indicator": "depreciation_and_amortization"},
        {"indicator": "operating_capital_change"},
        {"indicator": "ncf_from_oa"},
        {"indicator": "capital_cost"},
        {"indicator": "invest_buy"},
        {"indicator": "ncf_from_ia"},
        {"indicator": "increase_in_share_capital"},
        {"indicator": "decrease_in_share_capital"},
        {"indicator": "total_dividends_paid"},
        {"indicator": "ncf_from_fa"}
    ]
    raw = self._fetch_financial_data_annual(symbol, code, indicator_configs)
    if raw.empty:
        return raw
    self._save_raw_data(raw, symbol, "cash_flow_raw")
    result = raw.rename(columns={
        'ncf_from_oa': 'ocf',
        'capital_cost': 'capex',
        'total_dividends_paid': 'dividends'
    })
    for column in result.columns:
        if column not in ('date', 'end_date'):
            result[column] = pd.to_numeric(result[column], errors='coerce')
    if 'capex' in result.columns:
        result['capex'] = result['capex'].abs()
    return self._filter_data(result)
def get_market_metrics(self, symbol: str, code: str) -> dict:
    """Current snapshot: name, listing date and live valuation figures.

    Valuation fields default to 0 when the quote request returns nothing.
    """
    info = self._fetch_basic_info(symbol, code)
    metrics = {
        "name": info.get("name", ""),
        "list_date": info.get("ipo_date", ""),
        "accounting_date": info.get("accounting_date", ""),
        "price": 0,
        "market_cap": 0,
        "pe": 0,
        "pb": 0,
        "dividend_yield": 0
    }
    # Output key -> iFinD indicator (order matters for the request payload).
    quote_fields = {
        "price": "ths_close_price_stock",
        "market_cap": "ths_market_value_stock",
        "pe": "ths_pe_ttm_stock",
        "pb": "ths_pb_stock",
        "dividend_yield": "ths_dividend_ratio_stock",
    }
    params = {
        "codes": code,
        "indipara": [{"indicator": ind, "indiparams": []} for ind in quote_fields.values()]
    }
    df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
    if not df.empty:
        row = df.iloc[0]
        for target, indicator in quote_fields.items():
            # `or 0` maps None/missing to 0 before the float conversion.
            metrics[target] = float(row.get(indicator) or 0)
    return metrics
def get_historical_metrics(self, symbol: str, code: str, dates: list) -> pd.DataFrame:
    """For each date, fetch the previous close and market cap via the
    date_sequence endpoint.

    PE and PB are emitted as 0.0 placeholders; only Price and MarketCap
    are populated from the API. Returns one row per requested date.
    """
    if not dates: return pd.DataFrame()
    results = []
    for d in dates:
        # Normalize to YYYYMMDD, then reformat as YYYY-MM-DD for the API.
        d_str = str(d).replace('-', '').replace('/', '')
        fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
        params = {
            "codes": code,
            "startdate": fmt_d,
            "enddate": fmt_d,
            # Fill=Previous carries the last trading day's value over
            # weekends/holidays.
            "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
            "indipara": [
                {"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
                {"indicator": "market_value", "indiparams": ["", "CNY"]}
            ]
        }
        res = self.cli.post("date_sequence", params)
        df_seq = self._parse_ifind_tables(res)
        metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
        if not df_seq.empty:
            # Use the latest row at or before the requested date.
            match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
            if not match.empty:
                if 'pre_close' in match.columns:
                    metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
                if 'market_value' in match.columns:
                    metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
        results.append(metrics)
    df_hist = pd.DataFrame(results)
    self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
    return df_hist
def get_dividends(self, symbol: str, code: str) -> pd.DataFrame:
    """Per-year cumulative cash dividends for the last five calendar years.

    Years with zero or missing payouts are omitted; rows are dated 12-31.
    """
    this_year = int(time.strftime("%Y"))
    rows = []
    for back in range(5):
        year_str = str(this_year - back)
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
        if df.empty or 'annual_cum_dividend' not in df.columns:
            continue
        amount = df['annual_cum_dividend'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({'date_str': f"{year_str}1231", 'dividends': float(amount)})
    if not rows:
        return pd.DataFrame()
    result = pd.DataFrame(rows)
    self._save_raw_data(result, symbol, "dividends_raw")
    return result
def get_repurchases(self, symbol: str, code: str) -> pd.DataFrame:
    """Share-repurchase totals over trailing year windows, last five years.

    Each window runs from the prior year's 12-31 to the current year's
    12-31; rows are dated at the window's closing year end. Windows with
    zero or missing activity are omitted.
    """
    this_year = int(time.strftime("%Y"))
    rows = []
    for back in range(5):
        year = this_year - back
        window_start = f"{year - 1}-12-31"
        window_end = f"{year}-12-31"
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "repur_num_new", "indiparams": [window_start, window_end, "1"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
        if df.empty or 'repur_num_new' not in df.columns:
            continue
        amount = df['repur_num_new'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({'date_str': f"{year}1231", 'repurchases': float(amount)})
    if not rows:
        return pd.DataFrame()
    result = pd.DataFrame(rows)
    self._save_raw_data(result, symbol, "repurchases_raw")
    return result
def get_employee_count(self, symbol: str, code: str) -> pd.DataFrame:
    """Year-end headcount for the last five years, dated 12-31.

    Years with zero or missing counts are omitted.
    """
    this_year = int(time.strftime("%Y"))
    rows = []
    for back in range(5):
        year = this_year - back
        query_date = f"{year}-12-31"
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "staff_num", "indiparams": [query_date]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
        if df.empty or 'staff_num' not in df.columns:
            continue
        headcount = df['staff_num'].iloc[0]
        if pd.notna(headcount) and headcount != 0:
            rows.append({'date_str': f"{year}1231", 'employee_count': float(headcount)})
    if not rows:
        return pd.DataFrame()
    result = pd.DataFrame(rows)
    self._save_raw_data(result, symbol, "employee_count_raw")
    return result
def get_financial_ratios(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of financial ratios plus per-employee figures.

    Probes ROE backwards from the current year to find the latest fiscal
    year with data, then requests profitability, expense-ratio, growth,
    leverage and turnover indicators for that year and the four prior.

    Returns:
        DataFrame keyed by 'end_date' (YYYYMMDD), renamed to the internal
        schema and passed through self._filter_data; empty DataFrame when
        nothing is available.
    """
    current_year = int(time.strftime("%Y"))
    # 1. Determine the latest valid year
    last_valid_year = None
    for offset in range(3):
        test_year = current_year - offset
        test_date = f"{test_year}1231"
        params = {
            "codes": code,
            "indipara": [{"indicator": "roe", "indiparams": [test_date]}]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
            # BUG FIX: this previously tested the undefined name `valid_val`
            # (only `val` is assigned above), raising NameError whenever the
            # probe returned data.
            if pd.notna(val) and val != 0:
                last_valid_year = test_year
                break
    if last_valid_year is None:
        last_valid_year = current_year
    all_dfs = []
    # 2. Request the ratio set for each of the five most recent years.
    for i in range(5):
        target_year = last_valid_year - i
        date_str = f"{target_year}1231"
        year_str = str(target_year)
        indipara = []
        # Per-employee indicators take the year plus a "100" scale parameter.
        for key in ["salary_pp", "revenue_pp", "profit_pp"]:
            indipara.append({"indicator": key, "indiparams": [year_str, "100"]})
        ratio_keys = [
            "roe", "roa", "roic",
            "sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
            "operating_revenue_yoy", "np_atsopc_yoy",
            "ibdebt_ratio_asset_base",
            "inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
            "fixed_asset_turnover_ratio", "total_capital_turnover"
        ]
        for key in ratio_keys:
            indipara.append({"indicator": key, "indiparams": [date_str]})
        params = {
            "codes": code,
            "indipara": indipara
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            if 'end_date' not in df.columns:
                df['end_date'] = date_str
            df = df.dropna(axis=1, how='all')
            # Keep the year only if at least one value column has data.
            valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
            if not df[valid_cols].isnull().all().all():
                all_dfs.append(df)
    if not all_dfs:
        return pd.DataFrame()
    combined = pd.concat(all_dfs, ignore_index=True)
    self._save_raw_data(combined, symbol, "financial_ratios_raw")
    rename_map = {
        "salary_pp": "salary_per_employee",
        "revenue_pp": "revenue_per_employee",
        "profit_pp": "profit_per_employee",
        "sales_fee_to_or": "selling_expense_ratio",
        "manage_fee_to_revenue": "admin_expense_ratio",
        "rad_expense_to_total_income": "rd_expense_ratio",
        "operating_revenue_yoy": "revenue_growth",
        "np_atsopc_yoy": "net_profit_growth",
        "ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
        "fixed_asset_turnover_ratio": "fixed_asset_turnover",
        "total_capital_turnover": "total_asset_turnover"
    }
    df_final = combined.rename(columns=rename_map)
    for col in df_final.columns:
        if col not in ['date', 'end_date']:
            df_final[col] = pd.to_numeric(df_final[col], errors='coerce')
    return self._filter_data(df_final)

View File

@ -0,0 +1,556 @@
import pandas as pd
import time
from .ifind_client import IFindClient
from storage.file_io import DataStorage
class IFindIntClient:
"""
Generic iFinD Client for International Markets (JP, VN, US).
Uses 'OAS' (Original Accounting Standards?) or similar standardized indicators
typically available for international stocks in iFinD.
"""
def __init__(self, api_key: str, market: str):
    """Bind the client to one international market (e.g. JP/VN/US).

    Sets up iFinD API access, local file storage and an in-memory cache
    for per-instrument basic info.
    """
    self.market = market
    self._basic_info_cache = {}
    self.storage = DataStorage()
    self.cli = IFindClient(refresh_token=api_key)
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
if isinstance(data, dict):
df = pd.DataFrame([data])
else:
df = data
self.storage.save_data(df, self.market, symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
if not res:
return pd.DataFrame()
if res.get("errorcode") != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
return pd.DataFrame()
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
# Try to include standard fiscal year ends or just all annuals?
# JP/VN/US usually have annual reports.
# iFinD often returns data for specific requested dates.
# We will keep it simple and just dedup.
# But for consistency with existing logic which concatenates records:
is_annual = df['end_date'].astype(str).str.endswith('1231') | df['end_date'].astype(str).str.endswith('0331')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def _fetch_basic_info(self, symbol: str, code: str) -> dict:
    """Fetch (and cache per code) company name, fiscal year end and IPO date.

    Unlike the HK variant, the API-reported accounting date is used as-is,
    since international issuers often close their books on dates other
    than 12-31 (e.g. 03-31 in Japan). Falls back to "1231" when absent.
    """
    if code in self._basic_info_cache:
        return self._basic_info_cache[code]
    params = {
        "codes": code,
        "indipara": [
            {"indicator": "corp_cn_name", "indiparams": []},
            {"indicator": "accounting_date", "indiparams": []},
            {"indicator": "ipo_date", "indiparams": []}
        ]
    }
    res = self.cli.post("basic_data_service", params)
    df = self._parse_ifind_tables(res)
    if not df.empty:
        self._save_raw_data(df, symbol, "basic_info_raw")
    info = {
        "name": "",
        "accounting_date": "1231",
        "ipo_date": ""
    }
    if not df.empty:
        row = df.iloc[0]
        info["name"] = str(row.get("corp_cn_name", ""))
        # Strip date separators; assumed to yield an MMDD string —
        # TODO(review): confirm the API never returns a full YYYYMMDD here,
        # as downstream callers slice this value as MMDD.
        acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
        if acc_date:
            info["accounting_date"] = acc_date
        info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
    self._basic_info_cache[code] = info
    return info
def _fetch_financial_data_annual(self, symbol: str, code: str, indicator_configs: list) -> pd.DataFrame:
basic_info = self._fetch_basic_info(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
last_valid_year = None
# 1. Determine most recent valid year
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}{acc_date}"
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}{acc_date}"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
df['end_date'] = target_date
all_dfs.append(df)
# Filter and concat
all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
def get_income_statement(self, symbol: str, code: str) -> pd.DataFrame:
    """Annual income statement (OAS indicator set) mapped to internal names."""
    oas_fields = [
        "revenue_oas",
        "gross_profit_oas",
        "sga_expenses_oas",
        "selling_marketing_expenses_oas",
        "ga_expenses_oas",
        "rd_expenses_oas",
        "income_tax_expense_oas",
        "net_income_attri_to_common_sh_oas",
        "operating_income_oas",
    ]
    raw = self._fetch_financial_data_annual(symbol, code, [{"indicator": f} for f in oas_fields])
    if raw.empty:
        return raw
    self._save_raw_data(raw, symbol, "income_statement_raw")
    result = raw.rename(columns={
        'revenue_oas': 'revenue',
        'gross_profit_oas': 'gross_profit',
        'sga_expenses_oas': 'sga_exp',
        'selling_marketing_expenses_oas': 'selling_marketing_exp',
        'ga_expenses_oas': 'ga_exp',
        'rd_expenses_oas': 'rd_exp',
        'income_tax_expense_oas': 'income_tax',
        'net_income_attri_to_common_sh_oas': 'net_income',
        'operating_income_oas': 'operating_profit'
    })
    for column in result.columns:
        if column not in ('date', 'end_date'):
            result[column] = pd.to_numeric(result[column], errors='coerce')
    return self._filter_data(result)
def get_balance_sheet(self, symbol: str, code: str) -> pd.DataFrame:
    """Annual balance sheet (OAS indicator set) mapped to internal names.

    total_liabilities is derived as assets minus equity when the API does
    not supply it directly.
    """
    oas_fields = [
        "cash_equi_short_term_inve_oas",
        "accou_and_notes_recei_oas",
        "inventories_oas",
        "ppe_net_oas",
        "long_term_inv_and_receiv_oas",
        "goodwill_and_intasset_oas",
        "short_term_debt_oas",
        "short_term_borrowings_oas",
        "account_and_note_payable_oas",
        "contra_liabilities_current_oas",
        "advance_from_cust_current_oas",
        "defer_revenue_current_oas",
        "long_term_debt_oas",
        "long_term_borrowings_oas",
        "total_assets_oas",
        "equity_attri_to_companyowner_oas",
        "prepaid_expenses_current_oas",
    ]
    raw = self._fetch_financial_data_annual(symbol, code, [{"indicator": f} for f in oas_fields])
    if raw.empty:
        return raw
    self._save_raw_data(raw, symbol, "balance_sheet_raw")
    result = raw.rename(columns={
        'cash_equi_short_term_inve_oas': 'cash',
        'accou_and_notes_recei_oas': 'receivables',
        'inventories_oas': 'inventory',
        'ppe_net_oas': 'fixed_assets',
        'long_term_inv_and_receiv_oas': 'long_term_investments',
        'goodwill_and_intasset_oas': 'goodwill',
        'short_term_debt_oas': 'short_term_debt',
        'short_term_borrowings_oas': 'short_term_borrowings',
        'account_and_note_payable_oas': 'accounts_payable',
        'contra_liabilities_current_oas': 'contract_liabilities',
        'advance_from_cust_current_oas': 'advances_from_customers',
        'defer_revenue_current_oas': 'deferred_revenue',
        'long_term_debt_oas': 'long_term_debt',
        'long_term_borrowings_oas': 'long_term_borrowings',
        'total_assets_oas': 'total_assets',
        'equity_attri_to_companyowner_oas': 'total_equity',
        'prepaid_expenses_current_oas': 'prepayment'
    })
    missing_liab = ('total_liabilities' not in result.columns
                    or result['total_liabilities'].isnull().all())
    if missing_liab and 'total_assets' in result.columns and 'total_equity' in result.columns:
        # Accounting identity: A = L + E.
        result['total_liabilities'] = result['total_assets'] - result['total_equity']
    for column in result.columns:
        if column not in ('date', 'end_date'):
            result[column] = pd.to_numeric(result[column], errors='coerce')
    return self._filter_data(result)
def get_cash_flow(self, symbol: str, code: str) -> pd.DataFrame:
    """Annual cash-flow highlights: operating cash flow, capex and dividends.

    'capex' is stored as a positive magnitude even though the API reports
    it as an outflow.
    """
    oas_fields = [
        "net_cash_flows_from_oa_oas",
        "purchase_of_ppe_and_ia_oas",
        "dividends_paid_oas",
    ]
    raw = self._fetch_financial_data_annual(symbol, code, [{"indicator": f} for f in oas_fields])
    if raw.empty:
        return raw
    self._save_raw_data(raw, symbol, "cash_flow_raw")
    result = raw.rename(columns={
        'net_cash_flows_from_oa_oas': 'ocf',
        'purchase_of_ppe_and_ia_oas': 'capex',
        'dividends_paid_oas': 'dividends'
    })
    for column in result.columns:
        if column not in ('date', 'end_date'):
            result[column] = pd.to_numeric(result[column], errors='coerce')
    if 'capex' in result.columns:
        result['capex'] = result['capex'].abs()
    return self._filter_data(result)
def get_market_metrics(self, symbol: str, code: str) -> dict:
    """Return basic identity metrics only (no live quote data is fetched
    for international markets)."""
    info = self._fetch_basic_info(symbol, code)
    return {
        "name": info.get("name", ""),
        "list_date": info.get("ipo_date", ""),
    }
def get_historical_metrics(self, symbol: str, code: str, dates: list) -> pd.DataFrame:
    """For each date, fetch the previous close and market cap via the
    date_sequence endpoint.

    PE and PB are emitted as 0.0 placeholders; only Price and MarketCap
    are populated from the API. Returns one row per requested date.
    """
    if not dates: return pd.DataFrame()
    results = []
    for d in dates:
        # Normalize to YYYYMMDD, then reformat as YYYY-MM-DD for the API.
        d_str = str(d).replace('-', '').replace('/', '')
        fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
        params = {
            "codes": code,
            "startdate": fmt_d,
            "enddate": fmt_d,
            # Fill=Previous carries the last trading day's value over
            # weekends/holidays.
            "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
            "indipara": [
                {"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
                {"indicator": "market_value", "indiparams": ["", "CNY"]}
            ]
        }
        res = self.cli.post("date_sequence", params)
        df_seq = self._parse_ifind_tables(res)
        metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
        if not df_seq.empty:
            # Use the latest row at or before the requested date.
            match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
            if not match.empty:
                if 'pre_close' in match.columns:
                    metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
                if 'market_value' in match.columns:
                    metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
        results.append(metrics)
    df_hist = pd.DataFrame(results)
    self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
    return df_hist
def get_dividends(self, symbol: str, code: str) -> pd.DataFrame:
    """Per-year cumulative dividends for the last five years, dated at the
    company's fiscal year end.

    Years with zero or missing payouts are omitted.
    """
    info = self._fetch_basic_info(symbol, code)
    acc_date = info.get("accounting_date", "1231")
    this_year = int(time.strftime("%Y"))
    rows = []
    for back in range(5):
        year_str = str(this_year - back)
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
        if df.empty or 'annual_cum_dividend' not in df.columns:
            continue
        amount = df['annual_cum_dividend'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({'date_str': f"{year_str}{acc_date}", 'dividends': float(amount)})
    if not rows:
        return pd.DataFrame()
    result = pd.DataFrame(rows)
    self._save_raw_data(result, symbol, "dividends_raw")
    return result
def get_repurchases(self, symbol: str, code: str) -> pd.DataFrame:
    """Share-repurchase totals over the last five fiscal-year windows.

    Each window runs from the prior year's fiscal year end to the current
    one; rows are dated at the window's closing fiscal year end. Windows
    with zero or missing activity are omitted.
    """
    basic_info = self._fetch_basic_info(symbol, code)
    acc_date = basic_info.get("accounting_date", "1231")
    # acc_date is assumed to be an MMDD string (the default is "1231");
    # slice it into month/day to build the API's YYYY-MM-DD window bounds.
    # NOTE(review): if the API ever returns a full YYYYMMDD here, these
    # slices produce wrong dates — confirm against _fetch_basic_info output.
    mm = acc_date[:2]
    dd = acc_date[2:]
    fmt_mm_dd = f"{mm}-{dd}"
    current_year = int(time.strftime("%Y"))
    results = []
    for i in range(5):
        target_year = current_year - i
        start_date = f"{target_year - 1}-{fmt_mm_dd}"
        end_date = f"{target_year}-{fmt_mm_dd}"
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty and 'repur_num_new' in df.columns:
            val = df['repur_num_new'].iloc[0]
            if pd.notna(val) and val != 0:
                results.append({
                    'date_str': f"{target_year}{acc_date}",
                    'repurchases': float(val)
                })
    if not results:
        return pd.DataFrame()
    df_repur = pd.DataFrame(results)
    self._save_raw_data(df_repur, symbol, "repurchases_raw")
    return df_repur
def get_employee_count(self, symbol: str, code: str) -> pd.DataFrame:
    """Headcount for the last five years, queried at the fiscal year end.

    Years with zero or missing counts are omitted; rows are dated at the
    fiscal year end (YYYY + MMDD).
    """
    info = self._fetch_basic_info(symbol, code)
    acc_date = info.get("accounting_date", "1231")
    month, day = acc_date[:2], acc_date[2:]
    this_year = int(time.strftime("%Y"))
    rows = []
    for back in range(5):
        year = this_year - back
        query_date = f"{year}-{month}-{day}"
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "staff_num", "indiparams": [query_date]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
        if df.empty or 'staff_num' not in df.columns:
            continue
        headcount = df['staff_num'].iloc[0]
        if pd.notna(headcount) and headcount != 0:
            rows.append({'date_str': f"{year}{acc_date}", 'employee_count': float(headcount)})
    if not rows:
        return pd.DataFrame()
    result = pd.DataFrame(rows)
    self._save_raw_data(result, symbol, "employee_count_raw")
    return result
def get_financial_ratios(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of financial ratios and per-employee metrics.

    First probes backwards from the current year (up to three years)
    using the ``roe`` indicator to find the most recent fiscal year that
    actually has data; then pulls five consecutive years of indicators
    ending at that year.  Raw data is persisted, columns are renamed to
    canonical names, values are coerced to numeric, and the result is
    passed through ``_filter_data``.

    Returns an empty DataFrame when no year yields usable data.
    """
    this_year = int(time.strftime("%Y"))
    # accounting_date is MMDD (defaults to 12-31).
    fiscal_end = self._fetch_basic_info(symbol, code).get("accounting_date", "1231")

    # --- Probe for the latest fiscal year with a usable annual report ---
    anchor_year = None
    for candidate in range(this_year, this_year - 3, -1):
        probe = {
            "codes": code,
            "indipara": [{"indicator": "roe", "indiparams": [f"{candidate}{fiscal_end}"]}]
        }
        probe_df = self._parse_ifind_tables(self.cli.post("basic_data_service", probe))
        if probe_df.empty:
            continue
        first_cell = probe_df.iloc[0, 0] if probe_df.shape[1] > 0 else None
        if pd.notna(first_cell) and first_cell != 0:
            anchor_year = candidate
            break
    if anchor_year is None:
        anchor_year = this_year

    # Per-employee indicators take [year, "100"]; plain ratios take the
    # full report date.
    per_employee_keys = ["salary_pp", "revenue_pp", "profit_pp"]
    ratio_keys = [
        "roe", "roa", "roic",
        "sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
        "operating_revenue_yoy", "np_atsopc_yoy",
        "ibdebt_ratio_asset_base",
        "inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
        "fixed_asset_turnover_ratio", "total_capital_turnover"
    ]

    # --- Fetch five years ending at the anchor year ---
    frames = []
    for year in range(anchor_year, anchor_year - 5, -1):
        report_date = f"{year}{fiscal_end}"
        indipara = [{"indicator": k, "indiparams": [str(year), "100"]} for k in per_employee_keys]
        indipara += [{"indicator": k, "indiparams": [report_date]} for k in ratio_keys]
        year_df = self._parse_ifind_tables(
            self.cli.post("basic_data_service", {"codes": code, "indipara": indipara})
        )
        if year_df.empty:
            continue
        if 'end_date' not in year_df.columns:
            year_df['end_date'] = report_date
        # Drop all-NaN columns, then skip years whose data columns are
        # entirely null (only metadata came back).
        year_df = year_df.dropna(axis=1, how='all')
        data_cols = [c for c in year_df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
        if not year_df[data_cols].isnull().all().all():
            frames.append(year_df)

    if not frames:
        return pd.DataFrame()
    combined = pd.concat(frames, ignore_index=True)
    self._save_raw_data(combined, symbol, "financial_ratios_raw")
    renamed = combined.rename(columns={
        "salary_pp": "salary_per_employee",
        "revenue_pp": "revenue_per_employee",
        "profit_pp": "profit_per_employee",
        "sales_fee_to_or": "selling_expense_ratio",
        "manage_fee_to_revenue": "admin_expense_ratio",
        "rad_expense_to_total_income": "rd_expense_ratio",
        "operating_revenue_yoy": "revenue_growth",
        "np_atsopc_yoy": "net_profit_growth",
        "ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
        "fixed_asset_turnover_ratio": "fixed_asset_turnover",
        "total_capital_turnover": "total_asset_turnover"
    })
    for column in renamed.columns:
        if column not in ['date', 'end_date']:
            renamed[column] = pd.to_numeric(renamed[column], errors='coerce')
    return self._filter_data(renamed)

View File

@ -1,515 +1,60 @@
import pandas as pd import pandas as pd
import os
import time
from .base import DataFetcher from .base import DataFetcher
from .ifind_client import IFindClient from .ifind_int_client import IFindIntClient
from storage.file_io import DataStorage from storage.file_io import DataStorage
class JpFetcher(DataFetcher): class JpFetcher(DataFetcher):
def __init__(self, api_key: str): def __init__(self, api_key: str):
# api_key is the iFinD Refresh Token # api_key is the iFinD Refresh Token
super().__init__(api_key) super().__init__(api_key)
self.cli = IFindClient(refresh_token=api_key) self.data_source = 'iFinD'
self.client = IFindIntClient(api_key, 'JP')
self.storage = DataStorage() self.storage = DataStorage()
self._basic_info_cache = {}
def _get_ifind_code(self, symbol: str) -> str: def _get_ifind_code(self, symbol: str) -> str:
"""保持逻辑一致性,如果是纯数字则补齐后缀 .T否则直接传""" # Simple logic: if pure digits, append .T (Tokyo SE).
# Otherwise assume it's already a code or handled.
if symbol.isdigit(): if symbol.isdigit():
return f"{symbol}.T" return f"{symbol}.T"
return symbol return symbol
def _fetch_basic_info(self, symbol: str) -> dict: def _fetch_basic_info(self, symbol: str):
"""获取公司的基本信息:中文名称、会计年结日、上市日期""" # Delegate to client
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
if code in self._basic_info_cache: return self.client._fetch_basic_info(symbol, code)
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231", # 默认 12-31
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
# accounting_date 通常返回类似 "03-31" 或 "1231"
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
# 好像是ifind的API有问题明明财报是0331但如果去读20240331就是空数据
# if acc_date:
# info["accounting_date"] = acc_date
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
# 如果是字典API 响应),直接保存
if isinstance(data, dict):
df = pd.DataFrame([data]) # 包装成单行 DF 或简单处理
else:
df = data
self.storage.save_data(df, 'JP', symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
if not res:
return pd.DataFrame()
if res.get("errorcode") != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
print("iFinD API Warning: No tables found in response.")
return pd.DataFrame()
# 提取第一个 table
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
# Ensure all values are lists to avoid pd.DataFrame ValueError with scalars
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
# If still no end_date, look for it in columns
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
# Handle YoY logic: YYYYMMDD -> (YYYY-1)MMDD
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
# 对齐 CN 逻辑,日本公司虽然多是 0331 截止
is_annual = df['end_date'].astype(str).str.endswith('0331') | df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
"""通用获取历年会计年结日的财务数据 (CNY 结算)"""
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
# 1. First, determine the most recent valid year by trying backwards from current year
last_valid_year = None
# Try up to 3 years back to find the latest available report
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}{acc_date}"
# Use the first indicator to test availability
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
# Check for non-null values
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
# 2. Fetch 5 years starting from the last valid year
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}{acc_date}"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
# 强制设置 end_date 以防 API 返回不一致
df['end_date'] = target_date
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
# Remove empty or Check for all-NA columns DataFrames (Fixing FutureWarning)
all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
def get_income_statement(self, symbol: str) -> pd.DataFrame: def get_income_statement(self, symbol: str) -> pd.DataFrame:
indicators = [ code = self._get_ifind_code(symbol)
{"indicator": "revenue_oas"}, return self.client.get_income_statement(symbol, code)
{"indicator": "gross_profit_oas"},
{"indicator": "sga_expenses_oas"},
{"indicator": "selling_marketing_expenses_oas"},
{"indicator": "ga_expenses_oas"},
{"indicator": "rd_expenses_oas"},
{"indicator": "income_tax_expense_oas"},
{"indicator": "net_income_attri_to_common_sh_oas"},
{"indicator": "operating_income_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'revenue_oas': 'revenue',
'gross_profit_oas': 'gross_profit',
'sga_expenses_oas': 'sga_exp',
'selling_marketing_expenses_oas': 'selling_marketing_exp',
'ga_expenses_oas': 'ga_exp',
'rd_expenses_oas': 'rd_exp',
'income_tax_expense_oas': 'income_tax',
'net_income_attri_to_common_sh_oas': 'net_income',
'operating_income_oas': 'operating_profit'
}
df_filtered = df.rename(columns=rename_map)
# 数值转换
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame: def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
indicators = [ code = self._get_ifind_code(symbol)
{"indicator": "cash_equi_short_term_inve_oas"}, return self.client.get_balance_sheet(symbol, code)
{"indicator": "accou_and_notes_recei_oas"},
{"indicator": "inventories_oas"},
{"indicator": "ppe_net_oas"},
{"indicator": "long_term_inv_and_receiv_oas"},
{"indicator": "goodwill_and_intasset_oas"},
{"indicator": "short_term_debt_oas"},
{"indicator": "short_term_borrowings_oas"},
{"indicator": "account_and_note_payable_oas"},
{"indicator": "contra_liabilities_current_oas"},
{"indicator": "advance_from_cust_current_oas"},
{"indicator": "defer_revenue_current_oas"},
{"indicator": "long_term_debt_oas"},
{"indicator": "long_term_borrowings_oas"},
{"indicator": "total_assets_oas"},
{"indicator": "equity_attri_to_companyowner_oas"},
{"indicator": "prepaid_expenses_current_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cash_equi_short_term_inve_oas': 'cash',
'accou_and_notes_recei_oas': 'receivables',
'inventories_oas': 'inventory',
'ppe_net_oas': 'fixed_assets',
'long_term_inv_and_receiv_oas': 'long_term_investments',
'goodwill_and_intasset_oas': 'goodwill',
'short_term_debt_oas': 'short_term_debt',
'short_term_borrowings_oas': 'short_term_borrowings',
'account_and_note_payable_oas': 'accounts_payable',
'contra_liabilities_current_oas': 'contract_liabilities',
'advance_from_cust_current_oas': 'advances_from_customers',
'defer_revenue_current_oas': 'deferred_revenue',
'long_term_debt_oas': 'long_term_debt',
'long_term_borrowings_oas': 'long_term_borrowings',
'total_assets_oas': 'total_assets',
'equity_attri_to_companyowner_oas': 'total_equity',
'prepaid_expenses_current_oas': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
# 如果没有负债合计,用资产减权益
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_cash_flow(self, symbol: str) -> pd.DataFrame: def get_cash_flow(self, symbol: str) -> pd.DataFrame:
indicators = [ code = self._get_ifind_code(symbol)
{"indicator": "net_cash_flows_from_oa_oas"}, return self.client.get_cash_flow(symbol, code)
{"indicator": "purchase_of_ppe_and_ia_oas"},
{"indicator": "dividends_paid_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'net_cash_flows_from_oa_oas': 'ocf',
'purchase_of_ppe_and_ia_oas': 'capex',
'dividends_paid_oas': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
def get_market_metrics(self, symbol: str) -> dict: def get_market_metrics(self, symbol: str) -> dict:
"""获取公司基本信息(名称、上市日期等静态数据)""" code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_market_metrics(symbol, code)
metrics = {
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", "")
}
return metrics
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame: def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
"""获取历史日期的收盘价和市值 (通过 cmd_history_quotation)"""
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
if not dates: return pd.DataFrame() return self.client.get_historical_metrics(symbol, code, dates)
results = []
# get_historical_metrics里面不要拿所有日期数据了而是一个一个数据拿
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
# print(f"iFinD API Request: endpoint=date_sequence, params={params}")
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
# 找到最接近该日期且不晚于该日期的记录
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
def get_dividends(self, symbol: str) -> pd.DataFrame: def get_dividends(self, symbol: str) -> pd.DataFrame:
"""获取历年年度累计分红记录 (逐年获取)"""
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_dividends(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
results = []
# 获取最近 5 年的数据
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}{acc_date}",
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
def get_repurchases(self, symbol: str) -> pd.DataFrame: def get_repurchases(self, symbol: str) -> pd.DataFrame:
"""获取历年年度回购记录 (从 repur_num_new 获取)"""
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_repurchases(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
# 为了对应日期格式 YYYY-MM-DD
fmt_mm_dd = f"{mm}-{dd}"
current_year = int(time.strftime("%Y"))
results = []
# 获取最近 5 年的数据
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-{fmt_mm_dd}"
end_date = f"{target_year}-{fmt_mm_dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
def get_employee_count(self, symbol: str) -> pd.DataFrame: def get_employee_count(self, symbol: str) -> pd.DataFrame:
"""获取历年员工人数"""
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_employee_count(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
current_year = int(time.strftime("%Y")) def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
results = [] code = self._get_ifind_code(symbol)
return self.client.get_financial_ratios(symbol, code)
# 获取最近 5 年的数据
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-{mm}-{dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp

View File

@ -0,0 +1,242 @@
import tushare as ts
import pandas as pd
from storage.file_io import DataStorage
class TushareCnClient:
    """Tushare-backed data client for China (A-share) listed companies.

    Wraps the tushare ``pro`` API and normalizes financial statements and
    market data into the column names the analysis layer expects.  Raw API
    responses are persisted via ``DataStorage`` before any transformation.
    """

    def __init__(self, api_key: str):
        # api_key is the Tushare Pro token.
        ts.set_token(api_key)
        self.pro = ts.pro_api()
        self.storage = DataStorage()
        self.api_key = api_key

    def _save_raw_data(self, df: pd.DataFrame, symbol: str, name: str):
        """Persist a raw API response under the CN market namespace; no-op for empty data."""
        if df is None or df.empty:
            return
        market = 'CN'
        self.storage.save_data(df, market, symbol, f"raw_{name}")

    def _get_ts_code(self, symbol: str) -> str:
        """Return the Tushare code for *symbol*; the symbol is assumed to already be a ts_code."""
        return symbol

    def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Reduce a statement DataFrame to: the latest report, its year-ago
        counterpart, and all annual (12-31) reports, deduplicated by
        ``end_date`` and sorted newest first.
        """
        if df.empty or 'end_date' not in df.columns:
            return df
        df = df.sort_values(by='end_date', ascending=False)
        df = df.drop_duplicates(subset=['end_date'], keep='first')
        if df.empty:
            return df
        latest_record = df.iloc[[0]]
        try:
            latest_date_str = str(latest_record['end_date'].values[0])
            # YYYYMMDD -> same day one year earlier, for YoY comparison.
            last_year_date_str = str(int(latest_date_str) - 10000)
            comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
        except:  # NOTE(review): bare except keeps best-effort behavior on malformed dates
            comparable_record = pd.DataFrame()
        is_annual = df['end_date'].astype(str).str.endswith('1231')
        annual_records = df[is_annual]
        combined = pd.concat([latest_record, comparable_record, annual_records])
        combined = combined.drop_duplicates(subset=['end_date'])
        combined = combined.sort_values(by='end_date', ascending=False)
        return combined

    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        """Fetch the income statement and rename key columns to canonical names."""
        ts_code = self._get_ts_code(symbol)
        df = self.pro.income(ts_code=ts_code)
        self._save_raw_data(df, ts_code, "income_statement")
        rename_map = {
            'end_date': 'date',
            'revenue': 'revenue',
            'n_income_attr_p': 'net_income'
        }
        df = self._filter_data(df)
        df = df.rename(columns=rename_map)
        return df

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        """Fetch the balance sheet and rename key columns to canonical names."""
        ts_code = self._get_ts_code(symbol)
        df = self.pro.balancesheet(ts_code=ts_code)
        self._save_raw_data(df, ts_code, "balance_sheet")
        rename_map = {
            'end_date': 'date',
            'total_hldr_eqy_exc_min_int': 'total_equity',
            'total_liab': 'total_liabilities',
            'total_cur_assets': 'current_assets',
            'total_cur_liab': 'current_liabilities'
        }
        df = self._filter_data(df)
        df = df.rename(columns=rename_map)
        return df

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        """Fetch the cash-flow statement and rename key columns to canonical names."""
        ts_code = self._get_ts_code(symbol)
        df = self.pro.cashflow(ts_code=ts_code)
        self._save_raw_data(df, ts_code, "cash_flow")
        df = self._filter_data(df)
        df = df.rename(columns={
            'end_date': 'date',
            'n_cashflow_act': 'net_cash_flow',
            'depr_fa_coga_dpba': 'depreciation'
        })
        return df

    def get_market_metrics(self, symbol: str) -> dict:
        """Fetch a snapshot of current market metrics.

        Combines ``daily_basic`` (price/PE/PB/market cap/dividend yield),
        ``stock_basic`` (name, list date), ``stock_company`` (employees)
        and ``stk_holdernumber`` (shareholder count).  Errors are logged
        and whatever was gathered so far is returned.
        """
        ts_code = self._get_ts_code(symbol)
        metrics = {
            "price": 0.0,
            "market_cap": 0.0,
            "pe": 0.0,
            "pb": 0.0,
            "total_share_holders": 0,
            "employee_count": 0
        }
        try:
            df_daily = self.pro.daily_basic(ts_code=ts_code, limit=1)
            self._save_raw_data(df_daily, ts_code, "market_metrics_daily_basic")
            if not df_daily.empty:
                row = df_daily.iloc[0]
                metrics["price"] = row.get('close', 0.0)
                metrics["pe"] = row.get('pe', 0.0)
                metrics["pb"] = row.get('pb', 0.0)
                # total_mv is in 万元 (10k CNY) per Tushare docs; convert to CNY.
                metrics["market_cap"] = row.get('total_mv', 0.0) * 10000
                metrics["dividend_yield"] = row.get('dv_ttm', 0.0)
            df_basic = self.pro.stock_basic(ts_code=ts_code, fields='name,list_date')
            self._save_raw_data(df_basic, ts_code, "market_metrics_stock_basic")
            if not df_basic.empty:
                metrics['name'] = df_basic.iloc[0]['name']
                metrics['list_date'] = df_basic.iloc[0]['list_date']
            df_comp = self.pro.stock_company(ts_code=ts_code)
            if not df_comp.empty:
                metrics["employee_count"] = int(df_comp.iloc[0].get('employees', 0) or 0)
            df_holder = self.pro.stk_holdernumber(ts_code=ts_code, limit=1)
            self._save_raw_data(df_holder, ts_code, "market_metrics_shareholder_number")
            if not df_holder.empty:
                metrics["total_share_holders"] = int(df_holder.iloc[0].get('holder_num', 0) or 0)
        except Exception as e:
            print(f"Error fetching market metrics for {symbol}: {e}")
        return metrics

    def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
        """Fetch price/PE/PB/market-cap/shareholder metrics at the given dates.

        For each requested date, the most recent record on or before that
        date is used.  Returns one row per unique date (newest first in
        the input ordering); partial results are returned on error.
        """
        ts_code = self._get_ts_code(symbol)
        results = []
        if not dates:
            return pd.DataFrame()
        unique_dates = sorted(list(set([str(d).replace('-', '') for d in dates])), reverse=True)
        try:
            import datetime  # NOTE(review): appears unused — candidate for removal
            min_date = min(unique_dates)
            max_date = max(unique_dates)
            df_daily = self.pro.daily_basic(ts_code=ts_code, start_date=min_date, end_date=max_date)
            self._save_raw_data(df_daily, ts_code, "historical_metrics_daily_basic")
            if not df_daily.empty:
                df_daily = df_daily.sort_values('trade_date', ascending=False)
            df_holder = self.pro.stk_holdernumber(ts_code=ts_code, start_date=min_date, end_date=max_date)
            self._save_raw_data(df_holder, ts_code, "historical_metrics_shareholder_number")
            if not df_holder.empty:
                df_holder = df_holder.sort_values('end_date', ascending=False)
            for date_str in unique_dates:
                metrics = {'date_str': date_str}
                if not df_daily.empty:
                    # Most recent trading day at or before the requested date.
                    closest_daily = df_daily[df_daily['trade_date'] <= date_str]
                    if not closest_daily.empty:
                        row = closest_daily.iloc[0]
                        metrics['Price'] = row.get('close')
                        metrics['PE'] = row.get('pe')
                        metrics['PB'] = row.get('pb')
                        # total_mv is in 万元 (10k CNY); convert to CNY.
                        metrics['MarketCap'] = row.get('total_mv', 0) * 10000
                if not df_holder.empty:
                    closest_holder = df_holder[df_holder['end_date'] <= date_str]
                    if not closest_holder.empty:
                        metrics['Shareholders'] = closest_holder.iloc[0].get('holder_num')
                results.append(metrics)
        except Exception as e:
            print(f"Error fetching historical metrics for {symbol}: {e}")
        return pd.DataFrame(results)

    def get_dividends(self, symbol: str) -> pd.DataFrame:
        """Compute total cash dividends paid per year.

        Per-share cash dividends (implemented plans only) are multiplied
        by total shares outstanding on the ex-date to get a CNY amount,
        then summed by the fiscal year of ``end_date``.  Returns columns
        ``date_str`` (YYYY1231) and ``dividends``.
        """
        import time
        ts_code = self._get_ts_code(symbol)
        df_div = self.pro.dividend(ts_code=ts_code, fields='end_date,ex_date,div_proc,cash_div')
        self._save_raw_data(df_div, ts_code, "dividends_raw")
        if df_div.empty:
            return pd.DataFrame()
        # Filter for implemented cash dividends
        df_div = df_div[(df_div['div_proc'] == '实施') & (df_div['cash_div'] > 0)]
        if df_div.empty:
            return pd.DataFrame()
        df_div['total_cash_div'] = 0.0
        # Get total shares for each ex_date
        for index, row in df_div.iterrows():
            ex_date = row['ex_date']
            if not ex_date or pd.isna(ex_date):
                continue
            try:
                time.sleep(0.2)  # Sleep for 200ms to avoid hitting API limits
                df_daily = self.pro.daily_basic(ts_code=ts_code, trade_date=ex_date, fields='total_share')
                if not df_daily.empty and not df_daily['total_share'].empty:
                    total_share = df_daily.iloc[0]['total_share']  # total_share is in 万股 (10k shares)
                    cash_div_per_share = row['cash_div']  # This is per-share
                    # Total dividend in Yuan
                    total_cash_dividend = (cash_div_per_share * total_share * 10000)
                    df_div.loc[index, 'total_cash_div'] = total_cash_dividend
            except Exception as e:
                print(f"Could not fetch daily basic for {ts_code} on {ex_date}: {e}")
        df_div['year'] = pd.to_datetime(df_div['end_date']).dt.year
        dividends_by_year = df_div.groupby('year')['total_cash_div'].sum().reset_index()
        dividends_by_year['date_str'] = dividends_by_year['year'].astype(str) + '1231'
        dividends_by_year.rename(columns={'total_cash_div': 'dividends'}, inplace=True)
        return dividends_by_year[['date_str', 'dividends']]

    def get_repurchases(self, symbol: str) -> pd.DataFrame:
        """Compute total share-repurchase amounts per year.

        Sums repurchase ``amount`` by the year of the announcement date.
        Returns columns ``date_str`` (YYYY1231) and ``repurchases``, or an
        empty DataFrame when there are no repurchases with a positive amount.
        """
        ts_code = self._get_ts_code(symbol)
        df = self.pro.repurchase(ts_code=ts_code)
        self._save_raw_data(df, ts_code, "repurchases")
        if df.empty or 'ann_date' not in df.columns or 'amount' not in df.columns:
            return pd.DataFrame()
        # Filter for repurchases with a valid amount
        df = df[df['amount'] > 0]
        if df.empty:
            return pd.DataFrame()
        # Extract year and group by it
        df['year'] = pd.to_datetime(df['ann_date']).dt.year
        repurchases_by_year = df.groupby('year')['amount'].sum().reset_index()
        # Create date_str for merging (YYYY1231)
        repurchases_by_year['date_str'] = repurchases_by_year['year'].astype(str) + '1231'
        # Rename for merging.
        # Based on user feedback, it appears the unit from the API is Yuan, so no conversion is needed.
        repurchases_by_year.rename(columns={'amount': 'repurchases'}, inplace=True)
        return repurchases_by_year[['date_str', 'repurchases']]

View File

@ -1,182 +1,93 @@
import requests
import pandas as pd import pandas as pd
import time
from .base import DataFetcher from .base import DataFetcher
# Import clients
from .alphavantage_us_client import AlphaVantageUsClient
from .ifind_int_client import IFindIntClient
from storage.file_io import DataStorage from storage.file_io import DataStorage
class UsFetcher(DataFetcher): class UsFetcher(DataFetcher):
BASE_URL = "https://www.alphavantage.co/query" def __init__(self, api_key: str, data_source: str = 'Alpha Vantage'):
def __init__(self, api_key: str):
super().__init__(api_key) super().__init__(api_key)
self.data_source = data_source
self.storage = DataStorage() self.storage = DataStorage()
def _save_raw_data(self, data, symbol: str, name: str): if self.data_source == 'iFinD':
if data is None: self.client = IFindIntClient(api_key, 'US')
return
df = pd.DataFrame()
if isinstance(data, list):
df = pd.DataFrame(data)
elif isinstance(data, dict):
# For single-record JSON objects, convert to a DataFrame
df = pd.DataFrame([data])
if not df.empty:
self.storage.save_data(df, 'US', symbol, f"raw_{name}")
def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
params = {
"function": function,
"symbol": symbol,
"apikey": self.api_key
}
try:
time.sleep(15)
response = requests.get(self.BASE_URL, params=params)
data = response.json()
except Exception as e:
print(f"Error requesting {function}: {e}")
return pd.DataFrame()
if data:
self._save_raw_data(data.get("annualReports"), symbol, f"{function.lower()}_annual")
df_annual = pd.DataFrame()
if "annualReports" in data and data["annualReports"]:
df_annual = pd.DataFrame(data["annualReports"])
if "fiscalDateEnding" in df_annual.columns:
df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
# Dynamic year filtering: Find the latest report with valid data and take surrounding 5 years
# For Alpha Vantage, data is already sorted by date descending.
# We simply check for the first row with non-None values in critical columns if possible,
# but usually AV returns valid blocks. We'll just take the top 5.
# Unlike iFinD, AV returns a list of available reports, so we don't need to probe year by year.
# Keep top 5 latest entries
df_annual = df_annual.head(5)
else: else:
print(f"Error fetching {function} for {symbol}: {data}") self.client = AlphaVantageUsClient(api_key)
return pd.DataFrame()
return df_annual def _get_ifind_code(self, symbol: str) -> str:
# If using iFinD for US, what's the code?
# Often ticker is enough or ticker + suffix.
# Since IFindClient takes list of codes, maybe just 'AAPL'?
# Let's return symbol for now.
return symbol
def _save_raw_data(self, data, symbol: str, name: str):
# Only used if strictly needed by fetcher itself, but now client handles it.
# However, let's keep it for compatibility if something else calls it or legacy.
pass
def get_market_metrics(self, symbol: str) -> dict: def get_market_metrics(self, symbol: str) -> dict:
# 1. Get Overview for PE, PB, MarketCap, Employees if self.data_source == 'iFinD':
overview_data = {} code = self._get_ifind_code(symbol)
try: return self.client.get_market_metrics(symbol, code)
time.sleep(15) else:
params = {"function": "OVERVIEW", "symbol": symbol, "apikey": self.api_key} return self.client.get_market_metrics(symbol)
r = requests.get(self.BASE_URL, params=params)
overview_data = r.json()
# Clean up 'None' strings from API response before processing
if isinstance(overview_data, dict):
for key, value in overview_data.items():
if value == 'None':
overview_data[key] = None
self._save_raw_data(overview_data, symbol, "market_metrics_overview")
except Exception as e:
print(f"Error fetching OVERVIEW for {symbol}: {e}")
market_cap = float(overview_data.get("MarketCapitalization") or 0)
shares_outstanding = float(overview_data.get("SharesOutstanding") or 0)
price = 0
if shares_outstanding > 0:
price = market_cap / shares_outstanding
return {
"price": price,
"name": overview_data.get("Name"),
"fiscal_year_end": overview_data.get("FiscalYearEnd"),
"dividend_yield": float(overview_data.get("DividendYield") or 0),
"market_cap": market_cap,
"pe": float(overview_data.get("PERatio") or 0),
"pb": float(overview_data.get("PriceToBookRatio") or 0),
"employee_count": int(float(overview_data.get("FullTimeEmployees") or 0)),
"total_share_holders": 0 # Not typically provided in basic AV Overview
}
def get_income_statement(self, symbol: str) -> pd.DataFrame: def get_income_statement(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("INCOME_STATEMENT", symbol) if self.data_source == 'iFinD':
cols_map = { code = self._get_ifind_code(symbol)
"fiscalDateEnding": "date", return self.client.get_income_statement(symbol, code)
"totalRevenue": "revenue", else:
"netIncome": "net_income", return self.client.get_income_statement(symbol)
"grossProfit": "gross_profit",
"costOfRevenue": "cogs",
"researchAndDevelopment": "rd_exp",
"sellingGeneralAndAdministrative": "sga_exp",
"interestExpense": "fin_exp",
"incomeBeforeTax": "total_profit",
"incomeTaxExpense": "income_tax",
"ebit": "ebit"
}
df = df.rename(columns=cols_map)
# Convert numeric columns for analysis, keep others as is
numeric_cols = [
"revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
"fin_exp", "total_profit", "income_tax", "ebit",
"depreciation", "depreciationAndAmortization"
]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
def get_balance_sheet(self, symbol: str) -> pd.DataFrame: def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("BALANCE_SHEET", symbol) if self.data_source == 'iFinD':
cols_map = { code = self._get_ifind_code(symbol)
"fiscalDateEnding": "date", return self.client.get_balance_sheet(symbol, code)
"totalShareholderEquity": "total_equity", else:
"totalLiabilities": "total_liabilities", return self.client.get_balance_sheet(symbol)
"totalCurrentAssets": "current_assets",
"totalCurrentLiabilities": "current_liabilities",
"cashAndCashEquivalentsAtCarryingValue": "cash",
"currentNetReceivables": "receivables",
"inventory": "inventory",
"propertyPlantEquipment": "fixed_assets",
"totalAssets": "total_assets",
"goodwill": "goodwill",
"longTermInvestments": "lt_invest",
"shortTermDebt": "short_term_debt",
"currentLongTermDebt": "short_term_debt_part",
"longTermDebt": "long_term_debt",
"currentAccountsPayable": "accounts_payable",
"otherCurrentAssets": "prepayment",
"otherNonCurrentAssets": "other_assets",
"deferredRevenue": "adv_receipts"
}
df = df.rename(columns=cols_map)
numeric_cols = [
"total_equity", "total_liabilities", "current_assets", "current_liabilities",
"cash", "receivables", "inventory", "fixed_assets", "total_assets",
"goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
"long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts"
]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
def get_cash_flow(self, symbol: str) -> pd.DataFrame: def get_cash_flow(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("CASH_FLOW", symbol) if self.data_source == 'iFinD':
cols_map = { code = self._get_ifind_code(symbol)
"fiscalDateEnding": "date", return self.client.get_cash_flow(symbol, code)
"operatingCashflow": "ocf", else:
"capitalExpenditures": "capex", return self.client.get_cash_flow(symbol)
"dividendPayout": "dividends",
"depreciationDepletionAndAmortization": "depreciation"
}
df = df.rename(columns=cols_map)
numeric_cols = ["ocf", "capex", "dividends", "depreciation"] # Optional methods not originally in UsFetcher but available in iFinD client
for col in numeric_cols: # If using Alpha Vantage, these might not be supported or need adding to AV client.
if col in df.columns: # We will only expose if iFinD or if AV client supports it (AV client currently doesn't implement these)
df[col] = pd.to_numeric(df[col], errors='coerce') # So we can create empty/dummy implementations for AV or check data_source.
return df
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
    """Return historical valuation metrics for *symbol* at the given dates.

    Only the iFinD backend supports this lookup; any other data source
    yields an empty DataFrame.
    """
    if self.data_source != 'iFinD':
        return pd.DataFrame()
    ifind_code = self._get_ifind_code(symbol)
    return self.client.get_historical_metrics(symbol, ifind_code, dates)
def get_dividends(self, symbol: str) -> pd.DataFrame:
    """Return dividend history for *symbol* (iFinD backend only).

    Other data sources have no dividend-series support, so an empty
    DataFrame is returned for them.
    """
    if self.data_source != 'iFinD':
        return pd.DataFrame()
    return self.client.get_dividends(symbol, self._get_ifind_code(symbol))
def get_repurchases(self, symbol: str) -> pd.DataFrame:
    """Return share-repurchase history for *symbol*.

    Only implemented for the iFinD data source; any other backend
    returns an empty DataFrame.
    """
    if self.data_source == 'iFinD':
        code = self._get_ifind_code(symbol)
        return self.client.get_repurchases(symbol, code)
    return pd.DataFrame()
def get_employee_count(self, symbol: str) -> pd.DataFrame:
    """Return a historical employee-count series for *symbol*.

    Only the iFinD backend provides a time series. Alpha Vantage exposes
    just a single current headcount via ``get_market_metrics``, so for
    non-iFinD sources an empty DataFrame is returned.
    """
    if self.data_source == 'iFinD':
        code = self._get_ifind_code(symbol)
        return self.client.get_employee_count(symbol, code)
    # AV Metrics has employee count in market metrics, but not historical series yet.
    return pd.DataFrame()
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
    """Return a financial-ratio series for *symbol* (iFinD backend only).

    Non-iFinD data sources do not support this endpoint and yield an
    empty DataFrame.
    """
    if self.data_source != 'iFinD':
        return pd.DataFrame()
    return self.client.get_financial_ratios(symbol, self._get_ifind_code(symbol))

View File

@ -1,474 +1,56 @@
import pandas as pd import pandas as pd
import os
import time
from .base import DataFetcher from .base import DataFetcher
from .ifind_client import IFindClient from .ifind_int_client import IFindIntClient
from storage.file_io import DataStorage from storage.file_io import DataStorage
class VnFetcher(DataFetcher): class VnFetcher(DataFetcher):
def __init__(self, api_key: str): def __init__(self, api_key: str):
# api_key is the iFinD Refresh Token # api_key is the iFinD Refresh Token
super().__init__(api_key) super().__init__(api_key)
self.cli = IFindClient(refresh_token=api_key) self.data_source = 'iFinD'
self.client = IFindIntClient(api_key, 'VN')
self.storage = DataStorage() self.storage = DataStorage()
self._basic_info_cache = {}
def _get_ifind_code(self, symbol: str) -> str: def _get_ifind_code(self, symbol: str) -> str:
# Vietnam stocks usually have 3 letter codes. # VN stocks in iFinD usually just symbol (e.g. VNM)
# We assume the user provides the correct code (e.g. VNM, or VNM.VN).
# We can add simple logic: if it's 3 letters, maybe append nothing?
# iFinD codes often need suffix. But without documentation, safest is to pass through.
return symbol return symbol
def _fetch_basic_info(self, symbol: str) -> dict: def _fetch_basic_info(self, symbol: str):
"""获取公司的基本信息:中文名称、会计年结日、上市日期"""
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
if code in self._basic_info_cache: return self.client._fetch_basic_info(symbol, code)
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231", # Default 12-31
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
if acc_date:
info["accounting_date"] = acc_date
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
if isinstance(data, dict):
df = pd.DataFrame([data])
else:
df = data
self.storage.save_data(df, 'VN', symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
if not res:
return pd.DataFrame()
if res.get("errorcode") != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
# print("iFinD API Warning: No tables found in response.")
return pd.DataFrame()
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
# VN usually ends in 1231
is_annual = df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
    """Fetch up to five annual reports of the given indicators via iFinD.

    Strategy: probe the last three fiscal year-ends with the first
    indicator to find the most recent year that actually has data, then
    pull all indicators for that year and the four preceding years.

    Args:
        symbol: Market symbol; converted to an iFinD code internally.
        indicator_configs: List of dicts, each with an "indicator" key and
            an optional "type" key (defaults to "1" — presumably a report
            type flag; TODO confirm against iFinD docs).

    Returns:
        Concatenated DataFrame of yearly rows with an ``end_date`` column
        (YYYY + fiscal year-end MMDD), or an empty DataFrame if nothing
        usable came back.
    """
    code = self._get_ifind_code(symbol)
    basic_info = self._fetch_basic_info(symbol)
    # Fiscal year-end as MMDD; defaults to Dec-31.
    acc_date = basic_info.get("accounting_date", "1231")
    current_year = int(time.strftime("%Y"))
    last_valid_year = None
    # Probe backwards up to 3 years for the latest year with published data.
    for offset in range(3):
        test_year = current_year - offset
        test_date = f"{test_year}{acc_date}"
        first_indicator = indicator_configs[0]
        params = {
            "codes": code,
            "indipara": [
                {"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
            # A non-null, non-zero value counts as "data exists" for this year.
            if pd.notna(valid_val) and valid_val != 0:
                last_valid_year = test_year
                break
    if last_valid_year is None:
        # No probe succeeded; fall back to the current year anyway.
        last_valid_year = current_year
    all_dfs = []
    # Fetch the five most recent fiscal years, one request per year.
    for i in range(5):
        target_year = last_valid_year - i
        target_date = f"{target_year}{acc_date}"
        params = {
            "codes": code,
            "indipara": [
                {"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
                for item in indicator_configs
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            df['end_date'] = target_date
            all_dfs.append(df)
    if not all_dfs:
        return pd.DataFrame()
    # Drop frames that are entirely NaN before concatenating.
    all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
    if not all_dfs:
        return pd.DataFrame()
    return pd.concat(all_dfs, ignore_index=True)
def get_income_statement(self, symbol: str) -> pd.DataFrame: def get_income_statement(self, symbol: str) -> pd.DataFrame:
indicators = [ code = self._get_ifind_code(symbol)
{"indicator": "revenue_oas"}, return self.client.get_income_statement(symbol, code)
{"indicator": "gross_profit_oas"},
{"indicator": "sga_expenses_oas"},
{"indicator": "selling_marketing_expenses_oas"},
{"indicator": "ga_expenses_oas"},
{"indicator": "rd_expenses_oas"},
{"indicator": "income_tax_expense_oas"},
{"indicator": "net_income_attri_to_common_sh_oas"},
{"indicator": "operating_income_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'revenue_oas': 'revenue',
'gross_profit_oas': 'gross_profit',
'sga_expenses_oas': 'sga_exp',
'selling_marketing_expenses_oas': 'selling_marketing_exp',
'ga_expenses_oas': 'ga_exp',
'rd_expenses_oas': 'rd_exp',
'income_tax_expense_oas': 'income_tax',
'net_income_attri_to_common_sh_oas': 'net_income',
'operating_income_oas': 'operating_profit'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame: def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
indicators = [ code = self._get_ifind_code(symbol)
{"indicator": "cash_equi_short_term_inve_oas"}, return self.client.get_balance_sheet(symbol, code)
{"indicator": "accou_and_notes_recei_oas"},
{"indicator": "inventories_oas"},
{"indicator": "ppe_net_oas"},
{"indicator": "long_term_inv_and_receiv_oas"},
{"indicator": "goodwill_and_intasset_oas"},
{"indicator": "short_term_debt_oas"},
{"indicator": "short_term_borrowings_oas"},
{"indicator": "account_and_note_payable_oas"},
{"indicator": "contra_liabilities_current_oas"},
{"indicator": "advance_from_cust_current_oas"},
{"indicator": "defer_revenue_current_oas"},
{"indicator": "long_term_debt_oas"},
{"indicator": "long_term_borrowings_oas"},
{"indicator": "total_assets_oas"},
{"indicator": "equity_attri_to_companyowner_oas"},
{"indicator": "prepaid_expenses_current_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cash_equi_short_term_inve_oas': 'cash',
'accou_and_notes_recei_oas': 'receivables',
'inventories_oas': 'inventory',
'ppe_net_oas': 'fixed_assets',
'long_term_inv_and_receiv_oas': 'long_term_investments',
'goodwill_and_intasset_oas': 'goodwill',
'short_term_debt_oas': 'short_term_debt',
'short_term_borrowings_oas': 'short_term_borrowings',
'account_and_note_payable_oas': 'accounts_payable',
'contra_liabilities_current_oas': 'contract_liabilities',
'advance_from_cust_current_oas': 'advances_from_customers',
'defer_revenue_current_oas': 'deferred_revenue',
'long_term_debt_oas': 'long_term_debt',
'long_term_borrowings_oas': 'long_term_borrowings',
'total_assets_oas': 'total_assets',
'equity_attri_to_companyowner_oas': 'total_equity',
'prepaid_expenses_current_oas': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_cash_flow(self, symbol: str) -> pd.DataFrame: def get_cash_flow(self, symbol: str) -> pd.DataFrame:
indicators = [ code = self._get_ifind_code(symbol)
{"indicator": "net_cash_flows_from_oa_oas"}, return self.client.get_cash_flow(symbol, code)
{"indicator": "purchase_of_ppe_and_ia_oas"},
{"indicator": "dividends_paid_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'net_cash_flows_from_oa_oas': 'ocf',
'purchase_of_ppe_and_ia_oas': 'capex',
'dividends_paid_oas': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
def get_market_metrics(self, symbol: str) -> dict: def get_market_metrics(self, symbol: str) -> dict:
basic_info = self._fetch_basic_info(symbol) code = self._get_ifind_code(symbol)
metrics = { return self.client.get_market_metrics(symbol, code)
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", "")
}
return metrics
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame: def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
if not dates: return pd.DataFrame() return self.client.get_historical_metrics(symbol, code, dates)
results = []
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
def get_dividends(self, symbol: str) -> pd.DataFrame: def get_dividends(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_dividends(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}{acc_date}",
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
def get_repurchases(self, symbol: str) -> pd.DataFrame: def get_repurchases(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_repurchases(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
fmt_mm_dd = f"{mm}-{dd}"
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-{fmt_mm_dd}"
end_date = f"{target_year}-{fmt_mm_dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
def get_employee_count(self, symbol: str) -> pd.DataFrame: def get_employee_count(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol) code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol) return self.client.get_employee_count(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
current_year = int(time.strftime("%Y")) def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
results = [] code = self._get_ifind_code(symbol)
return self.client.get_financial_ratios(symbol, code)
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-{mm}-{dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp

View File

@ -15,7 +15,7 @@ class BaseReporter(ABC):
""" """
pass pass
def _generate_markdown_content(self, df: pd.DataFrame, market: str, symbol: str, metrics: dict = {}) -> str: def _generate_markdown_content(self, df: pd.DataFrame, market: str, symbol: str, metrics: dict = {}, data_source: str = None) -> str:
if df.empty: if df.empty:
return f"No breakdown data available for {market} {symbol}" return f"No breakdown data available for {market} {symbol}"
@ -23,7 +23,7 @@ class BaseReporter(ABC):
headers = self._get_headers(df) headers = self._get_headers(df)
md = [] md = []
md.append(self._generate_md_company_info(symbol, metrics, market)) md.append(self._generate_md_company_info(symbol, metrics, market, data_source))
md.append("\n") md.append("\n")
for group_name, items in self.indicators.items(): for group_name, items in self.indicators.items():
@ -69,7 +69,7 @@ class BaseReporter(ABC):
disp_val = f"{val}" disp_val = f"{val}"
return disp_val return disp_val
def _generate_md_company_info(self, symbol, metrics, market): def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
return "" # Implemented in subclasses return "" # Implemented in subclasses
def _preprocess_data(self, df, market): def _preprocess_data(self, df, market):

View File

@ -108,7 +108,7 @@ class CN_ReportGenerator(BaseReporter):
def _get_headers(self, df): def _get_headers(self, df):
return [self._format_period_label(date_value) for date_value in df['date_str']] return [self._format_period_label(date_value) for date_value in df['date_str']]
def _generate_md_company_info(self, symbol, metrics, market): def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '') name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -121,15 +121,19 @@ class CN_ReportGenerator(BaseReporter):
div = metrics.get('dividend_yield', 0) or 0 div = metrics.get('dividend_yield', 0) or 0
md = [] md = []
md.append(f"# {name} ({symbol}) - Financial Report") md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n") md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |") md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|") md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |") md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md) return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir): def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
# 1. Generate Markdown content # 1. Generate Markdown content
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics) md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
# 2. Save Markdown file # 2. Save Markdown file
md_path = os.path.join(output_dir, "report.md") md_path = os.path.join(output_dir, "report.md")
@ -144,7 +148,7 @@ class CN_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html) headers = self._get_headers(df_for_html)
else: else:
headers = [] headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html) html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content) final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html") html_path = os.path.join(output_dir, "report.html")
@ -152,7 +156,7 @@ class CN_ReportGenerator(BaseReporter):
f.write(final_html) f.write(final_html)
print(f"HTML report saved to {html_path}") print(f"HTML report saved to {html_path}")
def _build_html_content(self, symbol, metrics, headers, df): def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol name = metrics.get('name') or symbol
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -229,6 +233,7 @@ class CN_ReportGenerator(BaseReporter):
html_sections = [ html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>", f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>", f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table, company_table,
'<div class="table-gap"></div>', '<div class="table-gap"></div>',
metrics_table metrics_table

View File

@ -97,7 +97,7 @@ class HK_ReportGenerator(BaseReporter):
return [self._format_period_label(date_value) for date_value in df['date_str']] return [self._format_period_label(date_value) for date_value in df['date_str']]
return [] return []
def _generate_md_company_info(self, symbol, metrics, market): def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '') name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -113,15 +113,19 @@ class HK_ReportGenerator(BaseReporter):
md = [] md = []
md.append(f"# {name} ({symbol}) - Financial Report") md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n") md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | 年结日 | 市值(亿) | PE | PB | 股息率(%) |") md.append("| 代码 | 简称 | 上市日期 | 年结日 | 市值(亿) | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|:---|:---|") md.append("|:---|:---|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {acc_date} | {mcap:.2f} | {pe:.2f} | {pb:.2f} | {div:.2f}% |") md.append(f"| {symbol} | {name} | {list_date} | {acc_date} | {mcap:.2f} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md) return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir): def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics) md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
md_path = os.path.join(output_dir, "report.md") md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f: with open(md_path, "w", encoding='utf-8') as f:
f.write(md_content) f.write(md_content)
@ -132,14 +136,14 @@ class HK_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html) headers = self._get_headers(df_for_html)
else: else:
headers = [] headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html) html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content) final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html") html_path = os.path.join(output_dir, "report.html")
with open(html_path, "w", encoding='utf-8') as f: with open(html_path, "w", encoding='utf-8') as f:
f.write(final_html) f.write(final_html)
def _build_html_content(self, symbol, metrics, headers, df): def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol name = metrics.get('name') or symbol
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -222,6 +226,7 @@ class HK_ReportGenerator(BaseReporter):
html_sections = [ html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>", f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>", f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table, company_table,
'<div class="table-gap"></div>', '<div class="table-gap"></div>',
metrics_table metrics_table

View File

@ -109,7 +109,7 @@ class JP_ReportGenerator(BaseReporter):
def _get_headers(self, df): def _get_headers(self, df):
return [self._format_period_label(date_value) for date_value in df['date_str']] return [self._format_period_label(date_value) for date_value in df['date_str']]
def _generate_md_company_info(self, symbol, metrics, market): def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '') name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -122,14 +122,18 @@ class JP_ReportGenerator(BaseReporter):
div = metrics.get('dividend_yield', 0) or 0 div = metrics.get('dividend_yield', 0) or 0
md = [] md = []
md.append(f"# {name} ({symbol}) - Financial Report") md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n") md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |") md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|") md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |") md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md) return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir): def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics) md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
md_path = os.path.join(output_dir, "report.md") md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f: with open(md_path, "w", encoding='utf-8') as f:
@ -141,7 +145,7 @@ class JP_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html) headers = self._get_headers(df_for_html)
else: else:
headers = [] headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html) html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
# Re-use the exact same styled HTML from CN_ReportGenerator # Re-use the exact same styled HTML from CN_ReportGenerator
final_html = self.to_html(symbol, html_content) final_html = self.to_html(symbol, html_content)
@ -149,7 +153,7 @@ class JP_ReportGenerator(BaseReporter):
with open(html_path, "w", encoding='utf-8') as f: with open(html_path, "w", encoding='utf-8') as f:
f.write(final_html) f.write(final_html)
def _build_html_content(self, symbol, metrics, headers, df): def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
# Implementation identical to CN_ReportGenerator for style consistency # Implementation identical to CN_ReportGenerator for style consistency
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol name = metrics.get('name') or symbol
@ -227,6 +231,7 @@ class JP_ReportGenerator(BaseReporter):
html_sections = [ html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>", f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>", f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table, company_table,
'<div class="table-gap"></div>', '<div class="table-gap"></div>',
metrics_table metrics_table

View File

@ -58,7 +58,7 @@ class US_ReportGenerator(BaseReporter):
] ]
} }
def _generate_md_company_info(self, symbol, metrics, market): def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '') name = metrics.get('name', '')
fiscal_year_end = metrics.get('fiscal_year_end', '') fiscal_year_end = metrics.get('fiscal_year_end', '')
@ -68,14 +68,22 @@ class US_ReportGenerator(BaseReporter):
md = [] md = []
md.append(f"# {name} ({symbol}) - Financial Report") md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n") md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 财报日期 | PE | PB | 股息率(%) |") md.append("| 代码 | 简称 | 财报日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|") md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {fiscal_year_end} | {pe:.2f} | {pb:.2f} | {div_yield:.2f}% |") md.append(f"| {symbol} | {name} | {fiscal_year_end} | {pe:.2f} | {pb:.2f} | {div_yield:.2f}% |")
return "\n".join(md) return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir): def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics) # Override to pass data_source to _generate_md_company_info
# Note: BaseReporter._generate_markdown_content calls _generate_md_company_info
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
md_path = os.path.join(output_dir, "report.md") md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f: with open(md_path, "w", encoding='utf-8') as f:
@ -88,7 +96,7 @@ class US_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html) headers = self._get_headers(df_for_html)
else: else:
headers = [] headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html) html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content) final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html") html_path = os.path.join(output_dir, "report.html")
@ -96,7 +104,7 @@ class US_ReportGenerator(BaseReporter):
f.write(final_html) f.write(final_html)
print(f"HTML report saved to {html_path}") print(f"HTML report saved to {html_path}")
def _build_html_content(self, symbol, metrics, headers, df): def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol name = metrics.get('name') or symbol
fiscal_year_end = metrics.get('fiscal_year_end') or "-" fiscal_year_end = metrics.get('fiscal_year_end') or "-"
@ -169,6 +177,7 @@ class US_ReportGenerator(BaseReporter):
html_sections = [ html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>", f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>", f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table, company_table,
'<div class="table-gap"></div>', '<div class="table-gap"></div>',
metrics_table metrics_table

View File

@ -99,7 +99,7 @@ class VN_ReportGenerator(BaseReporter):
def _get_headers(self, df): def _get_headers(self, df):
return [self._format_period_label(date_value) for date_value in df['date_str']] return [self._format_period_label(date_value) for date_value in df['date_str']]
def _generate_md_company_info(self, symbol, metrics, market): def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '') name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -112,14 +112,18 @@ class VN_ReportGenerator(BaseReporter):
div = metrics.get('dividend_yield', 0) or 0 div = metrics.get('dividend_yield', 0) or 0
md = [] md = []
md.append(f"# {name} ({symbol}) - Financial Report") md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n") md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |") md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|") md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |") md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md) return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir): def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics) md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
md_path = os.path.join(output_dir, "report.md") md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f: with open(md_path, "w", encoding='utf-8') as f:
@ -131,14 +135,15 @@ class VN_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html) headers = self._get_headers(df_for_html)
else: else:
headers = [] headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html) html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content) final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html") html_path = os.path.join(output_dir, "report.html")
with open(html_path, "w", encoding='utf-8') as f: with open(html_path, "w", encoding='utf-8') as f:
f.write(final_html) f.write(final_html)
print(f"HTML report saved to {html_path}")
def _build_html_content(self, symbol, metrics, headers, df): def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d") today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol name = metrics.get('name') or symbol
raw_list_date = metrics.get('list_date', '') raw_list_date = metrics.get('list_date', '')
@ -215,6 +220,7 @@ class VN_ReportGenerator(BaseReporter):
html_sections = [ html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>", f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>", f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table, company_table,
'<div class="table-gap"></div>', '<div class="table-gap"></div>',
metrics_table metrics_table

View File

@ -52,7 +52,9 @@ class CN_Strategy(BaseStrategy):
symbol=self.stock_code, symbol=self.stock_code,
market='CN', market='CN',
metrics=self.raw_data['metrics'], metrics=self.raw_data['metrics'],
output_dir=output_dir output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'Tushare')
) )
else: else:
print("No analysis result to generate report.") print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")

View File

@ -76,7 +76,9 @@ class HK_Strategy(BaseStrategy):
symbol=self.stock_code, symbol=self.stock_code,
market='HK', market='HK',
metrics=self.raw_data['metrics'], metrics=self.raw_data['metrics'],
output_dir=output_dir output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
) )
else: else:
print("No analysis result to generate report.") print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")

View File

@ -71,9 +71,11 @@ class JP_Strategy(BaseStrategy):
symbol=self.stock_code, symbol=self.stock_code,
market='JP', market='JP',
metrics=self.raw_data['metrics'], metrics=self.raw_data['metrics'],
output_dir=output_dir output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
) )
else: else:
print("No analysis result to generate report.") print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
import pandas as pd # Import needed for the placeholder DataFrames import pandas as pd # Import needed for the placeholder DataFrames

View File

@ -6,10 +6,12 @@ from storage.file_io import DataStorage
import os import os
class US_Strategy(BaseStrategy): class US_Strategy(BaseStrategy):
def __init__(self, stock_code, av_key): def __init__(self, stock_code, av_key, data_source=None):
super().__init__(stock_code) super().__init__(stock_code)
self.av_key = av_key self.av_key = av_key
self.fetcher = FetcherFactory.get_fetcher('US', av_key=self.av_key) # If using iFinD for US, we might need IFIND token, but factory handles retrieval from env if needed
# We pass av_key as it's the required arg for get_fetcher's av_key param (or we can make it optional in factory call if unused)
self.fetcher = FetcherFactory.get_fetcher('US', av_key=self.av_key, data_source=data_source)
self.analyzer = US_Analyzer() self.analyzer = US_Analyzer()
self.reporter = US_ReportGenerator() self.reporter = US_ReportGenerator()
self.storage = DataStorage() self.storage = DataStorage()
@ -42,7 +44,9 @@ class US_Strategy(BaseStrategy):
symbol=self.stock_code, symbol=self.stock_code,
market='US', market='US',
metrics=self.raw_data['metrics'], metrics=self.raw_data['metrics'],
output_dir=output_dir output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', None)
) )
else: else:
print("No analysis result to generate report.") print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")

View File

@ -70,7 +70,9 @@ class VN_Strategy(BaseStrategy):
symbol=self.stock_code, symbol=self.stock_code,
market='VN', market='VN',
metrics=self.raw_data['metrics'], metrics=self.raw_data['metrics'],
output_dir=output_dir output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
) )
else: else:
print("No analysis result to generate report.") print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")