Change data source configuration

xucheng 2026-01-08 21:01:55 +08:00
parent 548ee242ba
commit b9c8f90cbc
29 changed files with 2198 additions and 2132 deletions

View File

@@ -42,9 +42,12 @@ async def search_stock(request: StockSearchRequest, db: AsyncSession = Depends(g
@router.post("/analyze", response_model=ReportResponse)
async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)):
# Get AI model setting
model_setting = await db.get(Setting, "AI_MODEL")
model = model_setting.value if model_setting else "gemini-2.0-flash"
# Get AI model
if request.model:
model = request.model
else:
model_setting = await db.get(Setting, "AI_MODEL")
model = model_setting.value if model_setting else "gemini-2.0-flash"
new_report = Report(
market=request.market,
@@ -71,7 +74,8 @@ async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundT
new_report.id,
request.market,
request.symbol,
api_key
api_key,
request.data_source
)
# Re-fetch with selectinload to avoid lazy loading issues
@@ -122,12 +126,18 @@ async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
except Exception as e:
financial_html = f"<p>加载财务图表时出错: {str(e)}</p>"
# If content is not ready, add auto-refresh meta tag
meta_refresh = ""
if "财务图表尚未生成" in financial_html:
meta_refresh = '<meta http-equiv="refresh" content="2">'
# Only return financial charts, no analysis sections
final_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
{meta_refresh}
<title>{report.company_name} - 财务数据</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; padding: 20px; line-height: 1.6; max-width: 1200px; margin: 0 auto; }}
@@ -162,7 +172,7 @@ async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
</body>
</html>
"""
return final_html
return HTMLResponse(content=final_html, headers={"Cache-Control": "no-store, no-cache, must-revalidate", "Pragma": "no-cache", "Expires": "0"})
@router.get("/config")
async def get_config(db: AsyncSession = Depends(get_db)):

View File

@@ -21,6 +21,8 @@ class AnalysisRequest(BaseModel):
market: str
symbol: str
company_name: str
model: Optional[str] = None
data_source: Optional[str] = None
class ReportSectionSchema(BaseModel):
section_name: str
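For reference, a minimal sketch of the request body the frontend now sends to the /analyze route, using the optional fields added to AnalysisRequest above (the base URL and port are assumptions, not part of this commit):

import requests  # illustrative client-side sketch only

payload = {
    "market": "US",
    "symbol": "AAPL",
    "company_name": "Apple Inc.",
    "model": "gemini-2.5-flash",      # optional; if omitted the backend falls back to the AI_MODEL setting
    "data_source": "Alpha Vantage",   # optional; forwarded to run_fetcher.py as --data-source
}
resp = requests.post("http://localhost:8000/analyze", json=payload)  # URL is an assumption
resp.raise_for_status()
print(resp.json())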

View File

@@ -113,12 +113,12 @@ async def search_stock(query: str, api_key: str, model: str = "gemini-2.0-flash"
print(f"Search error: {e}")
return {"error": f"搜索失败: {str(e)}"}
async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: str):
async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: str, data_source: str = None):
"""
Background task to run the full analysis pipeline.
Creates its own DB session.
"""
print(f"Starting analysis for report {report_id}: {market} {symbol}")
print(f"Starting analysis for report {report_id}: {market} {symbol} (Source: {data_source})")
# Create new session
from app.database import AsyncSessionLocal
@@ -138,6 +138,8 @@ async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: s
# 2. Run Main Data Fetching Script (run_fetcher.py)
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
cmd = [sys.executable, "run_fetcher.py", market, symbol]
if data_source:
cmd.extend(["--data-source", data_source])
print(f"Executing data fetch command: {cmd} in {root_dir}")
process = await asyncio.create_subprocess_exec(

View File

@@ -2,7 +2,7 @@
import { useEffect, useState, use, useRef } from "react"
import { getReport } from "@/lib/api"
import { Badge } from "@/components/ui/badge"
import { Card, CardContent } from "@/components/ui/card"
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"
import { MarkdownRenderer } from "@/components/markdown-renderer"
@@ -101,19 +101,24 @@ export default function AnalysisPage({ params }: { params: Promise<{ id: string
</p>
</div>
<div className="flex items-center gap-4">
<Badge variant={
report.status === "completed" ? "default" :
report.status === "in_progress" ? "secondary" :
report.status === "failed" ? "destructive" : "outline"
}>
<Button
variant={
report.status === "completed" ? "default" :
report.status === "in_progress" ? "secondary" :
report.status === "failed" ? "destructive" : "outline"
}
size="sm"
className="pointer-events-none w-32"
>
{report.status === "in_progress" ? (
<div className="flex items-center gap-2">
<Loader2 className="h-3 w-3 animate-spin" />
</div>
<>
<Loader2 className="h-4 w-4 mr-2 animate-spin" />
</>
) : report.status === "completed" ? "已完成" : report.status === "failed" ? "失败" : report.status === "pending" ? "待处理" : report.status}
</Badge>
</Button>
{report.status === "completed" && (
<Button onClick={handleDownloadPDF} variant="outline" size="sm">
<Button onClick={handleDownloadPDF} variant="outline" size="sm" className="w-32">
<Download className="h-4 w-4 mr-2" />
PDF
</Button>

View File

@@ -1,18 +1,54 @@
"use client"
import { useState } from "react"
import { searchStock, startAnalysis } from "@/lib/api"
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"
import { useState, useEffect } from "react"
import { searchStock, startAnalysis, getConfig } from "@/lib/api"
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/components/ui/card"
import { Input } from "@/components/ui/input"
import { Button } from "@/components/ui/button"
import { Search, Loader2 } from "lucide-react"
import { Search, Loader2, Database, Bot } from "lucide-react"
import { useRouter } from "next/navigation"
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select"
import { Label } from "@/components/ui/label"
import { Badge } from "@/components/ui/badge"
export function SearchStock() {
const [query, setQuery] = useState("")
const [results, setResults] = useState<{ market: string; symbol: string; company_name: string }[]>([])
const [loading, setLoading] = useState(false)
const [error, setError] = useState("")
const [activeIndex, setActiveIndex] = useState<number | null>(null)
// Global Configuration State
const [selectedModel, setSelectedModel] = useState("gemini-2.0-flash")
const [dataSourcePrefs, setDataSourcePrefs] = useState<Record<string, string>>({
'CN': 'Tushare',
'HK': 'iFinD',
'US': 'Alpha Vantage',
'JP': 'iFinD',
'VN': 'iFinD'
})
// Fetch initial config
useEffect(() => {
const fetchConfig = async () => {
try {
const config = await getConfig()
if (config.AI_MODEL) {
setSelectedModel(config.AI_MODEL)
}
} catch (e) {
console.error("Failed to load config:", e)
}
}
fetchConfig()
}, [])
const router = useRouter()
const handleSearch = async () => {
@@ -20,10 +56,12 @@ export function SearchStock() {
setLoading(true)
setError("")
setResults([])
setActiveIndex(null)
try {
const data = await searchStock(query)
setResults(data)
// Auto-select the first result if exists? Or keep null? User asked for "click to select". Keeping null is safer.
} catch (err: any) {
setError(err.message || "搜索失败")
} finally {
@@ -34,7 +72,9 @@
const handleAnalyze = async (result: { market: string; symbol: string; company_name: string }) => {
setLoading(true)
try {
const report = await startAnalysis(result.market, result.symbol, result.company_name)
// Use global model selection
const dataSource = dataSourcePrefs[result.market]
const report = await startAnalysis(result.market, result.symbol, result.company_name, selectedModel, dataSource)
router.push(`/analysis/${report.id}`)
} catch (err: any) {
setError(err.message || "启动分析失败")
@@ -43,47 +83,171 @@
}
}
// Dynamic Data Source Options (Mocking availability)
const dataSourceOptions: Record<string, string[]> = {
'CN': ['Tushare'],
'HK': ['iFinD'],
'US': ['Alpha Vantage', 'iFinD'],
'JP': ['iFinD'],
'VN': ['iFinD']
}
return (
<Card className="w-full max-w-2xl">
<CardHeader>
<CardTitle></CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div className="flex gap-2">
<Input
placeholder="输入公司名称例如腾讯或代码例如700"
value={query}
onChange={(e) => setQuery(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleSearch()}
/>
<Button onClick={handleSearch} disabled={loading}>
{loading ? <Loader2 className="animate-spin" /> : <Search />}
</Button>
</div>
{error && <div className="text-red-500 text-sm">{error}</div>}
{results.length > 0 && (
<div className="space-y-2">
{results.length > 1 && (
<div className="text-sm text-muted-foreground"> {results.length} </div>
)}
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
{results.map((result, index) => (
<div key={index} className="bg-muted p-3 rounded-md space-y-2 border hover:border-primary transition-colors">
<div className="font-medium">{result.company_name}</div>
<div className="text-xs text-muted-foreground">
{result.market} | {result.symbol}
</div>
<Button onClick={() => handleAnalyze(result)} disabled={loading} className="w-full" size="sm">
{loading ? "正在启动分析..." : "运行分析"}
</Button>
</div>
))}
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6 w-full max-w-6xl">
{/* Left Column: Search & Results */}
<div className="lg:col-span-2 space-y-6">
<Card className="h-full flex flex-col">
<CardHeader>
<CardTitle></CardTitle>
<CardDescription></CardDescription>
</CardHeader>
<CardContent className="space-y-4 flex-grow">
<div className="flex gap-2">
<Input
placeholder="输入公司名称例如腾讯或代码例如700"
value={query}
onChange={(e) => setQuery(e.target.value)}
onKeyDown={(e) => e.key === "Enter" && handleSearch()}
/>
<Button onClick={handleSearch} disabled={loading} size="default" className="px-4">
{loading ? <Loader2 className="animate-spin" /> : <Search className="h-4 w-4" />}
</Button>
</div>
</div>
)}
</CardContent>
</Card>
{error && <div className="text-red-500 text-sm">{error}</div>}
{results.length > 0 && (
<div className="mt-6 space-y-3">
<div className="text-sm font-medium text-muted-foreground flex items-center justify-between">
<span> {results.length} </span>
<span className="text-xs"></span>
</div>
<div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
{results.map((result, index) => (
<div
key={index}
className={`group relative flex flex-col justify-between p-4 rounded-lg border transition-all cursor-pointer shadow-sm hover:shadow active:scale-95 duration-200 ${activeIndex === index ? 'border-primary bg-primary/5 ring-1 ring-primary' : 'bg-card hover:bg-accent hover:border-primary/50'}`}
onClick={() => setActiveIndex(index)}
>
<div className="space-y-2 mb-4">
<div className="font-semibold text-base line-clamp-2 leading-tight" title={result.company_name}>
{result.company_name}
</div>
<div className="flex items-center gap-2 text-xs text-muted-foreground">
<Badge variant="secondary" className="px-1.5 py-0 text-[10px] h-5">{result.market}</Badge>
<span className="font-mono">{result.symbol}</span>
</div>
</div>
<Button
onClick={(e) => {
e.stopPropagation();
handleAnalyze(result);
}}
disabled={loading}
size="sm"
variant={activeIndex === index ? "default" : "secondary"}
className="w-full mt-auto"
>
{loading ? <Loader2 className="animate-spin h-3 w-3 mr-2" /> : null}
</Button>
</div>
))}
</div>
</div>
)}
</CardContent>
</Card>
</div>
{/* Right Column: Configuration */}
<div className="lg:col-span-1 space-y-6">
<Card className="h-full border-dashed shadow-sm bg-muted/30 flex flex-col">
<CardHeader>
<CardTitle className="flex items-center gap-2 text-base">
<Bot className="h-5 w-5 text-primary" />
</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Section 1: AI Model */}
<div className="space-y-3">
<Label className="text-sm font-medium"> AI </Label>
<Select
value={selectedModel}
onValueChange={setSelectedModel}
>
<SelectTrigger className="w-full bg-background">
<SelectValue placeholder="Select model" />
</SelectTrigger>
<SelectContent>
<SelectItem value="gemini-2.0-flash">Gemini 2.0 Flash</SelectItem>
<SelectItem value="gemini-2.5-flash">Gemini 2.5 Flash</SelectItem>
<SelectItem value="gemini-3-flash-preview">Gemini 3 Flash Preview</SelectItem>
<SelectItem value="gemini-3-pro-preview">Gemini 3 Pro Preview</SelectItem>
{/* If the current selected model is custom and not in the list above, show it */}
{selectedModel &&
!["gemini-2.0-flash", "gemini-2.5-flash", "gemini-3-flash-preview", "gemini-3-pro-preview"].includes(selectedModel) && (
<SelectItem value={selectedModel}>{selectedModel} (Custom)</SelectItem>
)}
</SelectContent>
</Select>
<p className="text-xs text-muted-foreground">
</p>
</div>
{/* Section 2: Data Sources - Only show when results are available and for relevant markets */}
{results.length > 0 && (
<>
<div className="h-[1px] bg-border w-full my-4" />
<div className="space-y-3">
<div className="flex items-center gap-2">
<Database className="h-4 w-4 text-primary" />
<Label className="text-sm font-medium"></Label>
</div>
<div className="space-y-4">
{(activeIndex !== null && results[activeIndex] ? [results[activeIndex].market] : Array.from(new Set(results.map(r => r.market)))).map((market) => (
<div key={market} className="space-y-2 animate-in fade-in slide-in-from-right-4 duration-300">
<div className="flex items-center gap-2">
<Badge variant="outline" className="h-5 px-1.5 text-[10px] uppercase">{market}</Badge>
<span className="text-xs text-muted-foreground">:</span>
</div>
<div className="grid grid-cols-2 gap-2">
{(dataSourceOptions[market] || ['Default']).map((opt) => {
const isSelected = dataSourcePrefs[market] === opt;
return (
<div
key={opt}
onClick={() => setDataSourcePrefs(prev => ({ ...prev, [market]: opt }))}
className={`
cursor-pointer relative flex flex-col items-center justify-center p-2 rounded-md border text-xs font-medium transition-all
${isSelected
? 'border-primary bg-primary/10 text-primary ring-1 ring-primary/20'
: 'bg-background hover:bg-accent hover:border-primary/30 text-muted-foreground'}
`}
>
{opt}
{isSelected && <div className="absolute top-1 right-1 w-1.5 h-1.5 rounded-full bg-primary" />}
</div>
)
})}
</div>
</div>
))}
</div>
<p className="text-xs text-muted-foreground mt-2">
</p>
</div>
</>
)}
</CardContent>
</Card>
</div>
</div>
)
}

View File

@@ -10,11 +10,11 @@ export async function searchStock(query: string) {
return res.json() as Promise<{ market: string; symbol: string; company_name: string }[]>;
}
export async function startAnalysis(market: string, symbol: string, company_name: string) {
export async function startAnalysis(market: string, symbol: string, company_name: string, model?: string, data_source?: string) {
const res = await fetch(`${API_BASE}/analyze`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ market, symbol, company_name }),
body: JSON.stringify({ market, symbol, company_name, model, data_source }),
});
if (!res.ok) {
const error = await res.json();

View File

@@ -9,14 +9,14 @@ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
# from strategies.hk_strategy import HK_Strategy
# from strategies.jp_strategy import JP_Strategy
def get_strategy(market, stock_code, tushare_token=None, av_key=None):
def get_strategy(market, stock_code, tushare_token=None, av_key=None, data_source=None):
market = market.upper()
if market == 'CN':
from strategies.cn_strategy import CN_Strategy
return CN_Strategy(stock_code, tushare_token)
elif market == 'US':
from strategies.us_strategy import US_Strategy
return US_Strategy(stock_code, av_key)
return US_Strategy(stock_code, av_key, data_source)
elif market == 'HK':
from strategies.hk_strategy import HK_Strategy
ifind_token = os.getenv('IFIND_REFRESH_TOKEN')
@@ -37,10 +37,19 @@ def main():
tushare_token = os.getenv('TUSHARE_TOKEN')
av_key = os.getenv('ALPHA_VANTAGE_KEY')
if len(sys.argv) > 2:
market = sys.argv[1]
symbol = sys.argv[2]
strategy = get_strategy(market, symbol, tushare_token, av_key)
import argparse
parser = argparse.ArgumentParser(description='Run Stock Analysis Data Fetcher')
parser.add_argument('market', help='Market (CN, US, HK, JP, VN)')
parser.add_argument('symbol', help='Stock Symbol')
parser.add_argument('--data-source', help='Data Source Preference', default=None)
if len(sys.argv) > 1:
args = parser.parse_args()
market = args.market
symbol = args.symbol
data_source = args.data_source
strategy = get_strategy(market, symbol, tushare_token, av_key, data_source)
strategy.execute()
else:
print("Usage: python run_fetcher.py <MARKET> <SYMBOL> [--data-source SOURCE]")
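A hedged sketch of driving the fetcher programmatically with the new parameter, equivalent to running: python run_fetcher.py US AAPL --data-source "Alpha Vantage" (the token environment variables mirror main() above; the import assumes the script is importable from the project root):

import os
from run_fetcher import get_strategy  # assumes run_fetcher.py only runs main() under __main__

tushare_token = os.getenv('TUSHARE_TOKEN')
av_key = os.getenv('ALPHA_VANTAGE_KEY')

# data_source is optional; None keeps each market's default provider
strategy = get_strategy('US', 'AAPL', tushare_token, av_key, data_source='Alpha Vantage')
strategy.execute()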

BIN
server.log Normal file

Binary file not shown.

View File

@@ -0,0 +1,172 @@
import requests
import pandas as pd
import time
from storage.file_io import DataStorage
class AlphaVantageUsClient:
BASE_URL = "https://www.alphavantage.co/query"
def __init__(self, api_key: str):
self.api_key = api_key
self.storage = DataStorage()
def _save_raw_data(self, data, symbol: str, name: str):
if data is None:
return
df = pd.DataFrame()
if isinstance(data, list):
df = pd.DataFrame(data)
elif isinstance(data, dict):
# For single-record JSON objects, convert to a DataFrame
df = pd.DataFrame([data])
if not df.empty:
self.storage.save_data(df, 'US', symbol, f"raw_{name}")
def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
params = {
"function": function,
"symbol": symbol,
"apikey": self.api_key
}
try:
time.sleep(15)
response = requests.get(self.BASE_URL, params=params)
data = response.json()
except Exception as e:
print(f"Error requesting {function}: {e}")
return pd.DataFrame()
if data:
self._save_raw_data(data.get("annualReports"), symbol, f"{function.lower()}_annual")
df_annual = pd.DataFrame()
if "annualReports" in data and data["annualReports"]:
df_annual = pd.DataFrame(data["annualReports"])
if "fiscalDateEnding" in df_annual.columns:
df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
df_annual = df_annual.head(5)
else:
print(f"Error fetching {function} for {symbol}: {data}")
return pd.DataFrame()
return df_annual
def get_market_metrics(self, symbol: str) -> dict:
# 1. Get Overview for PE, PB, MarketCap, Employees
overview_data = {}
try:
time.sleep(15)
params = {"function": "OVERVIEW", "symbol": symbol, "apikey": self.api_key}
r = requests.get(self.BASE_URL, params=params)
overview_data = r.json()
# Clean up 'None' strings from API response before processing
if isinstance(overview_data, dict):
for key, value in overview_data.items():
if value == 'None':
overview_data[key] = None
self._save_raw_data(overview_data, symbol, "market_metrics_overview")
except Exception as e:
print(f"Error fetching OVERVIEW for {symbol}: {e}")
market_cap = float(overview_data.get("MarketCapitalization") or 0)
shares_outstanding = float(overview_data.get("SharesOutstanding") or 0)
price = 0
if shares_outstanding > 0:
price = market_cap / shares_outstanding
return {
"price": price,
"name": overview_data.get("Name"),
"fiscal_year_end": overview_data.get("FiscalYearEnd"),
"dividend_yield": float(overview_data.get("DividendYield") or 0),
"market_cap": market_cap,
"pe": float(overview_data.get("PERatio") or 0),
"pb": float(overview_data.get("PriceToBookRatio") or 0),
"employee_count": int(float(overview_data.get("FullTimeEmployees") or 0)),
"total_share_holders": 0 # Not typically provided in basic AV Overview
}
def get_income_statement(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("INCOME_STATEMENT", symbol)
cols_map = {
"fiscalDateEnding": "date",
"totalRevenue": "revenue",
"netIncome": "net_income",
"grossProfit": "gross_profit",
"costOfRevenue": "cogs",
"researchAndDevelopment": "rd_exp",
"sellingGeneralAndAdministrative": "sga_exp",
"interestExpense": "fin_exp",
"incomeBeforeTax": "total_profit",
"incomeTaxExpense": "income_tax",
"ebit": "ebit"
}
df = df.rename(columns=cols_map)
numeric_cols = [
"revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
"fin_exp", "total_profit", "income_tax", "ebit",
"depreciation", "depreciationAndAmortization"
]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("BALANCE_SHEET", symbol)
cols_map = {
"fiscalDateEnding": "date",
"totalShareholderEquity": "total_equity",
"totalLiabilities": "total_liabilities",
"totalCurrentAssets": "current_assets",
"totalCurrentLiabilities": "current_liabilities",
"cashAndCashEquivalentsAtCarryingValue": "cash",
"currentNetReceivables": "receivables",
"inventory": "inventory",
"propertyPlantEquipment": "fixed_assets",
"totalAssets": "total_assets",
"goodwill": "goodwill",
"longTermInvestments": "lt_invest",
"shortTermDebt": "short_term_debt",
"currentLongTermDebt": "short_term_debt_part",
"longTermDebt": "long_term_debt",
"currentAccountsPayable": "accounts_payable",
"otherCurrentAssets": "prepayment",
"otherNonCurrentAssets": "other_assets",
"deferredRevenue": "adv_receipts"
}
df = df.rename(columns=cols_map)
numeric_cols = [
"total_equity", "total_liabilities", "current_assets", "current_liabilities",
"cash", "receivables", "inventory", "fixed_assets", "total_assets",
"goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
"long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts"
]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("CASH_FLOW", symbol)
cols_map = {
"fiscalDateEnding": "date",
"operatingCashflow": "ocf",
"capitalExpenditures": "capex",
"dividendPayout": "dividends",
"depreciationDepletionAndAmortization": "depreciation"
}
df = df.rename(columns=cols_map)
numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
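A small usage sketch for the new Alpha Vantage client (the import path is an assumption; the methods are the ones defined above). Each request is preceded by a 15-second sleep in _fetch_data and get_market_metrics, so a full pull of all four datasets takes roughly a minute:

import os
from fetchers.alpha_vantage_us_client import AlphaVantageUsClient  # module path is an assumption

client = AlphaVantageUsClient(api_key=os.getenv("ALPHA_VANTAGE_KEY"))
metrics = client.get_market_metrics("AAPL")    # overview-derived price, PE, PB, market cap
income = client.get_income_statement("AAPL")   # up to 5 most recent annual reports, renamed columns
balance = client.get_balance_sheet("AAPL")
cash_flow = client.get_cash_flow("AAPL")
print(metrics["name"], metrics["pe"], len(income))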

View File

@@ -1,243 +1,39 @@
import tushare as ts
import pandas as pd
from .base import DataFetcher
import time
from storage.file_io import DataStorage
class CnFetcher(DataFetcher):
def __init__(self, api_key: str):
def __init__(self, api_key: str, data_source: str = 'Tushare'):
super().__init__(api_key)
ts.set_token(self.api_key)
self.pro = ts.pro_api()
self.storage = DataStorage()
self.data_source = data_source
def _save_raw_data(self, df: pd.DataFrame, symbol: str, name: str):
if df is None or df.empty:
return
market = 'CN'
self.storage.save_data(df, market, symbol, f"raw_{name}")
def _get_ts_code(self, symbol: str) -> str:
return symbol
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
is_annual = df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
if self.data_source == 'Tushare':
from .tushare_cn_client import TushareCnClient
self.client = TushareCnClient(api_key)
else:
# Akshare is recognized but not yet implemented; any other unknown source falls back to Tushare.
if self.data_source == 'Akshare':
raise NotImplementedError("Akshare client not yet implemented")
from .tushare_cn_client import TushareCnClient
self.client = TushareCnClient(api_key)
def get_income_statement(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.income(ts_code=ts_code)
self._save_raw_data(df, ts_code, "income_statement")
rename_map = {
'end_date': 'date',
'revenue': 'revenue',
'n_income_attr_p': 'net_income'
}
df = self._filter_data(df)
df = df.rename(columns=rename_map)
return df
return self.client.get_income_statement(symbol)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.balancesheet(ts_code=ts_code)
self._save_raw_data(df, ts_code, "balance_sheet")
rename_map = {
'end_date': 'date',
'total_hldr_eqy_exc_min_int': 'total_equity',
'total_liab': 'total_liabilities',
'total_cur_assets': 'current_assets',
'total_cur_liab': 'current_liabilities'
}
df = self._filter_data(df)
df = df.rename(columns=rename_map)
return df
return self.client.get_balance_sheet(symbol)
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.cashflow(ts_code=ts_code)
self._save_raw_data(df, ts_code, "cash_flow")
df = self._filter_data(df)
df = df.rename(columns={
'end_date': 'date',
'n_cashflow_act': 'net_cash_flow',
'depr_fa_coga_dpba': 'depreciation'
})
return df
return self.client.get_cash_flow(symbol)
def get_market_metrics(self, symbol: str) -> dict:
ts_code = self._get_ts_code(symbol)
metrics = {
"price": 0.0,
"market_cap": 0.0,
"pe": 0.0,
"pb": 0.0,
"total_share_holders": 0,
"employee_count": 0
}
try:
df_daily = self.pro.daily_basic(ts_code=ts_code, limit=1)
self._save_raw_data(df_daily, ts_code, "market_metrics_daily_basic")
if not df_daily.empty:
row = df_daily.iloc[0]
metrics["price"] = row.get('close', 0.0)
metrics["pe"] = row.get('pe', 0.0)
metrics["pb"] = row.get('pb', 0.0)
metrics["market_cap"] = row.get('total_mv', 0.0) * 10000
metrics["dividend_yield"] = row.get('dv_ttm', 0.0)
df_basic = self.pro.stock_basic(ts_code=ts_code, fields='name,list_date')
self._save_raw_data(df_basic, ts_code, "market_metrics_stock_basic")
if not df_basic.empty:
metrics['name'] = df_basic.iloc[0]['name']
metrics['list_date'] = df_basic.iloc[0]['list_date']
df_comp = self.pro.stock_company(ts_code=ts_code)
if not df_comp.empty:
metrics["employee_count"] = int(df_comp.iloc[0].get('employees', 0) or 0)
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, limit=1)
self._save_raw_data(df_holder, ts_code, "market_metrics_shareholder_number")
if not df_holder.empty:
metrics["total_share_holders"] = int(df_holder.iloc[0].get('holder_num', 0) or 0)
except Exception as e:
print(f"Error fetching market metrics for {symbol}: {e}")
return metrics
return self.client.get_market_metrics(symbol)
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
results = []
if not dates:
return pd.DataFrame()
unique_dates = sorted(list(set([str(d).replace('-', '') for d in dates])), reverse=True)
try:
import datetime
min_date = min(unique_dates)
max_date = max(unique_dates)
df_daily = self.pro.daily_basic(ts_code=ts_code, start_date=min_date, end_date=max_date)
self._save_raw_data(df_daily, ts_code, "historical_metrics_daily_basic")
if not df_daily.empty:
df_daily = df_daily.sort_values('trade_date', ascending=False)
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, start_date=min_date, end_date=max_date)
self._save_raw_data(df_holder, ts_code, "historical_metrics_shareholder_number")
if not df_holder.empty:
df_holder = df_holder.sort_values('end_date', ascending=False)
for date_str in unique_dates:
metrics = {'date_str': date_str}
if not df_daily.empty:
closest_daily = df_daily[df_daily['trade_date'] <= date_str]
if not closest_daily.empty:
row = closest_daily.iloc[0]
metrics['Price'] = row.get('close')
metrics['PE'] = row.get('pe')
metrics['PB'] = row.get('pb')
metrics['MarketCap'] = row.get('total_mv', 0) * 10000
if not df_holder.empty:
closest_holder = df_holder[df_holder['end_date'] <= date_str]
if not closest_holder.empty:
metrics['Shareholders'] = closest_holder.iloc[0].get('holder_num')
results.append(metrics)
except Exception as e:
print(f"Error fetching historical metrics for {symbol}: {e}")
return pd.DataFrame(results)
return self.client.get_historical_metrics(symbol, dates)
def get_dividends(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df_div = self.pro.dividend(ts_code=ts_code, fields='end_date,ex_date,div_proc,cash_div')
self._save_raw_data(df_div, ts_code, "dividends_raw")
if df_div.empty:
return pd.DataFrame()
# Filter for implemented cash dividends
df_div = df_div[(df_div['div_proc'] == '实施') & (df_div['cash_div'] > 0)]
if df_div.empty:
return pd.DataFrame()
df_div['total_cash_div'] = 0.0
# Get total shares for each ex_date
for index, row in df_div.iterrows():
ex_date = row['ex_date']
if not ex_date or pd.isna(ex_date):
continue
try:
time.sleep(0.2) # Sleep for 200ms to avoid hitting API limits
df_daily = self.pro.daily_basic(ts_code=ts_code, trade_date=ex_date, fields='total_share')
if not df_daily.empty and not df_daily['total_share'].empty:
total_share = df_daily.iloc[0]['total_share'] # total_share is in units of 10,000 shares
cash_div_per_share = row['cash_div'] # This is per-share
# Total dividend in Yuan
total_cash_dividend = (cash_div_per_share * total_share * 10000)
df_div.loc[index, 'total_cash_div'] = total_cash_dividend
except Exception as e:
print(f"Could not fetch daily basic for {ts_code} on {ex_date}: {e}")
df_div['year'] = pd.to_datetime(df_div['end_date']).dt.year
dividends_by_year = df_div.groupby('year')['total_cash_div'].sum().reset_index()
dividends_by_year['date_str'] = dividends_by_year['year'].astype(str) + '1231'
dividends_by_year.rename(columns={'total_cash_div': 'dividends'}, inplace=True)
return dividends_by_year[['date_str', 'dividends']]
return self.client.get_dividends(symbol)
def get_repurchases(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.repurchase(ts_code=ts_code)
self._save_raw_data(df, ts_code, "repurchases")
if df.empty or 'ann_date' not in df.columns or 'amount' not in df.columns:
return pd.DataFrame()
# Filter for repurchases with a valid amount
df = df[df['amount'] > 0]
if df.empty:
return pd.DataFrame()
# Extract year and group by it
df['year'] = pd.to_datetime(df['ann_date']).dt.year
repurchases_by_year = df.groupby('year')['amount'].sum().reset_index()
# Create date_str for merging (YYYY1231)
repurchases_by_year['date_str'] = repurchases_by_year['year'].astype(str) + '1231'
# Rename for merging.
# Based on user feedback, it appears the unit from the API is Yuan, so no conversion is needed.
repurchases_by_year.rename(columns={'amount': 'repurchases'}, inplace=True)
return repurchases_by_year[['date_str', 'repurchases']]
return self.client.get_repurchases(symbol)
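CnFetcher is now a thin wrapper that delegates every call to a provider-specific client. The TushareCnClient implementation itself is not part of the excerpt above, so the sketch below only records the interface implied by the delegation (the Protocol framing is mine, not code from this commit):

from typing import Protocol
import pandas as pd

class CnDataClient(Protocol):
    """Interface implied by CnFetcher's delegation above (method names inferred from the calls)."""
    def get_income_statement(self, symbol: str) -> pd.DataFrame: ...
    def get_balance_sheet(self, symbol: str) -> pd.DataFrame: ...
    def get_cash_flow(self, symbol: str) -> pd.DataFrame: ...
    def get_market_metrics(self, symbol: str) -> dict: ...
    def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame: ...
    def get_dividends(self, symbol: str) -> pd.DataFrame: ...
    def get_repurchases(self, symbol: str) -> pd.DataFrame: ...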

View File

@@ -1,6 +1,6 @@
class FetcherFactory:
@staticmethod
def get_fetcher(market: str, tushare_token: str = None, av_key: str = None, **kwargs):
def get_fetcher(market: str, tushare_token: str = None, av_key: str = None, data_source: str = None, **kwargs):
from .base import DataFetcher
market = market.upper()
if market == 'CN':
@@ -18,10 +18,24 @@ class FetcherFactory:
from .hk_fetcher import HkFetcher
return HkFetcher(ifind_token)
elif market == 'US':
# Default to Alpha Vantage unless iFinD is explicitly requested
if data_source == 'iFinD':
ifind_token = kwargs.get('ifind_refresh_token')
if not ifind_token:
import os
ifind_token = os.getenv('IFIND_REFRESH_TOKEN')
if not ifind_token:
# No silent fallback: if iFinD was explicitly requested, a missing token is an error
raise ValueError("iFinD Refresh Token is required for US market when iFinD is selected")
from .us_fetcher import UsFetcher
# UsFetcher is expected to accept data_source and switch its underlying client accordingly
return UsFetcher(ifind_token, data_source='iFinD')
if not av_key:
raise ValueError("Alpha Vantage key is required for US market")
from .us_fetcher import UsFetcher
return UsFetcher(av_key)
return UsFetcher(av_key, data_source='Alpha Vantage')
elif market == 'JP':
ifind_token = kwargs.get('ifind_refresh_token') or kwargs.get('jquants_refresh_token')
if not ifind_token:

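A hedged sketch of calling the factory after this change (token values are read from the environment as elsewhere in the repo; only the US branch inspects data_source in this commit):

import os
from fetchers.fetcher_factory import FetcherFactory  # import path is an assumption

# Explicitly request iFinD for a US ticker; requires an iFinD refresh token
fetcher = FetcherFactory.get_fetcher(
    market="US",
    av_key=os.getenv("ALPHA_VANTAGE_KEY"),
    data_source="iFinD",
    ifind_refresh_token=os.getenv("IFIND_REFRESH_TOKEN"),
)

# Without data_source, the US branch keeps its Alpha Vantage default
default_fetcher = FetcherFactory.get_fetcher(market="US", av_key=os.getenv("ALPHA_VANTAGE_KEY"))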
View File

@@ -1,746 +1,61 @@
import pandas as pd
import time
from .base import DataFetcher
from .ifind_client import IFindClient
from .ifind_hk_client import IFindHKClient
from storage.file_io import DataStorage
class HkFetcher(DataFetcher):
def __init__(self, api_key: str):
# api_key is the iFinD Refresh Token
super().__init__(api_key)
self.cli = IFindClient(refresh_token=api_key)
self.data_source = 'iFinD'
self.client = IFindHKClient(api_key)
self.storage = DataStorage()
self._basic_info_cache = {}
def _get_ifind_code(self, symbol: str) -> str:
"""Keep the logic consistent: if the symbol is purely numeric, normalize it and append the .HK suffix; otherwise pass it through unchanged."""
# Strip .HK suffix if present to handle input like '00700.HK'
clean_symbol = symbol.replace('.HK', '').replace('.hk', '')
if clean_symbol.isdigit():
# Force 4 digits for HK (e.g., 700 -> 0700.HK, 00700 -> 0700.HK)
# e.g. 01651 -> 1651.HK
code_int = int(clean_symbol)
normalized_code = str(code_int).zfill(4)
return f"{normalized_code}.HK"
# HK stock codes are 4-5 digits, often 0 padded to 5 or 4 in other systems
# iFinD usually expects 4 digits like '0700.HK', '0005.HK'
# Input symbol might be '700', '0700', '5', '0005'
if symbol.isdigit():
padded = symbol.zfill(4)
return f"{padded}.HK"
return symbol
def _fetch_basic_info(self, symbol: str) -> dict:
"""Fetch basic company info: Chinese name, fiscal year-end date, and IPO date."""
def _fetch_basic_info(self, symbol: str):
code = self._get_ifind_code(symbol)
if code in self._basic_info_cache:
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231", # default to 12-31
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
info["acc_date"] = str(row.get("accounting_date", "1231"))
info["accounting_date"] = "1231"
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
if isinstance(data, dict):
df = pd.DataFrame([data])
else:
df = data
self.storage.save_data(df, 'HK', symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
"""Parse the 'tables' structure of an iFinD response into a DataFrame."""
if not res:
return pd.DataFrame()
# Treat a missing errorcode as success (lenient, e.g. for mocked responses)
error_code = res.get("errorcode", 0)
if error_code != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {error_code})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
return pd.DataFrame()
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
if comparable_record.empty:
dfs_to_concat = [latest_record, df]
else:
dfs_to_concat = [latest_record, comparable_record, df]
# HK fiscal year-ends vary (1231, 0331, 0630, etc.), but an annual report is annual regardless of the month
combined = pd.concat(dfs_to_concat) # Include all for now and dedup
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
"""Fetch annual financial data for recent years (primarily HKD, but iFinD supports conversion to CNY)."""
code = self._get_ifind_code(symbol)
current_year = int(time.strftime("%Y"))
# 1. First, determine the most recent valid year by trying backwards from current year
last_valid_year = None
# Try up to 3 years back to find the latest available report
# e.g., in Jan 2026, try 2026 -> fail, 2025 -> success
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}1231"
# Use the first indicator to test availability
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
# Check for non-null values
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
# Fallback to current year if nothing found (will likely return empty/zeros, but keeps logic flowing)
last_valid_year = current_year
# 2. Fetch 5 years starting from the last valid year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}1231"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
valid_cols = [c for c in df.columns if c not in ['end_date', 'date']]
if not df[valid_cols].isnull().all().all():
df['end_date'] = target_date
df = df.dropna(axis=1, how='all')
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
return self.client._fetch_basic_info(symbol, code)
def get_income_statement(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "total_oi"},
{"indicator": "prime_oi"},
{"indicator": "other_oi"},
{"indicator": "operating_cost"},
{"indicator": "operating_expense"},
{"indicator": "operating_fee"},
{"indicator": "p_depreciation_and_amortization"},
{"indicator": "gross_profit"},
{"indicator": "sales_ad_and_ga"},
{"indicator": "rad_cost"},
{"indicator": "sales_fee"},
{"indicator": "financial_expense"},
{"indicator": "sales_income"},
{"indicator": "sales_cost"},
{"indicator": "other_income"},
{"indicator": "manage_fee"},
{"indicator": "deprec_and_amorti"},
{"indicator": "total_other_opearting_expense"},
{"indicator": "p_total_cost"},
{"indicator": "operating_profit"},
{"indicator": "total_gal"},
{"indicator": "interest_income"},
{"indicator": "interest_net_pay"},
{"indicator": "interest_expense"},
{"indicator": "income_from_asso_and_joint"},
{"indicator": "other_gal_effct_profit_pre_tax"},
{"indicator": "conti_op_before_tax"},
{"indicator": "profit_before_noncurrent_items"},
{"indicator": "profit_and_loss_of_noncurrent_items"},
{"indicator": "profit_before_tax"},
{"indicator": "income_tax"},
{"indicator": "profit_after_tax"},
{"indicator": "minoritygal"},
{"indicator": "continue_operate_net_profit"},
{"indicator": "noncontinue_operate_net_profit"},
{"indicator": "other_special_items"},
{"indicator": "ni_attr_to_cs"},
{"indicator": "np_atms"},
{"indicator": "preferred_divid_and_other_adjust"},
{"indicator": "oci"},
{"indicator": "total_oci"},
{"indicator": "oci_from_parent"},
{"indicator": "oci_from_minority"},
{"indicator": "invest_property_fv_chg"},
{"indicator": "operating_amt"},
{"indicator": "oi_si"},
{"indicator": "operating_premium_profit_si"},
{"indicator": "to_toallied_corp_perf"},
{"indicator": "to_joint_control_entity_perf"},
{"indicator": "pre_tax_profit_si"},
{"indicator": "after_tax_profit_si"},
{"indicator": "profit_attrbt_to_nonholders"},
{"indicator": "total_income_atncs"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'total_oi': 'revenue',
'operating_amt': 'turnover', # Backup for revenue
'gross_profit': 'gross_profit',
'sales_ad_and_ga': 'sga_exp',
'sales_fee': 'selling_marketing_exp',
'manage_fee': 'ga_exp',
'rad_cost': 'rd_exp',
'income_tax': 'income_tax',
'ni_attr_to_cs': 'net_income',
'operating_profit': 'operating_profit',
'depreciation': 'depreciation',
'deprec_and_amorti': 'depreciation', # Backup
'p_depreciation_and_amortization': 'depreciation' # Another backup
}
df_filtered = df.rename(columns=rename_map)
# Calculate EBIT if not present but operating_profit is there
if 'ebit' not in df_filtered.columns and 'operating_profit' in df_filtered.columns:
# Simple approximation: Operating Profit is often used as EBIT
df_filtered['ebit'] = df_filtered['operating_profit']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_income_statement(symbol, code)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "cce"},
{"indicator": "st_investment"},
{"indicator": "total_cash"},
{"indicator": "account_receivable"},
{"indicator": "tradable_fnncl_asset"},
{"indicator": "derivative_fnncl_assets"},
{"indicator": "restriv_fund"},
{"indicator": "other_short_term_investment"},
{"indicator": "ar_nr"},
{"indicator": "total_ar"},
{"indicator": "or"},
{"indicator": "inventory"},
{"indicator": "flow_assets_dit"},
{"indicator": "pre_payment"},
{"indicator": "other_cunrrent_assets_si"},
{"indicator": "other_ca"},
{"indicator": "total_ca"},
{"indicator": "receivables_from_allied_corp"},
{"indicator": "current_assets_si"},
{"indicator": "prepay_deposits_etc"},
{"indicator": "receivables_from_jce"},
{"indicator": "receivables_from_ac"},
{"indicator": "recoverable_tax"},
{"indicator": "total_fixed_assets"},
{"indicator": "depreciation"},
{"indicator": "equity_and_lt_invest"},
{"indicator": "net_fixed_assets"},
{"indicator": "invest_property"},
{"indicator": "equity_investment"},
{"indicator": "investment_in_associate"},
{"indicator": "investment_in_joints"},
{"indicator": "held_to_maturity_invest"},
{"indicator": "goodwill_and_intangible_asset"},
{"indicator": "intangible_assets"},
{"indicator": "accum_amortized"},
{"indicator": "noncurrent_assets_dit"},
{"indicator": "other_noncurrent_assets_si"},
{"indicator": "dt_assets"},
{"indicator": "total_noncurrent_assets"},
{"indicator": "total_assets"},
{"indicator": "ac_equity"},
{"indicator": "lease_prepay"},
{"indicator": "noncurrent_assets_si"},
{"indicator": "st_lt_current_loan"},
{"indicator": "trade_financial_lia"},
{"indicator": "derivative_financial_lia"},
{"indicator": "ap_np"},
{"indicator": "accounts_payable"},
{"indicator": "advance_payment"},
{"indicator": "st_debt"},
{"indicator": "contra_liab"},
{"indicator": "tax_payable"},
{"indicator": "accrued_liab"},
{"indicator": "flow_debt_deferred_income"},
{"indicator": "other_cl"},
{"indicator": "other_cunrrent_liab_si"},
{"indicator": "total_cl"},
{"indicator": "accrued_expenses_etc"},
{"indicator": "money_payable_toac"},
{"indicator": "joint_control_entity_payable"},
{"indicator": "payable_to_associated_corp"},
{"indicator": "lt_debt"},
{"indicator": "long_term_loan"},
{"indicator": "other_noncurrent_liabi"},
{"indicator": "deferred_tax_liability"},
{"indicator": "ncl_deferred_income"},
{"indicator": "other_noncurrent_liab_si"},
{"indicator": "noncurrent_liab_si"},
{"indicator": "total_noncurrent_liab"},
{"indicator": "total_liab"},
{"indicator": "common_shares"},
{"indicator": "capital_reserve"},
{"indicator": "equity_premium"},
{"indicator": "treasury_stock"},
{"indicator": "accumgal"},
{"indicator": "equity_atsopc_sbi"},
{"indicator": "preferred_stock"},
{"indicator": "perpetual_debt"},
{"indicator": "reserve"},
{"indicator": "other_reserves"},
{"indicator": "retained_earnings"},
{"indicator": "oci_bs"},
{"indicator": "total_common_equity"},
{"indicator": "equity_belong_to_parent"},
{"indicator": "minority_interests"},
{"indicator": "other_equity_si"},
{"indicator": "total_equity"},
{"indicator": "total_lib_and_equity"},
{"indicator": "equity_si"},
{"indicator": "equity_atncs"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cce': 'cash',
'ar_nr': 'receivables',
'inventory': 'inventory',
'net_fixed_assets': 'fixed_assets',
'equity_and_lt_invest': 'long_term_investments',
'goodwill_and_intangible_asset': 'goodwill',
'st_debt': 'short_term_debt',
'st_lt_current_loan': 'short_term_borrowings',
'ap_np': 'accounts_payable',
'contra_liab': 'contract_liabilities',
'advance_payment': 'advances_from_customers',
'flow_debt_deferred_income': 'deferred_revenue',
'lt_debt': 'long_term_debt',
'long_term_loan': 'long_term_borrowings',
'total_assets': 'total_assets',
'equity_belong_to_parent': 'total_equity',
'pre_payment': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
# Deduplicate columns just in case
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_liab' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_liab']
elif 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
# Deduplicate again in case total_liabilities logic added a dupe (unlikely)
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_balance_sheet(symbol, code)
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "ni"},
{"indicator": "depreciation_and_amortization"},
{"indicator": "operating_capital_change"},
{"indicator": "ncf_from_oa"},
{"indicator": "capital_cost"},
{"indicator": "invest_buy"},
{"indicator": "ncf_from_ia"},
{"indicator": "increase_in_share_capital"},
{"indicator": "decrease_in_share_capital"},
{"indicator": "total_dividends_paid"},
{"indicator": "ncf_from_fa"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'ncf_from_oa': 'ocf',
'capital_cost': 'capex',
'total_dividends_paid': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_cash_flow(symbol, code)
def get_market_metrics(self, symbol: str) -> dict:
"""Fetch basic company/market info (via the ths_*_stock basic indicators)."""
basic_info = self._fetch_basic_info(symbol)
code = self._get_ifind_code(symbol)
metrics = {
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", ""),
"accounting_date": basic_info.get("accounting_date", ""),
"acc_date": basic_info.get("acc_date", ""),
"price": 0,
"market_cap": 0,
"pe": 0,
"pb": 0,
"dividend_yield": 0
}
# Fetch current market data using ths_* indicators confirmed for HK
params = {
"codes": code,
"indipara": [
{"indicator": "ths_close_price_stock", "indiparams": []},
{"indicator": "ths_market_value_stock", "indiparams": []},
{"indicator": "ths_pe_ttm_stock", "indiparams": []},
{"indicator": "ths_pb_stock", "indiparams": []},
{"indicator": "ths_dividend_ratio_stock", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
row = df.iloc[0]
metrics["price"] = float(row.get("ths_close_price_stock") or 0)
metrics["market_cap"] = float(row.get("ths_market_value_stock") or 0)
metrics["pe"] = float(row.get("ths_pe_ttm_stock") or 0)
metrics["pb"] = float(row.get("ths_pb_stock") or 0)
metrics["dividend_yield"] = float(row.get("ths_dividend_ratio_stock") or 0)
return metrics
return self.client.get_market_metrics(symbol, code)
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
if not dates: return pd.DataFrame()
results = []
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
return self.client.get_historical_metrics(symbol, code, dates)
def get_dividends(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}1231", # Assume yearend for dividends
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
return self.client.get_dividends(symbol, code)
def get_repurchases(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-12-31"
end_date = f"{target_year}-12-31"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}1231",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
return self.client.get_repurchases(symbol, code)
def get_employee_count(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-12-31"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}1231",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp
return self.client.get_employee_count(symbol, code)
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
"""Fetch officially computed financial indicators (ratios, turnover days, etc.)."""
code = self._get_ifind_code(symbol)
current_year = int(time.strftime("%Y"))
# 1. Determine the latest valid year
last_valid_year = None
for offset in range(3):
test_year = current_year - offset
# Try getting ROE as a proxy for data availability
test_date = f"{test_year}1231"
params = {
"codes": code,
"indipara": [{"indicator": "roe", "indiparams": [test_date]}]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(val) and val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
# 2. Fetch 5 years starting from last valid year
for i in range(5):
target_year = last_valid_year - i
date_str = f"{target_year}1231"
year_str = str(target_year)
indipara = []
# 1. Per-employee metrics (params: Year, "100")
for key in ["salary_pp", "revenue_pp", "profit_pp"]:
indipara.append({"indicator": key, "indiparams": [year_str, "100"]})
# 2. Financial ratios and turnover metrics (param: date YYYYMMDD)
ratio_keys = [
"roe", "roa", "roic",
"sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
"operating_revenue_yoy", "np_atsopc_yoy",
"ibdebt_ratio_asset_base",
"inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
"fixed_asset_turnover_ratio", "total_capital_turnover"
]
for key in ratio_keys:
indipara.append({"indicator": key, "indiparams": [date_str]})
params = {
"codes": code,
"indipara": indipara
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
if 'end_date' not in df.columns:
df['end_date'] = date_str
# Filter out columns that are all NaN
df = df.dropna(axis=1, how='all')
# Identify if we have meaningful data (at least one valid metric)
valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
if not df[valid_cols].isnull().all().all():
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
combined = pd.concat(all_dfs, ignore_index=True)
self._save_raw_data(combined, symbol, "financial_ratios_raw")
rename_map = {
"salary_pp": "salary_per_employee",
"revenue_pp": "revenue_per_employee",
"profit_pp": "profit_per_employee",
"sales_fee_to_or": "selling_expense_ratio",
"manage_fee_to_revenue": "admin_expense_ratio",
"rad_expense_to_total_income": "rd_expense_ratio",
"operating_revenue_yoy": "revenue_growth",
"np_atsopc_yoy": "net_profit_growth",
"ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
"fixed_asset_turnover_ratio": "fixed_asset_turnover",
"total_capital_turnover": "total_asset_turnover"
}
df_final = combined.rename(columns=rename_map)
for col in df_final.columns:
if col not in ['date', 'end_date']:
df_final[col] = pd.to_numeric(df_final[col], errors='coerce')
return self._filter_data(df_final)
return self.client.get_financial_ratios(symbol, code)
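The refactored HkFetcher keeps its original public interface but forwards each call to IFindHKClient together with the normalized iFinD code. A minimal usage sketch (the refresh token is a placeholder and the import path is an assumption):

import os
from fetchers.hk_fetcher import HkFetcher  # import path is an assumption

fetcher = HkFetcher(os.getenv("IFIND_REFRESH_TOKEN"))
metrics = fetcher.get_market_metrics("700")        # normalized internally to 0700.HK
income = fetcher.get_income_statement("00700.HK")  # .HK suffix is stripped, then re-applied
print(metrics.get("name"), metrics.get("pe"))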

View File

@@ -0,0 +1,699 @@
import pandas as pd
import time
from .ifind_client import IFindClient
from storage.file_io import DataStorage
class IFindHKClient:
"""
iFinD Client specifically for Hong Kong Market.
Uses 'THS' indicators and Chinese accounting standard mappings often used for HK stocks in iFinD.
"""
def __init__(self, api_key: str):
self.cli = IFindClient(refresh_token=api_key)
self.storage = DataStorage()
self.market = 'HK'
self._basic_info_cache = {}
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
if isinstance(data, dict):
df = pd.DataFrame([data])
else:
df = data
self.storage.save_data(df, self.market, symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
if not res:
return pd.DataFrame()
error_code = res.get("errorcode", 0)
if error_code != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {error_code})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
return pd.DataFrame()
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
if comparable_record.empty:
dfs_to_concat = [latest_record, df]
else:
dfs_to_concat = [latest_record, comparable_record, df]
combined = pd.concat(dfs_to_concat)
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def _fetch_basic_info(self, symbol: str, code: str) -> dict:
if code in self._basic_info_cache:
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231",
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
# HK logic typically defaults to 1231, ignoring accounting_date output in HkFetcher
info["acc_date"] = str(row.get("accounting_date", "1231"))
info["accounting_date"] = "1231"
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _fetch_financial_data_annual(self, symbol: str, code: str, indicator_configs: list) -> pd.DataFrame:
current_year = int(time.strftime("%Y"))
last_valid_year = None
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}1231"
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}1231"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
valid_cols = [c for c in df.columns if c not in ['end_date', 'date']]
if not df[valid_cols].isnull().all().all():
df['end_date'] = target_date
df = df.dropna(axis=1, how='all')
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
def get_income_statement(self, symbol: str, code: str) -> pd.DataFrame:
indicators = [
{"indicator": "total_oi"},
{"indicator": "prime_oi"},
{"indicator": "other_oi"},
{"indicator": "operating_cost"},
{"indicator": "operating_expense"},
{"indicator": "operating_fee"},
{"indicator": "p_depreciation_and_amortization"},
{"indicator": "gross_profit"},
{"indicator": "sales_ad_and_ga"},
{"indicator": "rad_cost"},
{"indicator": "sales_fee"},
{"indicator": "financial_expense"},
{"indicator": "sales_income"},
{"indicator": "sales_cost"},
{"indicator": "other_income"},
{"indicator": "manage_fee"},
{"indicator": "deprec_and_amorti"},
{"indicator": "total_other_opearting_expense"},
{"indicator": "p_total_cost"},
{"indicator": "operating_profit"},
{"indicator": "total_gal"},
{"indicator": "interest_income"},
{"indicator": "interest_net_pay"},
{"indicator": "interest_expense"},
{"indicator": "income_from_asso_and_joint"},
{"indicator": "other_gal_effct_profit_pre_tax"},
{"indicator": "conti_op_before_tax"},
{"indicator": "profit_before_noncurrent_items"},
{"indicator": "profit_and_loss_of_noncurrent_items"},
{"indicator": "profit_before_tax"},
{"indicator": "income_tax"},
{"indicator": "profit_after_tax"},
{"indicator": "minoritygal"},
{"indicator": "continue_operate_net_profit"},
{"indicator": "noncontinue_operate_net_profit"},
{"indicator": "other_special_items"},
{"indicator": "ni_attr_to_cs"},
{"indicator": "np_atms"},
{"indicator": "preferred_divid_and_other_adjust"},
{"indicator": "oci"},
{"indicator": "total_oci"},
{"indicator": "oci_from_parent"},
{"indicator": "oci_from_minority"},
{"indicator": "invest_property_fv_chg"},
{"indicator": "operating_amt"},
{"indicator": "oi_si"},
{"indicator": "operating_premium_profit_si"},
{"indicator": "to_toallied_corp_perf"},
{"indicator": "to_joint_control_entity_perf"},
{"indicator": "pre_tax_profit_si"},
{"indicator": "after_tax_profit_si"},
{"indicator": "profit_attrbt_to_nonholders"},
{"indicator": "total_income_atncs"}
]
df = self._fetch_financial_data_annual(symbol, code, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'total_oi': 'revenue',
'operating_amt': 'turnover',
'gross_profit': 'gross_profit',
'sales_ad_and_ga': 'sga_exp',
'sales_fee': 'selling_marketing_exp',
'manage_fee': 'ga_exp',
'rad_cost': 'rd_exp',
'income_tax': 'income_tax',
'ni_attr_to_cs': 'net_income',
'operating_profit': 'operating_profit',
'depreciation': 'depreciation',
'deprec_and_amorti': 'depreciation',
'p_depreciation_and_amortization': 'depreciation'
}
df_filtered = df.rename(columns=rename_map)
if 'ebit' not in df_filtered.columns and 'operating_profit' in df_filtered.columns:
df_filtered['ebit'] = df_filtered['operating_profit']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_balance_sheet(self, symbol: str, code: str) -> pd.DataFrame:
indicators = [
{"indicator": "cce"},
{"indicator": "st_investment"},
{"indicator": "total_cash"},
{"indicator": "account_receivable"},
{"indicator": "tradable_fnncl_asset"},
{"indicator": "derivative_fnncl_assets"},
{"indicator": "restriv_fund"},
{"indicator": "other_short_term_investment"},
{"indicator": "ar_nr"},
{"indicator": "total_ar"},
{"indicator": "or"},
{"indicator": "inventory"},
{"indicator": "flow_assets_dit"},
{"indicator": "pre_payment"},
{"indicator": "other_cunrrent_assets_si"},
{"indicator": "other_ca"},
{"indicator": "total_ca"},
{"indicator": "receivables_from_allied_corp"},
{"indicator": "current_assets_si"},
{"indicator": "prepay_deposits_etc"},
{"indicator": "receivables_from_jce"},
{"indicator": "receivables_from_ac"},
{"indicator": "recoverable_tax"},
{"indicator": "total_fixed_assets"},
{"indicator": "depreciation"},
{"indicator": "equity_and_lt_invest"},
{"indicator": "net_fixed_assets"},
{"indicator": "invest_property"},
{"indicator": "equity_investment"},
{"indicator": "investment_in_associate"},
{"indicator": "investment_in_joints"},
{"indicator": "held_to_maturity_invest"},
{"indicator": "goodwill_and_intangible_asset"},
{"indicator": "intangible_assets"},
{"indicator": "accum_amortized"},
{"indicator": "noncurrent_assets_dit"},
{"indicator": "other_noncurrent_assets_si"},
{"indicator": "dt_assets"},
{"indicator": "total_noncurrent_assets"},
{"indicator": "total_assets"},
{"indicator": "ac_equity"},
{"indicator": "lease_prepay"},
{"indicator": "noncurrent_assets_si"},
{"indicator": "st_lt_current_loan"},
{"indicator": "trade_financial_lia"},
{"indicator": "derivative_financial_lia"},
{"indicator": "ap_np"},
{"indicator": "accounts_payable"},
{"indicator": "advance_payment"},
{"indicator": "st_debt"},
{"indicator": "contra_liab"},
{"indicator": "tax_payable"},
{"indicator": "accrued_liab"},
{"indicator": "flow_debt_deferred_income"},
{"indicator": "other_cl"},
{"indicator": "other_cunrrent_liab_si"},
{"indicator": "total_cl"},
{"indicator": "accrued_expenses_etc"},
{"indicator": "money_payable_toac"},
{"indicator": "joint_control_entity_payable"},
{"indicator": "payable_to_associated_corp"},
{"indicator": "lt_debt"},
{"indicator": "long_term_loan"},
{"indicator": "other_noncurrent_liabi"},
{"indicator": "deferred_tax_liability"},
{"indicator": "ncl_deferred_income"},
{"indicator": "other_noncurrent_liab_si"},
{"indicator": "noncurrent_liab_si"},
{"indicator": "total_noncurrent_liab"},
{"indicator": "total_liab"},
{"indicator": "common_shares"},
{"indicator": "capital_reserve"},
{"indicator": "equity_premium"},
{"indicator": "treasury_stock"},
{"indicator": "accumgal"},
{"indicator": "equity_atsopc_sbi"},
{"indicator": "preferred_stock"},
{"indicator": "perpetual_debt"},
{"indicator": "reserve"},
{"indicator": "other_reserves"},
{"indicator": "retained_earnings"},
{"indicator": "oci_bs"},
{"indicator": "total_common_equity"},
{"indicator": "equity_belong_to_parent"},
{"indicator": "minority_interests"},
{"indicator": "other_equity_si"},
{"indicator": "total_equity"},
{"indicator": "total_lib_and_equity"},
{"indicator": "equity_si"},
{"indicator": "equity_atncs"}
]
df = self._fetch_financial_data_annual(symbol, code, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cce': 'cash',
'ar_nr': 'receivables',
'inventory': 'inventory',
'net_fixed_assets': 'fixed_assets',
'equity_and_lt_invest': 'long_term_investments',
'goodwill_and_intangible_asset': 'goodwill',
'st_debt': 'short_term_debt',
'st_lt_current_loan': 'short_term_borrowings',
'ap_np': 'accounts_payable',
'contra_liab': 'contract_liabilities',
'advance_payment': 'advances_from_customers',
'flow_debt_deferred_income': 'deferred_revenue',
'lt_debt': 'long_term_debt',
'long_term_loan': 'long_term_borrowings',
'total_assets': 'total_assets',
'equity_belong_to_parent': 'total_equity',
'pre_payment': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_liab' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_liab']
elif 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_cash_flow(self, symbol: str, code: str) -> pd.DataFrame:
indicators = [
{"indicator": "ni"},
{"indicator": "depreciation_and_amortization"},
{"indicator": "operating_capital_change"},
{"indicator": "ncf_from_oa"},
{"indicator": "capital_cost"},
{"indicator": "invest_buy"},
{"indicator": "ncf_from_ia"},
{"indicator": "increase_in_share_capital"},
{"indicator": "decrease_in_share_capital"},
{"indicator": "total_dividends_paid"},
{"indicator": "ncf_from_fa"}
]
df = self._fetch_financial_data_annual(symbol, code, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'ncf_from_oa': 'ocf',
'capital_cost': 'capex',
'total_dividends_paid': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
def get_market_metrics(self, symbol: str, code: str) -> dict:
basic_info = self._fetch_basic_info(symbol, code)
metrics = {
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", ""),
"accounting_date": basic_info.get("accounting_date", ""),
"price": 0,
"market_cap": 0,
"pe": 0,
"pb": 0,
"dividend_yield": 0
}
params = {
"codes": code,
"indipara": [
{"indicator": "ths_close_price_stock", "indiparams": []},
{"indicator": "ths_market_value_stock", "indiparams": []},
{"indicator": "ths_pe_ttm_stock", "indiparams": []},
{"indicator": "ths_pb_stock", "indiparams": []},
{"indicator": "ths_dividend_ratio_stock", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
row = df.iloc[0]
metrics["price"] = float(row.get("ths_close_price_stock") or 0)
metrics["market_cap"] = float(row.get("ths_market_value_stock") or 0)
metrics["pe"] = float(row.get("ths_pe_ttm_stock") or 0)
metrics["pb"] = float(row.get("ths_pb_stock") or 0)
metrics["dividend_yield"] = float(row.get("ths_dividend_ratio_stock") or 0)
return metrics
def get_historical_metrics(self, symbol: str, code: str, dates: list) -> pd.DataFrame:
if not dates: return pd.DataFrame()
results = []
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
def get_dividends(self, symbol: str, code: str) -> pd.DataFrame:
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}1231",
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
def get_repurchases(self, symbol: str, code: str) -> pd.DataFrame:
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-12-31"
end_date = f"{target_year}-12-31"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}1231",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
def get_employee_count(self, symbol: str, code: str) -> pd.DataFrame:
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-12-31"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}1231",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp
def get_financial_ratios(self, symbol: str, code: str) -> pd.DataFrame:
current_year = int(time.strftime("%Y"))
# 1. Determine the latest valid year
last_valid_year = None
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}1231"
params = {
"codes": code,
"indipara": [{"indicator": "roe", "indiparams": [test_date]}]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(val) and val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
date_str = f"{target_year}1231"
year_str = str(target_year)
indipara = []
for key in ["salary_pp", "revenue_pp", "profit_pp"]:
indipara.append({"indicator": key, "indiparams": [year_str, "100"]})
ratio_keys = [
"roe", "roa", "roic",
"sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
"operating_revenue_yoy", "np_atsopc_yoy",
"ibdebt_ratio_asset_base",
"inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
"fixed_asset_turnover_ratio", "total_capital_turnover"
]
for key in ratio_keys:
indipara.append({"indicator": key, "indiparams": [date_str]})
params = {
"codes": code,
"indipara": indipara
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
if 'end_date' not in df.columns:
df['end_date'] = date_str
df = df.dropna(axis=1, how='all')
valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
if not df[valid_cols].isnull().all().all():
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
combined = pd.concat(all_dfs, ignore_index=True)
self._save_raw_data(combined, symbol, "financial_ratios_raw")
rename_map = {
"salary_pp": "salary_per_employee",
"revenue_pp": "revenue_per_employee",
"profit_pp": "profit_per_employee",
"sales_fee_to_or": "selling_expense_ratio",
"manage_fee_to_revenue": "admin_expense_ratio",
"rad_expense_to_total_income": "rd_expense_ratio",
"operating_revenue_yoy": "revenue_growth",
"np_atsopc_yoy": "net_profit_growth",
"ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
"fixed_asset_turnover_ratio": "fixed_asset_turnover",
"total_capital_turnover": "total_asset_turnover"
}
df_final = combined.rename(columns=rename_map)
for col in df_final.columns:
if col not in ['date', 'end_date']:
df_final[col] = pd.to_numeric(df_final[col], errors='coerce')
return self._filter_data(df_final)

View File

@ -0,0 +1,556 @@
import pandas as pd
import time
from .ifind_client import IFindClient
from storage.file_io import DataStorage
class IFindIntClient:
"""
Generic iFinD Client for International Markets (JP, VN, US).
Uses 'OAS' (Original Accounting Standards?) or similar standardized indicators
typically available for international stocks in iFinD.
"""
def __init__(self, api_key: str, market: str):
self.cli = IFindClient(refresh_token=api_key)
self.storage = DataStorage()
self.market = market
self._basic_info_cache = {}
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
if isinstance(data, dict):
df = pd.DataFrame([data])
else:
df = data
self.storage.save_data(df, self.market, symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
if not res:
return pd.DataFrame()
if res.get("errorcode") != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
return pd.DataFrame()
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
# iFinD typically returns data only for the specific dates requested, and
# JP/VN/US companies report annually. So keep the latest record, its
# year-over-year comparable, and the annual (fiscal year-end) records, then
# deduplicate, consistent with the existing concatenation logic
# (see the illustrative example after this method).
is_annual = df['end_date'].astype(str).str.endswith('1231') | df['end_date'].astype(str).str.endswith('0331')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
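# Illustrative example (hypothetical dates): given rows dated 20240630,
# 20240331, 20231231, 20230331 and 20221231, this keeps 20240630 (the latest),
# 20230630 if present (the year-over-year comparable), and every 0331/1231
# annual row, deduplicated and sorted descending by end_date.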
def _fetch_basic_info(self, symbol: str, code: str) -> dict:
if code in self._basic_info_cache:
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231",
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
if acc_date:
info["accounting_date"] = acc_date
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _fetch_financial_data_annual(self, symbol: str, code: str, indicator_configs: list) -> pd.DataFrame:
basic_info = self._fetch_basic_info(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
last_valid_year = None
# 1. Determine most recent valid year
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}{acc_date}"
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}{acc_date}"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
df['end_date'] = target_date
all_dfs.append(df)
# Filter and concat
all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
def get_income_statement(self, symbol: str, code: str) -> pd.DataFrame:
indicators = [
{"indicator": "revenue_oas"},
{"indicator": "gross_profit_oas"},
{"indicator": "sga_expenses_oas"},
{"indicator": "selling_marketing_expenses_oas"},
{"indicator": "ga_expenses_oas"},
{"indicator": "rd_expenses_oas"},
{"indicator": "income_tax_expense_oas"},
{"indicator": "net_income_attri_to_common_sh_oas"},
{"indicator": "operating_income_oas"}
]
df = self._fetch_financial_data_annual(symbol, code, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'revenue_oas': 'revenue',
'gross_profit_oas': 'gross_profit',
'sga_expenses_oas': 'sga_exp',
'selling_marketing_expenses_oas': 'selling_marketing_exp',
'ga_expenses_oas': 'ga_exp',
'rd_expenses_oas': 'rd_exp',
'income_tax_expense_oas': 'income_tax',
'net_income_attri_to_common_sh_oas': 'net_income',
'operating_income_oas': 'operating_profit'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_balance_sheet(self, symbol: str, code: str) -> pd.DataFrame:
indicators = [
{"indicator": "cash_equi_short_term_inve_oas"},
{"indicator": "accou_and_notes_recei_oas"},
{"indicator": "inventories_oas"},
{"indicator": "ppe_net_oas"},
{"indicator": "long_term_inv_and_receiv_oas"},
{"indicator": "goodwill_and_intasset_oas"},
{"indicator": "short_term_debt_oas"},
{"indicator": "short_term_borrowings_oas"},
{"indicator": "account_and_note_payable_oas"},
{"indicator": "contra_liabilities_current_oas"},
{"indicator": "advance_from_cust_current_oas"},
{"indicator": "defer_revenue_current_oas"},
{"indicator": "long_term_debt_oas"},
{"indicator": "long_term_borrowings_oas"},
{"indicator": "total_assets_oas"},
{"indicator": "equity_attri_to_companyowner_oas"},
{"indicator": "prepaid_expenses_current_oas"}
]
df = self._fetch_financial_data_annual(symbol, code, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cash_equi_short_term_inve_oas': 'cash',
'accou_and_notes_recei_oas': 'receivables',
'inventories_oas': 'inventory',
'ppe_net_oas': 'fixed_assets',
'long_term_inv_and_receiv_oas': 'long_term_investments',
'goodwill_and_intasset_oas': 'goodwill',
'short_term_debt_oas': 'short_term_debt',
'short_term_borrowings_oas': 'short_term_borrowings',
'account_and_note_payable_oas': 'accounts_payable',
'contra_liabilities_current_oas': 'contract_liabilities',
'advance_from_cust_current_oas': 'advances_from_customers',
'defer_revenue_current_oas': 'deferred_revenue',
'long_term_debt_oas': 'long_term_debt',
'long_term_borrowings_oas': 'long_term_borrowings',
'total_assets_oas': 'total_assets',
'equity_attri_to_companyowner_oas': 'total_equity',
'prepaid_expenses_current_oas': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
def get_cash_flow(self, symbol: str, code: str) -> pd.DataFrame:
indicators = [
{"indicator": "net_cash_flows_from_oa_oas"},
{"indicator": "purchase_of_ppe_and_ia_oas"},
{"indicator": "dividends_paid_oas"}
]
df = self._fetch_financial_data_annual(symbol, code, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'net_cash_flows_from_oa_oas': 'ocf',
'purchase_of_ppe_and_ia_oas': 'capex',
'dividends_paid_oas': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
def get_market_metrics(self, symbol: str, code: str) -> dict:
basic_info = self._fetch_basic_info(symbol, code)
metrics = {
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", "")
}
return metrics
def get_historical_metrics(self, symbol: str, code: str, dates: list) -> pd.DataFrame:
if not dates: return pd.DataFrame()
results = []
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
def get_dividends(self, symbol: str, code: str) -> pd.DataFrame:
basic_info = self._fetch_basic_info(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}{acc_date}",
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
def get_repurchases(self, symbol: str, code: str) -> pd.DataFrame:
basic_info = self._fetch_basic_info(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
# acc_date is an MMDD string (defaults to "1231" in _fetch_basic_info);
# split it into MM and DD to build YYYY-MM-DD window boundaries.
mm = acc_date[:2]
dd = acc_date[2:]
fmt_mm_dd = f"{mm}-{dd}"
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-{fmt_mm_dd}"
end_date = f"{target_year}-{fmt_mm_dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
def get_employee_count(self, symbol: str, code: str) -> pd.DataFrame:
basic_info = self._fetch_basic_info(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-{mm}-{dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp
def get_financial_ratios(self, symbol: str, code: str) -> pd.DataFrame:
# Generic implementation modelled on the JP fetcher's financial-ratio logic;
# other international markets (e.g. VN) may or may not return data for
# these indicators.
current_year = int(time.strftime("%Y"))
basic_info = self._fetch_basic_info(symbol, code)
acc_date = basic_info.get("accounting_date", "1231")
last_valid_year = None
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}{acc_date}"
params = {
"codes": code,
"indipara": [{"indicator": "roe", "indiparams": [test_date]}]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(val) and val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
date_str = f"{target_year}{acc_date}"
year_str = str(target_year)
indipara = []
for key in ["salary_pp", "revenue_pp", "profit_pp"]:
indipara.append({"indicator": key, "indiparams": [year_str, "100"]})
ratio_keys = [
"roe", "roa", "roic",
"sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
"operating_revenue_yoy", "np_atsopc_yoy",
"ibdebt_ratio_asset_base",
"inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
"fixed_asset_turnover_ratio", "total_capital_turnover"
]
for key in ratio_keys:
indipara.append({"indicator": key, "indiparams": [date_str]})
params = {
"codes": code,
"indipara": indipara
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
if 'end_date' not in df.columns:
df['end_date'] = date_str
df = df.dropna(axis=1, how='all')
valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
if not df[valid_cols].isnull().all().all():
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
combined = pd.concat(all_dfs, ignore_index=True)
self._save_raw_data(combined, symbol, "financial_ratios_raw")
rename_map = {
"salary_pp": "salary_per_employee",
"revenue_pp": "revenue_per_employee",
"profit_pp": "profit_per_employee",
"sales_fee_to_or": "selling_expense_ratio",
"manage_fee_to_revenue": "admin_expense_ratio",
"rad_expense_to_total_income": "rd_expense_ratio",
"operating_revenue_yoy": "revenue_growth",
"np_atsopc_yoy": "net_profit_growth",
"ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
"fixed_asset_turnover_ratio": "fixed_asset_turnover",
"total_capital_turnover": "total_asset_turnover"
}
df_final = combined.rename(columns=rename_map)
for col in df_final.columns:
if col not in ['date', 'end_date']:
df_final[col] = pd.to_numeric(df_final[col], errors='coerce')
return self._filter_data(df_final)

View File

@ -1,515 +1,60 @@
import pandas as pd
import os
import time
from .base import DataFetcher
from .ifind_client import IFindClient
from .ifind_int_client import IFindIntClient
from storage.file_io import DataStorage
class JpFetcher(DataFetcher):
def __init__(self, api_key: str):
# api_key is the iFinD Refresh Token
super().__init__(api_key)
self.cli = IFindClient(refresh_token=api_key)
self.data_source = 'iFinD'
self.client = IFindIntClient(api_key, 'JP')
self.storage = DataStorage()
self._basic_info_cache = {}
def _get_ifind_code(self, symbol: str) -> str:
"""保持逻辑一致性,如果是纯数字则补齐后缀 .T否则直接传"""
# Simple logic: if pure digits, append .T (Tokyo SE).
# Otherwise assume it's already a code or handled.
if symbol.isdigit():
return f"{symbol}.T"
return symbol
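# Illustrative examples: "7203" -> "7203.T"; a symbol that is not pure digits,
# e.g. "7203.T", is returned unchanged.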
def _fetch_basic_info(self, symbol: str) -> dict:
"""获取公司的基本信息:中文名称、会计年结日、上市日期"""
def _fetch_basic_info(self, symbol: str):
# Delegate to client
code = self._get_ifind_code(symbol)
if code in self._basic_info_cache:
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231", # 默认 12-31
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
# accounting_date usually comes back as something like "03-31" or "1231"
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
# The iFinD API seems buggy here: even when the report date is 0331, querying 20240331 returns empty data
# if acc_date:
# info["accounting_date"] = acc_date
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
# If it's a dict (an API response), save it directly
if isinstance(data, dict):
df = pd.DataFrame([data])  # wrap it into a single-row DataFrame
else:
df = data
self.storage.save_data(df, 'JP', symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
if not res:
return pd.DataFrame()
if res.get("errorcode") != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
print("iFinD API Warning: No tables found in response.")
return pd.DataFrame()
# Extract the first table
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
# Ensure all values are lists to avoid pd.DataFrame ValueError with scalars
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
# If still no end_date, look for it in columns
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
# Handle YoY logic: YYYYMMDD -> (YYYY-1)MMDD
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
# Align with the CN logic; Japanese fiscal years mostly end on 03-31
is_annual = df['end_date'].astype(str).str.endswith('0331') | df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
"""通用获取历年会计年结日的财务数据 (CNY 结算)"""
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
# 1. First, determine the most recent valid year by trying backwards from current year
last_valid_year = None
# Try up to 3 years back to find the latest available report
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}{acc_date}"
# Use the first indicator to test availability
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
# Check for non-null values
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
# 2. Fetch 5 years starting from the last valid year
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}{acc_date}"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
# Force-set end_date in case the API returns inconsistent dates
df['end_date'] = target_date
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
# Drop DataFrames that are empty or entirely NA (avoids a pandas concat FutureWarning)
all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
return self.client._fetch_basic_info(symbol, code)
def get_income_statement(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "revenue_oas"},
{"indicator": "gross_profit_oas"},
{"indicator": "sga_expenses_oas"},
{"indicator": "selling_marketing_expenses_oas"},
{"indicator": "ga_expenses_oas"},
{"indicator": "rd_expenses_oas"},
{"indicator": "income_tax_expense_oas"},
{"indicator": "net_income_attri_to_common_sh_oas"},
{"indicator": "operating_income_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'revenue_oas': 'revenue',
'gross_profit_oas': 'gross_profit',
'sga_expenses_oas': 'sga_exp',
'selling_marketing_expenses_oas': 'selling_marketing_exp',
'ga_expenses_oas': 'ga_exp',
'rd_expenses_oas': 'rd_exp',
'income_tax_expense_oas': 'income_tax',
'net_income_attri_to_common_sh_oas': 'net_income',
'operating_income_oas': 'operating_profit'
}
df_filtered = df.rename(columns=rename_map)
# Numeric conversion
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_income_statement(symbol, code)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "cash_equi_short_term_inve_oas"},
{"indicator": "accou_and_notes_recei_oas"},
{"indicator": "inventories_oas"},
{"indicator": "ppe_net_oas"},
{"indicator": "long_term_inv_and_receiv_oas"},
{"indicator": "goodwill_and_intasset_oas"},
{"indicator": "short_term_debt_oas"},
{"indicator": "short_term_borrowings_oas"},
{"indicator": "account_and_note_payable_oas"},
{"indicator": "contra_liabilities_current_oas"},
{"indicator": "advance_from_cust_current_oas"},
{"indicator": "defer_revenue_current_oas"},
{"indicator": "long_term_debt_oas"},
{"indicator": "long_term_borrowings_oas"},
{"indicator": "total_assets_oas"},
{"indicator": "equity_attri_to_companyowner_oas"},
{"indicator": "prepaid_expenses_current_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cash_equi_short_term_inve_oas': 'cash',
'accou_and_notes_recei_oas': 'receivables',
'inventories_oas': 'inventory',
'ppe_net_oas': 'fixed_assets',
'long_term_inv_and_receiv_oas': 'long_term_investments',
'goodwill_and_intasset_oas': 'goodwill',
'short_term_debt_oas': 'short_term_debt',
'short_term_borrowings_oas': 'short_term_borrowings',
'account_and_note_payable_oas': 'accounts_payable',
'contra_liabilities_current_oas': 'contract_liabilities',
'advance_from_cust_current_oas': 'advances_from_customers',
'defer_revenue_current_oas': 'deferred_revenue',
'long_term_debt_oas': 'long_term_debt',
'long_term_borrowings_oas': 'long_term_borrowings',
'total_assets_oas': 'total_assets',
'equity_attri_to_companyowner_oas': 'total_equity',
'prepaid_expenses_current_oas': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
# If total liabilities is missing, derive it as assets minus equity
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_balance_sheet(symbol, code)
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "net_cash_flows_from_oa_oas"},
{"indicator": "purchase_of_ppe_and_ia_oas"},
{"indicator": "dividends_paid_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'net_cash_flows_from_oa_oas': 'ocf',
'purchase_of_ppe_and_ia_oas': 'capex',
'dividends_paid_oas': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_cash_flow(symbol, code)
def get_market_metrics(self, symbol: str) -> dict:
"""获取公司基本信息(名称、上市日期等静态数据)"""
basic_info = self._fetch_basic_info(symbol)
metrics = {
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", "")
}
return metrics
code = self._get_ifind_code(symbol)
return self.client.get_market_metrics(symbol, code)
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
"""获取历史日期的收盘价和市值 (通过 cmd_history_quotation)"""
code = self._get_ifind_code(symbol)
if not dates: return pd.DataFrame()
results = []
# In get_historical_metrics, do not fetch all dates in one request; fetch them one date at a time
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
# print(f"iFinD API Request: endpoint=date_sequence, params={params}")
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
# Find the record closest to, but not later than, the target date
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
return self.client.get_historical_metrics(symbol, code, dates)
def get_dividends(self, symbol: str) -> pd.DataFrame:
"""获取历年年度累计分红记录 (逐年获取)"""
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
results = []
# Fetch the most recent 5 years of data
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}{acc_date}",
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
return self.client.get_dividends(symbol, code)
def get_repurchases(self, symbol: str) -> pd.DataFrame:
"""获取历年年度回购记录 (从 repur_num_new 获取)"""
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
# To match the YYYY-MM-DD date format
fmt_mm_dd = f"{mm}-{dd}"
current_year = int(time.strftime("%Y"))
results = []
# Fetch the most recent 5 years of data
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-{fmt_mm_dd}"
end_date = f"{target_year}-{fmt_mm_dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
return self.client.get_repurchases(symbol, code)
def get_employee_count(self, symbol: str) -> pd.DataFrame:
"""获取历年员工人数"""
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
return self.client.get_employee_count(symbol, code)
current_year = int(time.strftime("%Y"))
results = []
# Fetch the most recent 5 years of data
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-{mm}-{dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
return self.client.get_financial_ratios(symbol, code)

View File

@ -0,0 +1,242 @@
import tushare as ts
import pandas as pd
from storage.file_io import DataStorage
class TushareCnClient:
def __init__(self, api_key: str):
ts.set_token(api_key)
self.pro = ts.pro_api()
self.storage = DataStorage()
self.api_key = api_key
def _save_raw_data(self, df: pd.DataFrame, symbol: str, name: str):
if df is None or df.empty:
return
market = 'CN'
self.storage.save_data(df, market, symbol, f"raw_{name}")
def _get_ts_code(self, symbol: str) -> str:
return symbol
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
is_annual = df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
def get_income_statement(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.income(ts_code=ts_code)
self._save_raw_data(df, ts_code, "income_statement")
rename_map = {
'end_date': 'date',
'revenue': 'revenue',
'n_income_attr_p': 'net_income'
}
df = self._filter_data(df)
df = df.rename(columns=rename_map)
return df
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.balancesheet(ts_code=ts_code)
self._save_raw_data(df, ts_code, "balance_sheet")
rename_map = {
'end_date': 'date',
'total_hldr_eqy_exc_min_int': 'total_equity',
'total_liab': 'total_liabilities',
'total_cur_assets': 'current_assets',
'total_cur_liab': 'current_liabilities'
}
df = self._filter_data(df)
df = df.rename(columns=rename_map)
return df
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.cashflow(ts_code=ts_code)
self._save_raw_data(df, ts_code, "cash_flow")
df = self._filter_data(df)
df = df.rename(columns={
'end_date': 'date',
'n_cashflow_act': 'net_cash_flow',
'depr_fa_coga_dpba': 'depreciation'
})
return df
def get_market_metrics(self, symbol: str) -> dict:
ts_code = self._get_ts_code(symbol)
metrics = {
"price": 0.0,
"market_cap": 0.0,
"pe": 0.0,
"pb": 0.0,
"total_share_holders": 0,
"employee_count": 0
}
try:
df_daily = self.pro.daily_basic(ts_code=ts_code, limit=1)
self._save_raw_data(df_daily, ts_code, "market_metrics_daily_basic")
if not df_daily.empty:
row = df_daily.iloc[0]
metrics["price"] = row.get('close', 0.0)
metrics["pe"] = row.get('pe', 0.0)
metrics["pb"] = row.get('pb', 0.0)
metrics["market_cap"] = row.get('total_mv', 0.0) * 10000
metrics["dividend_yield"] = row.get('dv_ttm', 0.0)
df_basic = self.pro.stock_basic(ts_code=ts_code, fields='name,list_date')
self._save_raw_data(df_basic, ts_code, "market_metrics_stock_basic")
if not df_basic.empty:
metrics['name'] = df_basic.iloc[0]['name']
metrics['list_date'] = df_basic.iloc[0]['list_date']
df_comp = self.pro.stock_company(ts_code=ts_code)
if not df_comp.empty:
metrics["employee_count"] = int(df_comp.iloc[0].get('employees', 0) or 0)
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, limit=1)
self._save_raw_data(df_holder, ts_code, "market_metrics_shareholder_number")
if not df_holder.empty:
metrics["total_share_holders"] = int(df_holder.iloc[0].get('holder_num', 0) or 0)
except Exception as e:
print(f"Error fetching market metrics for {symbol}: {e}")
return metrics
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
results = []
if not dates:
return pd.DataFrame()
unique_dates = sorted(list(set([str(d).replace('-', '') for d in dates])), reverse=True)
try:
import datetime
min_date = min(unique_dates)
max_date = max(unique_dates)
df_daily = self.pro.daily_basic(ts_code=ts_code, start_date=min_date, end_date=max_date)
self._save_raw_data(df_daily, ts_code, "historical_metrics_daily_basic")
if not df_daily.empty:
df_daily = df_daily.sort_values('trade_date', ascending=False)
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, start_date=min_date, end_date=max_date)
self._save_raw_data(df_holder, ts_code, "historical_metrics_shareholder_number")
if not df_holder.empty:
df_holder = df_holder.sort_values('end_date', ascending=False)
for date_str in unique_dates:
metrics = {'date_str': date_str}
if not df_daily.empty:
closest_daily = df_daily[df_daily['trade_date'] <= date_str]
if not closest_daily.empty:
row = closest_daily.iloc[0]
metrics['Price'] = row.get('close')
metrics['PE'] = row.get('pe')
metrics['PB'] = row.get('pb')
metrics['MarketCap'] = row.get('total_mv', 0) * 10000
if not df_holder.empty:
closest_holder = df_holder[df_holder['end_date'] <= date_str]
if not closest_holder.empty:
metrics['Shareholders'] = closest_holder.iloc[0].get('holder_num')
results.append(metrics)
except Exception as e:
print(f"Error fetching historical metrics for {symbol}: {e}")
return pd.DataFrame(results)
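# Note on matching: for each requested report date the closest prior trade_date / disclosure end_date is used,
# so dates that fall on weekends or holidays still pick up the last available quote and holder count.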
def get_dividends(self, symbol: str) -> pd.DataFrame:
import time
ts_code = self._get_ts_code(symbol)
df_div = self.pro.dividend(ts_code=ts_code, fields='end_date,ex_date,div_proc,cash_div')
self._save_raw_data(df_div, ts_code, "dividends_raw")
if df_div.empty:
return pd.DataFrame()
# Filter for implemented cash dividends
df_div = df_div[(df_div['div_proc'] == '实施') & (df_div['cash_div'] > 0)]
if df_div.empty:
return pd.DataFrame()
df_div['total_cash_div'] = 0.0
# Get total shares for each ex_date
for index, row in df_div.iterrows():
ex_date = row['ex_date']
if not ex_date or pd.isna(ex_date):
continue
try:
time.sleep(0.2) # Sleep for 200ms to avoid hitting API limits
df_daily = self.pro.daily_basic(ts_code=ts_code, trade_date=ex_date, fields='total_share')
if not df_daily.empty and not df_daily['total_share'].empty:
total_share = df_daily.iloc[0]['total_share'] # total_share is in 万股 (10k shares)
cash_div_per_share = row['cash_div'] # This is per-share
# Total dividend in Yuan
total_cash_dividend = (cash_div_per_share * total_share * 10000)
df_div.loc[index, 'total_cash_div'] = total_cash_dividend
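# Worked check (hypothetical figures): cash_div = 0.5 yuan/share with total_share = 150000
# (i.e. 1.5e9 shares, since the unit is 万股) gives 0.5 * 150000 * 10000 = 7.5e8 yuan paid out.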
except Exception as e:
print(f"Could not fetch daily basic for {ts_code} on {ex_date}: {e}")
df_div['year'] = pd.to_datetime(df_div['end_date']).dt.year
dividends_by_year = df_div.groupby('year')['total_cash_div'].sum().reset_index()
dividends_by_year['date_str'] = dividends_by_year['year'].astype(str) + '1231'
dividends_by_year.rename(columns={'total_cash_div': 'dividends'}, inplace=True)
return dividends_by_year[['date_str', 'dividends']]
def get_repurchases(self, symbol: str) -> pd.DataFrame:
ts_code = self._get_ts_code(symbol)
df = self.pro.repurchase(ts_code=ts_code)
self._save_raw_data(df, ts_code, "repurchases")
if df.empty or 'ann_date' not in df.columns or 'amount' not in df.columns:
return pd.DataFrame()
# Filter for repurchases with a valid amount
df = df[df['amount'] > 0]
if df.empty:
return pd.DataFrame()
# Extract year and group by it
df['year'] = pd.to_datetime(df['ann_date']).dt.year
repurchases_by_year = df.groupby('year')['amount'].sum().reset_index()
# Create date_str for merging (YYYY1231)
repurchases_by_year['date_str'] = repurchases_by_year['year'].astype(str) + '1231'
# Rename for merging; based on user feedback the amount from the API appears to already be in yuan, so no unit conversion is needed.
repurchases_by_year.rename(columns={'amount': 'repurchases'}, inplace=True)
return repurchases_by_year[['date_str', 'repurchases']]
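# Illustrative result (made-up values): rows such as date_str='20231231', repurchases=1.2e9 and
# date_str='20221231', repurchases=8.5e8, ready to merge with the annual financial data on date_str.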

View File

@ -1,182 +1,93 @@
import requests
import pandas as pd
import time
from .base import DataFetcher
# Import clients
from .alphavantage_us_client import AlphaVantageUsClient
from .ifind_int_client import IFindIntClient
from storage.file_io import DataStorage
class UsFetcher(DataFetcher):
BASE_URL = "https://www.alphavantage.co/query"
def __init__(self, api_key: str):
def __init__(self, api_key: str, data_source: str = 'Alpha Vantage'):
super().__init__(api_key)
self.data_source = data_source
self.storage = DataStorage()
def _save_raw_data(self, data, symbol: str, name: str):
if data is None:
return
df = pd.DataFrame()
if isinstance(data, list):
df = pd.DataFrame(data)
elif isinstance(data, dict):
# For single-record JSON objects, convert to a DataFrame
df = pd.DataFrame([data])
if not df.empty:
self.storage.save_data(df, 'US', symbol, f"raw_{name}")
def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
params = {
"function": function,
"symbol": symbol,
"apikey": self.api_key
}
try:
time.sleep(15)
response = requests.get(self.BASE_URL, params=params)
data = response.json()
except Exception as e:
print(f"Error requesting {function}: {e}")
return pd.DataFrame()
if data:
self._save_raw_data(data.get("annualReports"), symbol, f"{function.lower()}_annual")
df_annual = pd.DataFrame()
if "annualReports" in data and data["annualReports"]:
df_annual = pd.DataFrame(data["annualReports"])
if "fiscalDateEnding" in df_annual.columns:
df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
# Alpha Vantage already returns annual reports sorted newest-first, so unlike iFinD there is
# no need to probe year by year; simply keep the five most recent entries.
df_annual = df_annual.head(5)
if self.data_source == 'iFinD':
self.client = IFindIntClient(api_key, 'US')
else:
print(f"Error fetching {function} for {symbol}: {data}")
return pd.DataFrame()
self.client = AlphaVantageUsClient(api_key)
return df_annual
def _get_ifind_code(self, symbol: str) -> str:
# iFinD US codes are typically just the ticker (e.g. 'AAPL'); the exact suffix convention is
# not documented here, so the symbol is passed through unchanged.
return symbol
def _save_raw_data(self, data, symbol: str, name: str):
# Raw-data persistence is now handled inside the client implementations; kept as a
# no-op for backward compatibility with any legacy callers.
pass
def get_market_metrics(self, symbol: str) -> dict:
# 1. Get Overview for PE, PB, MarketCap, Employees
overview_data = {}
try:
time.sleep(15)
params = {"function": "OVERVIEW", "symbol": symbol, "apikey": self.api_key}
r = requests.get(self.BASE_URL, params=params)
overview_data = r.json()
# Clean up 'None' strings from API response before processing
if isinstance(overview_data, dict):
for key, value in overview_data.items():
if value == 'None':
overview_data[key] = None
self._save_raw_data(overview_data, symbol, "market_metrics_overview")
except Exception as e:
print(f"Error fetching OVERVIEW for {symbol}: {e}")
market_cap = float(overview_data.get("MarketCapitalization") or 0)
shares_outstanding = float(overview_data.get("SharesOutstanding") or 0)
price = 0
if shares_outstanding > 0:
price = market_cap / shares_outstanding
return {
"price": price,
"name": overview_data.get("Name"),
"fiscal_year_end": overview_data.get("FiscalYearEnd"),
"dividend_yield": float(overview_data.get("DividendYield") or 0),
"market_cap": market_cap,
"pe": float(overview_data.get("PERatio") or 0),
"pb": float(overview_data.get("PriceToBookRatio") or 0),
"employee_count": int(float(overview_data.get("FullTimeEmployees") or 0)),
"total_share_holders": 0 # Not typically provided in basic AV Overview
}
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_market_metrics(symbol, code)
else:
return self.client.get_market_metrics(symbol)
def get_income_statement(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("INCOME_STATEMENT", symbol)
cols_map = {
"fiscalDateEnding": "date",
"totalRevenue": "revenue",
"netIncome": "net_income",
"grossProfit": "gross_profit",
"costOfRevenue": "cogs",
"researchAndDevelopment": "rd_exp",
"sellingGeneralAndAdministrative": "sga_exp",
"interestExpense": "fin_exp",
"incomeBeforeTax": "total_profit",
"incomeTaxExpense": "income_tax",
"ebit": "ebit"
}
df = df.rename(columns=cols_map)
# Convert numeric columns for analysis, keep others as is
numeric_cols = [
"revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
"fin_exp", "total_profit", "income_tax", "ebit",
"depreciation", "depreciationAndAmortization"
]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_income_statement(symbol, code)
else:
return self.client.get_income_statement(symbol)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("BALANCE_SHEET", symbol)
cols_map = {
"fiscalDateEnding": "date",
"totalShareholderEquity": "total_equity",
"totalLiabilities": "total_liabilities",
"totalCurrentAssets": "current_assets",
"totalCurrentLiabilities": "current_liabilities",
"cashAndCashEquivalentsAtCarryingValue": "cash",
"currentNetReceivables": "receivables",
"inventory": "inventory",
"propertyPlantEquipment": "fixed_assets",
"totalAssets": "total_assets",
"goodwill": "goodwill",
"longTermInvestments": "lt_invest",
"shortTermDebt": "short_term_debt",
"currentLongTermDebt": "short_term_debt_part",
"longTermDebt": "long_term_debt",
"currentAccountsPayable": "accounts_payable",
"otherCurrentAssets": "prepayment",
"otherNonCurrentAssets": "other_assets",
"deferredRevenue": "adv_receipts"
}
df = df.rename(columns=cols_map)
numeric_cols = [
"total_equity", "total_liabilities", "current_assets", "current_liabilities",
"cash", "receivables", "inventory", "fixed_assets", "total_assets",
"goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
"long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts"
]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_balance_sheet(symbol, code)
else:
return self.client.get_balance_sheet(symbol)
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
df = self._fetch_data("CASH_FLOW", symbol)
cols_map = {
"fiscalDateEnding": "date",
"operatingCashflow": "ocf",
"capitalExpenditures": "capex",
"dividendPayout": "dividends",
"depreciationDepletionAndAmortization": "depreciation"
}
df = df.rename(columns=cols_map)
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_cash_flow(symbol, code)
else:
return self.client.get_cash_flow(symbol)
numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
for col in numeric_cols:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
# The following methods are available in the iFinD international client but not in the
# Alpha Vantage client, so they only return data when data_source == 'iFinD' and fall
# back to empty DataFrames otherwise.
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_historical_metrics(symbol, code, dates)
return pd.DataFrame()
def get_dividends(self, symbol: str) -> pd.DataFrame:
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_dividends(symbol, code)
return pd.DataFrame()
def get_repurchases(self, symbol: str) -> pd.DataFrame:
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_repurchases(symbol, code)
return pd.DataFrame()
def get_employee_count(self, symbol: str) -> pd.DataFrame:
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_employee_count(symbol, code)
# Alpha Vantage only exposes the current employee count (via market metrics), not a historical series.
return pd.DataFrame()
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
if self.data_source == 'iFinD':
code = self._get_ifind_code(symbol)
return self.client.get_financial_ratios(symbol, code)
return pd.DataFrame()
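# Usage sketch (assumed call sites; tokens are placeholders):
#   fetcher = UsFetcher(api_key="AV_OR_IFIND_TOKEN", data_source="iFinD")
#   income = fetcher.get_income_statement("AAPL")
# With the default data_source ('Alpha Vantage') the same calls route to AlphaVantageUsClient, and the
# Alpha Vantage path returns empty DataFrames for dividends, repurchases, employee counts and ratios.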

View File

@ -1,474 +1,56 @@
import pandas as pd
import os
import time
from .base import DataFetcher
from .ifind_client import IFindClient
from .ifind_int_client import IFindIntClient
from storage.file_io import DataStorage
class VnFetcher(DataFetcher):
def __init__(self, api_key: str):
# api_key is the iFinD Refresh Token
super().__init__(api_key)
self.cli = IFindClient(refresh_token=api_key)
self.data_source = 'iFinD'
self.client = IFindIntClient(api_key, 'VN')
self.storage = DataStorage()
self._basic_info_cache = {}
def _get_ifind_code(self, symbol: str) -> str:
# Vietnamese tickers are usually three letters (e.g. VNM); iFinD may expect a suffix, but
# without documentation the safest choice is to pass the user-supplied symbol through unchanged.
return symbol
def _fetch_basic_info(self, symbol: str) -> dict:
"""获取公司的基本信息:中文名称、会计年结日、上市日期"""
def _fetch_basic_info(self, symbol: str):
code = self._get_ifind_code(symbol)
if code in self._basic_info_cache:
return self._basic_info_cache[code]
params = {
"codes": code,
"indipara": [
{"indicator": "corp_cn_name", "indiparams": []},
{"indicator": "accounting_date", "indiparams": []},
{"indicator": "ipo_date", "indiparams": []}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
self._save_raw_data(df, symbol, "basic_info_raw")
info = {
"name": "",
"accounting_date": "1231", # Default 12-31
"ipo_date": ""
}
if not df.empty:
row = df.iloc[0]
info["name"] = str(row.get("corp_cn_name", ""))
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
if acc_date:
info["accounting_date"] = acc_date
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
self._basic_info_cache[code] = info
return info
def _save_raw_data(self, data: any, symbol: str, name: str):
if data is None:
return
if isinstance(data, dict):
df = pd.DataFrame([data])
else:
df = data
self.storage.save_data(df, 'VN', symbol, f"raw_{name}")
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
if not res:
return pd.DataFrame()
if res.get("errorcode") != 0:
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
return pd.DataFrame()
tables = res.get("tables", [])
if not tables:
# print("iFinD API Warning: No tables found in response.")
return pd.DataFrame()
table_info = tables[0]
table_data = table_info.get("table", {})
times = table_info.get("time", [])
if not table_data:
return pd.DataFrame()
processed_table_data = {}
for k, v in table_data.items():
if not isinstance(v, list):
processed_table_data[k] = [v]
else:
processed_table_data[k] = v
df = pd.DataFrame(processed_table_data)
if times and len(times) == len(df):
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
elif times and len(df) == 1:
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
if 'end_date' not in df.columns:
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
if col in df.columns:
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
break
return df
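# Illustrative payload (shape assumed from the parsing above):
#   {"errorcode": 0, "tables": [{"time": ["2023-12-31"], "table": {"revenue_oas": [123.0]}}]}
# parses into a one-row DataFrame with columns ['revenue_oas', 'end_date'] where end_date == '20231231'.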
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty or 'end_date' not in df.columns:
return df
df = df.sort_values(by='end_date', ascending=False)
df = df.drop_duplicates(subset=['end_date'], keep='first')
if df.empty:
return df
latest_record = df.iloc[[0]]
try:
latest_date_str = str(latest_record['end_date'].values[0])
last_year_date_str = str(int(latest_date_str) - 10000)
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
except:
comparable_record = pd.DataFrame()
# VN usually ends in 1231
is_annual = df['end_date'].astype(str).str.endswith('1231')
annual_records = df[is_annual]
combined = pd.concat([latest_record, comparable_record, annual_records])
combined = combined.drop_duplicates(subset=['end_date'])
combined = combined.sort_values(by='end_date', ascending=False)
return combined
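# Illustrative selection: for end_dates [20240630, 20231231, 20230630, 20221231] the filter keeps
# 20240630 (latest), 20230630 (the comparable period one year earlier) and the annual records
# 20231231 / 20221231, de-duplicated and sorted newest-first.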
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
last_valid_year = None
for offset in range(3):
test_year = current_year - offset
test_date = f"{test_year}{acc_date}"
first_indicator = indicator_configs[0]
params = {
"codes": code,
"indipara": [
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
if pd.notna(valid_val) and valid_val != 0:
last_valid_year = test_year
break
if last_valid_year is None:
last_valid_year = current_year
all_dfs = []
for i in range(5):
target_year = last_valid_year - i
target_date = f"{target_year}{acc_date}"
params = {
"codes": code,
"indipara": [
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
for item in indicator_configs
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty:
df['end_date'] = target_date
all_dfs.append(df)
if not all_dfs:
return pd.DataFrame()
all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
if not all_dfs:
return pd.DataFrame()
return pd.concat(all_dfs, ignore_index=True)
return self.client._fetch_basic_info(symbol, code)
def get_income_statement(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "revenue_oas"},
{"indicator": "gross_profit_oas"},
{"indicator": "sga_expenses_oas"},
{"indicator": "selling_marketing_expenses_oas"},
{"indicator": "ga_expenses_oas"},
{"indicator": "rd_expenses_oas"},
{"indicator": "income_tax_expense_oas"},
{"indicator": "net_income_attri_to_common_sh_oas"},
{"indicator": "operating_income_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "income_statement_raw")
rename_map = {
'revenue_oas': 'revenue',
'gross_profit_oas': 'gross_profit',
'sga_expenses_oas': 'sga_exp',
'selling_marketing_expenses_oas': 'selling_marketing_exp',
'ga_expenses_oas': 'ga_exp',
'rd_expenses_oas': 'rd_exp',
'income_tax_expense_oas': 'income_tax',
'net_income_attri_to_common_sh_oas': 'net_income',
'operating_income_oas': 'operating_profit'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_income_statement(symbol, code)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "cash_equi_short_term_inve_oas"},
{"indicator": "accou_and_notes_recei_oas"},
{"indicator": "inventories_oas"},
{"indicator": "ppe_net_oas"},
{"indicator": "long_term_inv_and_receiv_oas"},
{"indicator": "goodwill_and_intasset_oas"},
{"indicator": "short_term_debt_oas"},
{"indicator": "short_term_borrowings_oas"},
{"indicator": "account_and_note_payable_oas"},
{"indicator": "contra_liabilities_current_oas"},
{"indicator": "advance_from_cust_current_oas"},
{"indicator": "defer_revenue_current_oas"},
{"indicator": "long_term_debt_oas"},
{"indicator": "long_term_borrowings_oas"},
{"indicator": "total_assets_oas"},
{"indicator": "equity_attri_to_companyowner_oas"},
{"indicator": "prepaid_expenses_current_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "balance_sheet_raw")
rename_map = {
'cash_equi_short_term_inve_oas': 'cash',
'accou_and_notes_recei_oas': 'receivables',
'inventories_oas': 'inventory',
'ppe_net_oas': 'fixed_assets',
'long_term_inv_and_receiv_oas': 'long_term_investments',
'goodwill_and_intasset_oas': 'goodwill',
'short_term_debt_oas': 'short_term_debt',
'short_term_borrowings_oas': 'short_term_borrowings',
'account_and_note_payable_oas': 'accounts_payable',
'contra_liabilities_current_oas': 'contract_liabilities',
'advance_from_cust_current_oas': 'advances_from_customers',
'defer_revenue_current_oas': 'deferred_revenue',
'long_term_debt_oas': 'long_term_debt',
'long_term_borrowings_oas': 'long_term_borrowings',
'total_assets_oas': 'total_assets',
'equity_attri_to_companyowner_oas': 'total_equity',
'prepaid_expenses_current_oas': 'prepayment'
}
df_filtered = df.rename(columns=rename_map)
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_balance_sheet(symbol, code)
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
indicators = [
{"indicator": "net_cash_flows_from_oa_oas"},
{"indicator": "purchase_of_ppe_and_ia_oas"},
{"indicator": "dividends_paid_oas"}
]
df = self._fetch_financial_data_annual(symbol, indicators)
if df.empty: return df
self._save_raw_data(df, symbol, "cash_flow_raw")
rename_map = {
'net_cash_flows_from_oa_oas': 'ocf',
'purchase_of_ppe_and_ia_oas': 'capex',
'dividends_paid_oas': 'dividends'
}
df_filtered = df.rename(columns=rename_map)
for col in df_filtered.columns:
if col not in ['date', 'end_date']:
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
if 'capex' in df_filtered.columns:
df_filtered['capex'] = df_filtered['capex'].abs()
return self._filter_data(df_filtered)
code = self._get_ifind_code(symbol)
return self.client.get_cash_flow(symbol, code)
def get_market_metrics(self, symbol: str) -> dict:
basic_info = self._fetch_basic_info(symbol)
metrics = {
"name": basic_info.get("name", ""),
"list_date": basic_info.get("ipo_date", "")
}
return metrics
code = self._get_ifind_code(symbol)
return self.client.get_market_metrics(symbol, code)
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
if not dates: return pd.DataFrame()
results = []
for d in dates:
d_str = str(d).replace('-', '').replace('/', '')
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
params = {
"codes": code,
"startdate": fmt_d,
"enddate": fmt_d,
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
"indipara": [
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
{"indicator": "market_value", "indiparams": ["", "CNY"]}
]
}
res = self.cli.post("date_sequence", params)
df_seq = self._parse_ifind_tables(res)
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
if not df_seq.empty:
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
if not match.empty:
if 'pre_close' in match.columns:
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
if 'market_value' in match.columns:
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
results.append(metrics)
df_hist = pd.DataFrame(results)
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
return df_hist
return self.client.get_historical_metrics(symbol, code, dates)
def get_dividends(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
year_str = str(current_year - i)
params = {
"codes": code,
"indipara": [
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'annual_cum_dividend' in df.columns:
val = df['annual_cum_dividend'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{year_str}{acc_date}",
'dividends': float(val)
})
if not results:
return pd.DataFrame()
df_div = pd.DataFrame(results)
self._save_raw_data(df_div, symbol, "dividends_raw")
return df_div
return self.client.get_dividends(symbol, code)
def get_repurchases(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
fmt_mm_dd = f"{mm}-{dd}"
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
start_date = f"{target_year - 1}-{fmt_mm_dd}"
end_date = f"{target_year}-{fmt_mm_dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'repur_num_new' in df.columns:
val = df['repur_num_new'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'repurchases': float(val)
})
if not results:
return pd.DataFrame()
df_repur = pd.DataFrame(results)
self._save_raw_data(df_repur, symbol, "repurchases_raw")
return df_repur
return self.client.get_repurchases(symbol, code)
def get_employee_count(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
basic_info = self._fetch_basic_info(symbol)
acc_date = basic_info.get("accounting_date", "1231")
mm = acc_date[:2]
dd = acc_date[2:]
return self.client.get_employee_count(symbol, code)
current_year = int(time.strftime("%Y"))
results = []
for i in range(5):
target_year = current_year - i
target_date = f"{target_year}-{mm}-{dd}"
params = {
"codes": code,
"indipara": [
{"indicator": "staff_num", "indiparams": [target_date]}
]
}
res = self.cli.post("basic_data_service", params)
df = self._parse_ifind_tables(res)
if not df.empty and 'staff_num' in df.columns:
val = df['staff_num'].iloc[0]
if pd.notna(val) and val != 0:
results.append({
'date_str': f"{target_year}{acc_date}",
'employee_count': float(val)
})
if not results:
return pd.DataFrame()
df_emp = pd.DataFrame(results)
self._save_raw_data(df_emp, symbol, "employee_count_raw")
return df_emp
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
code = self._get_ifind_code(symbol)
return self.client.get_financial_ratios(symbol, code)
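# Usage sketch (assumed; the refresh token is a placeholder):
#   fetcher = VnFetcher(api_key="IFIND_REFRESH_TOKEN")
#   metrics = fetcher.get_market_metrics("VNM")
# Every public method now delegates to IFindIntClient(market='VN'); the in-class iFinD plumbing above is the legacy path being replaced.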

View File

@ -15,7 +15,7 @@ class BaseReporter(ABC):
"""
pass
def _generate_markdown_content(self, df: pd.DataFrame, market: str, symbol: str, metrics: dict = {}) -> str:
def _generate_markdown_content(self, df: pd.DataFrame, market: str, symbol: str, metrics: dict = {}, data_source: str = None) -> str:
if df.empty:
return f"No breakdown data available for {market} {symbol}"
@ -23,7 +23,7 @@ class BaseReporter(ABC):
headers = self._get_headers(df)
md = []
md.append(self._generate_md_company_info(symbol, metrics, market))
md.append(self._generate_md_company_info(symbol, metrics, market, data_source))
md.append("\n")
for group_name, items in self.indicators.items():
@ -69,7 +69,7 @@ class BaseReporter(ABC):
disp_val = f"{val}"
return disp_val
def _generate_md_company_info(self, symbol, metrics, market):
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
return "" # Implemented in subclasses
def _preprocess_data(self, df, market):

View File

@ -108,7 +108,7 @@ class CN_ReportGenerator(BaseReporter):
def _get_headers(self, df):
return [self._format_period_label(date_value) for date_value in df['date_str']]
def _generate_md_company_info(self, symbol, metrics, market):
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '')
@ -121,15 +121,19 @@ class CN_ReportGenerator(BaseReporter):
div = metrics.get('dividend_yield', 0) or 0
md = []
md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n")
md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
# 1. Generate Markdown content
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
# 2. Save Markdown file
md_path = os.path.join(output_dir, "report.md")
@ -144,7 +148,7 @@ class CN_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html)
else:
headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html")
@ -152,7 +156,7 @@ class CN_ReportGenerator(BaseReporter):
f.write(final_html)
print(f"HTML report saved to {html_path}")
def _build_html_content(self, symbol, metrics, headers, df):
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol
raw_list_date = metrics.get('list_date', '')
@ -229,6 +233,7 @@ class CN_ReportGenerator(BaseReporter):
html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table,
'<div class="table-gap"></div>',
metrics_table

View File

@ -97,7 +97,7 @@ class HK_ReportGenerator(BaseReporter):
return [self._format_period_label(date_value) for date_value in df['date_str']]
return []
def _generate_md_company_info(self, symbol, metrics, market):
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '')
@ -113,15 +113,19 @@ class HK_ReportGenerator(BaseReporter):
md = []
md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n")
md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | 年结日 | 市值(亿) | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {acc_date} | {mcap:.2f} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
os.makedirs(output_dir, exist_ok=True)
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f:
f.write(md_content)
@ -132,14 +136,14 @@ class HK_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html)
else:
headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html")
with open(html_path, "w", encoding='utf-8') as f:
f.write(final_html)
def _build_html_content(self, symbol, metrics, headers, df):
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol
raw_list_date = metrics.get('list_date', '')
@ -222,6 +226,7 @@ class HK_ReportGenerator(BaseReporter):
html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table,
'<div class="table-gap"></div>',
metrics_table

View File

@ -109,7 +109,7 @@ class JP_ReportGenerator(BaseReporter):
def _get_headers(self, df):
return [self._format_period_label(date_value) for date_value in df['date_str']]
def _generate_md_company_info(self, symbol, metrics, market):
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '')
@ -122,14 +122,18 @@ class JP_ReportGenerator(BaseReporter):
div = metrics.get('dividend_yield', 0) or 0
md = []
md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n")
md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
os.makedirs(output_dir, exist_ok=True)
md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f:
@ -141,7 +145,7 @@ class JP_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html)
else:
headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
# Re-use the exact same styled HTML from CN_ReportGenerator
final_html = self.to_html(symbol, html_content)
@ -149,7 +153,7 @@ class JP_ReportGenerator(BaseReporter):
with open(html_path, "w", encoding='utf-8') as f:
f.write(final_html)
def _build_html_content(self, symbol, metrics, headers, df):
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
# Implementation identical to CN_ReportGenerator for style consistency
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol
@ -227,6 +231,7 @@ class JP_ReportGenerator(BaseReporter):
html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table,
'<div class="table-gap"></div>',
metrics_table

View File

@ -58,7 +58,7 @@ class US_ReportGenerator(BaseReporter):
]
}
def _generate_md_company_info(self, symbol, metrics, market):
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '')
fiscal_year_end = metrics.get('fiscal_year_end', '')
@ -68,14 +68,22 @@ class US_ReportGenerator(BaseReporter):
md = []
md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n")
md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 财报日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {fiscal_year_end} | {pe:.2f} | {pb:.2f} | {div_yield:.2f}% |")
return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
# Override to pass data_source to _generate_md_company_info
# Note: BaseReporter._generate_markdown_content calls _generate_md_company_info
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f:
@ -88,7 +96,7 @@ class US_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html)
else:
headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html")
@ -96,7 +104,7 @@ class US_ReportGenerator(BaseReporter):
f.write(final_html)
print(f"HTML report saved to {html_path}")
def _build_html_content(self, symbol, metrics, headers, df):
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol
fiscal_year_end = metrics.get('fiscal_year_end') or "-"
@ -169,6 +177,7 @@ class US_ReportGenerator(BaseReporter):
html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table,
'<div class="table-gap"></div>',
metrics_table

View File

@ -99,7 +99,7 @@ class VN_ReportGenerator(BaseReporter):
def _get_headers(self, df):
return [self._format_period_label(date_value) for date_value in df['date_str']]
def _generate_md_company_info(self, symbol, metrics, market):
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name', '')
raw_list_date = metrics.get('list_date', '')
@ -112,14 +112,18 @@ class VN_ReportGenerator(BaseReporter):
div = metrics.get('dividend_yield', 0) or 0
md = []
md.append(f"# {name} ({symbol}) - Financial Report")
md.append(f"*Report generated on: {today_str}*\n")
md.append(f"*Report generated on: {today_str}*")
if data_source:
md.append(f"*Data Source: {data_source}*\n")
else:
md.append("\n")
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
md.append("|:---|:---|:---|:---|:---|:---|")
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
return "\n".join(md)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
os.makedirs(output_dir, exist_ok=True)
md_path = os.path.join(output_dir, "report.md")
with open(md_path, "w", encoding='utf-8') as f:
@ -131,14 +135,15 @@ class VN_ReportGenerator(BaseReporter):
headers = self._get_headers(df_for_html)
else:
headers = []
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
final_html = self.to_html(symbol, html_content)
html_path = os.path.join(output_dir, "report.html")
with open(html_path, "w", encoding='utf-8') as f:
f.write(final_html)
print(f"HTML report saved to {html_path}")
def _build_html_content(self, symbol, metrics, headers, df):
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
today_str = datetime.date.today().strftime("%Y-%m-%d")
name = metrics.get('name') or symbol
raw_list_date = metrics.get('list_date', '')
@ -215,6 +220,7 @@ class VN_ReportGenerator(BaseReporter):
html_sections = [
f"<h1>{name} ({symbol}) - Financial Report</h1>",
f"<p><em>Report generated on: {today_str}</em></p>",
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
company_table,
'<div class="table-gap"></div>',
metrics_table

View File

@ -52,7 +52,9 @@ class CN_Strategy(BaseStrategy):
symbol=self.stock_code,
market='CN',
metrics=self.raw_data['metrics'],
output_dir=output_dir
output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'Tushare')
)
else:
print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")

View File

@ -76,7 +76,9 @@ class HK_Strategy(BaseStrategy):
symbol=self.stock_code,
market='HK',
metrics=self.raw_data['metrics'],
output_dir=output_dir
output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
)
else:
print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")

View File

@ -71,9 +71,11 @@ class JP_Strategy(BaseStrategy):
symbol=self.stock_code,
market='JP',
metrics=self.raw_data['metrics'],
output_dir=output_dir
output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
)
else:
print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
import pandas as pd # Import needed for the placeholder DataFrames

View File

@ -6,10 +6,12 @@ from storage.file_io import DataStorage
import os
class US_Strategy(BaseStrategy):
def __init__(self, stock_code, av_key):
def __init__(self, stock_code, av_key, data_source=None):
super().__init__(stock_code)
self.av_key = av_key
self.fetcher = FetcherFactory.get_fetcher('US', av_key=self.av_key)
# When iFinD is selected for US the factory retrieves the iFinD token from the environment if needed;
# av_key is still passed because it remains a required argument of get_fetcher.
self.fetcher = FetcherFactory.get_fetcher('US', av_key=self.av_key, data_source=data_source)
self.analyzer = US_Analyzer()
self.reporter = US_ReportGenerator()
self.storage = DataStorage()
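# Construction sketch (assumed call site): US_Strategy("AAPL", av_key, data_source="iFinD")
# routes all US data fetching through the iFinD international client instead of Alpha Vantage.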
@ -42,7 +44,9 @@ class US_Strategy(BaseStrategy):
symbol=self.stock_code,
market='US',
metrics=self.raw_data['metrics'],
output_dir=output_dir
output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', None)
)
else:
print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")

View File

@ -70,7 +70,9 @@ class VN_Strategy(BaseStrategy):
symbol=self.stock_code,
market='VN',
metrics=self.raw_data['metrics'],
output_dir=output_dir
output_dir=output_dir,
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
)
else:
print("No analysis result to generate report.")
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")