更改数据源配置
This commit is contained in:
parent
548ee242ba
commit
b9c8f90cbc
@ -42,9 +42,12 @@ async def search_stock(request: StockSearchRequest, db: AsyncSession = Depends(g
|
||||
|
||||
@router.post("/analyze", response_model=ReportResponse)
|
||||
async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundTasks, db: AsyncSession = Depends(get_db)):
|
||||
# Get AI model setting
|
||||
model_setting = await db.get(Setting, "AI_MODEL")
|
||||
model = model_setting.value if model_setting else "gemini-2.0-flash"
|
||||
# Get AI model
|
||||
if request.model:
|
||||
model = request.model
|
||||
else:
|
||||
model_setting = await db.get(Setting, "AI_MODEL")
|
||||
model = model_setting.value if model_setting else "gemini-2.0-flash"
|
||||
|
||||
new_report = Report(
|
||||
market=request.market,
|
||||
@ -71,7 +74,8 @@ async def start_analysis(request: AnalysisRequest, background_tasks: BackgroundT
|
||||
new_report.id,
|
||||
request.market,
|
||||
request.symbol,
|
||||
api_key
|
||||
api_key,
|
||||
request.data_source
|
||||
)
|
||||
|
||||
# Re-fetch with selectinload to avoid lazy loading issues
|
||||
@ -122,12 +126,18 @@ async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
|
||||
except Exception as e:
|
||||
financial_html = f"<p>加载财务图表时出错: {str(e)}</p>"
|
||||
|
||||
# If content is not ready, add auto-refresh meta tag
|
||||
meta_refresh = ""
|
||||
if "财务图表尚未生成" in financial_html:
|
||||
meta_refresh = '<meta http-equiv="refresh" content="2">'
|
||||
|
||||
# Only return financial charts, no analysis sections
|
||||
final_html = f"""
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
{meta_refresh}
|
||||
<title>{report.company_name} - 财务数据</title>
|
||||
<style>
|
||||
body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; padding: 20px; line-height: 1.6; max-width: 1200px; margin: 0 auto; }}
|
||||
@ -162,7 +172,7 @@ async def get_report_html(report_id: int, db: AsyncSession = Depends(get_db)):
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
return final_html
|
||||
return HTMLResponse(content=final_html, headers={"Cache-Control": "no-store, no-cache, must-revalidate", "Pragma": "no-cache", "Expires": "0"})
|
||||
|
||||
@router.get("/config")
|
||||
async def get_config(db: AsyncSession = Depends(get_db)):
|
||||
|
||||
@ -21,6 +21,8 @@ class AnalysisRequest(BaseModel):
|
||||
market: str
|
||||
symbol: str
|
||||
company_name: str
|
||||
model: Optional[str] = None
|
||||
data_source: Optional[str] = None
|
||||
|
||||
class ReportSectionSchema(BaseModel):
|
||||
section_name: str
|
||||
|
||||
@ -113,12 +113,12 @@ async def search_stock(query: str, api_key: str, model: str = "gemini-2.0-flash"
|
||||
print(f"Search error: {e}")
|
||||
return {"error": f"搜索失败: {str(e)}"}
|
||||
|
||||
async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: str):
|
||||
async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: str, data_source: str = None):
|
||||
"""
|
||||
Background task to run the full analysis pipeline.
|
||||
Creates its own DB session.
|
||||
"""
|
||||
print(f"Starting analysis for report {report_id}: {market} {symbol}")
|
||||
print(f"Starting analysis for report {report_id}: {market} {symbol} (Source: {data_source})")
|
||||
|
||||
# Create new session
|
||||
from app.database import AsyncSessionLocal
|
||||
@ -138,6 +138,8 @@ async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: s
|
||||
# 2. Run Main Data Fetching Script (run_fetcher.py)
|
||||
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
|
||||
cmd = [sys.executable, "run_fetcher.py", market, symbol]
|
||||
if data_source:
|
||||
cmd.extend(["--data-source", data_source])
|
||||
|
||||
print(f"Executing data fetch command: {cmd} in {root_dir}")
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
import { useEffect, useState, use, useRef } from "react"
|
||||
import { getReport } from "@/lib/api"
|
||||
import { Badge } from "@/components/ui/badge"
|
||||
|
||||
import { Card, CardContent } from "@/components/ui/card"
|
||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"
|
||||
import { MarkdownRenderer } from "@/components/markdown-renderer"
|
||||
@ -101,19 +101,24 @@ export default function AnalysisPage({ params }: { params: Promise<{ id: string
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-4">
|
||||
<Badge variant={
|
||||
report.status === "completed" ? "default" :
|
||||
report.status === "in_progress" ? "secondary" :
|
||||
report.status === "failed" ? "destructive" : "outline"
|
||||
}>
|
||||
<Button
|
||||
variant={
|
||||
report.status === "completed" ? "default" :
|
||||
report.status === "in_progress" ? "secondary" :
|
||||
report.status === "failed" ? "destructive" : "outline"
|
||||
}
|
||||
size="sm"
|
||||
className="pointer-events-none w-32"
|
||||
>
|
||||
{report.status === "in_progress" ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<Loader2 className="h-3 w-3 animate-spin" /> 进行中
|
||||
</div>
|
||||
<>
|
||||
<Loader2 className="h-4 w-4 mr-2 animate-spin" />
|
||||
进行中
|
||||
</>
|
||||
) : report.status === "completed" ? "已完成" : report.status === "failed" ? "失败" : report.status === "pending" ? "待处理" : report.status}
|
||||
</Badge>
|
||||
</Button>
|
||||
{report.status === "completed" && (
|
||||
<Button onClick={handleDownloadPDF} variant="outline" size="sm">
|
||||
<Button onClick={handleDownloadPDF} variant="outline" size="sm" className="w-32">
|
||||
<Download className="h-4 w-4 mr-2" />
|
||||
下载 PDF
|
||||
</Button>
|
||||
|
||||
@ -1,18 +1,54 @@
|
||||
"use client"
|
||||
|
||||
import { useState } from "react"
|
||||
import { searchStock, startAnalysis } from "@/lib/api"
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"
|
||||
import { useState, useEffect } from "react"
|
||||
import { searchStock, startAnalysis, getConfig } from "@/lib/api"
|
||||
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/components/ui/card"
|
||||
import { Input } from "@/components/ui/input"
|
||||
import { Button } from "@/components/ui/button"
|
||||
import { Search, Loader2 } from "lucide-react"
|
||||
import { Search, Loader2, Database, Bot } from "lucide-react"
|
||||
import { useRouter } from "next/navigation"
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from "@/components/ui/select"
|
||||
import { Label } from "@/components/ui/label"
|
||||
import { Badge } from "@/components/ui/badge"
|
||||
|
||||
export function SearchStock() {
|
||||
const [query, setQuery] = useState("")
|
||||
const [results, setResults] = useState<{ market: string; symbol: string; company_name: string }[]>([])
|
||||
const [loading, setLoading] = useState(false)
|
||||
const [error, setError] = useState("")
|
||||
const [activeIndex, setActiveIndex] = useState<number | null>(null)
|
||||
|
||||
// Global Configuration State
|
||||
const [selectedModel, setSelectedModel] = useState("gemini-2.0-flash")
|
||||
const [dataSourcePrefs, setDataSourcePrefs] = useState<Record<string, string>>({
|
||||
'CN': 'Tushare',
|
||||
'HK': 'iFinD',
|
||||
'US': 'Alpha Vantage',
|
||||
'JP': 'iFinD',
|
||||
'VN': 'iFinD'
|
||||
})
|
||||
|
||||
// Fetch initial config
|
||||
useEffect(() => {
|
||||
const fetchConfig = async () => {
|
||||
try {
|
||||
const config = await getConfig()
|
||||
if (config.AI_MODEL) {
|
||||
setSelectedModel(config.AI_MODEL)
|
||||
}
|
||||
} catch (e) {
|
||||
console.error("Failed to load config:", e)
|
||||
}
|
||||
}
|
||||
fetchConfig()
|
||||
}, [])
|
||||
|
||||
const router = useRouter()
|
||||
|
||||
const handleSearch = async () => {
|
||||
@ -20,10 +56,12 @@ export function SearchStock() {
|
||||
setLoading(true)
|
||||
setError("")
|
||||
setResults([])
|
||||
setActiveIndex(null)
|
||||
|
||||
try {
|
||||
const data = await searchStock(query)
|
||||
setResults(data)
|
||||
// Auto-select the first result if exists? Or keep null? User asked for "click to select". Keeping null is safer.
|
||||
} catch (err: any) {
|
||||
setError(err.message || "搜索失败")
|
||||
} finally {
|
||||
@ -34,7 +72,9 @@ export function SearchStock() {
|
||||
const handleAnalyze = async (result: { market: string; symbol: string; company_name: string }) => {
|
||||
setLoading(true)
|
||||
try {
|
||||
const report = await startAnalysis(result.market, result.symbol, result.company_name)
|
||||
// Use global model selection
|
||||
const dataSource = dataSourcePrefs[result.market]
|
||||
const report = await startAnalysis(result.market, result.symbol, result.company_name, selectedModel, dataSource)
|
||||
router.push(`/analysis/${report.id}`)
|
||||
} catch (err: any) {
|
||||
setError(err.message || "启动分析失败")
|
||||
@ -43,47 +83,171 @@ export function SearchStock() {
|
||||
}
|
||||
}
|
||||
|
||||
// Dynamic Data Source Options (Mocking availability)
|
||||
const dataSourceOptions: Record<string, string[]> = {
|
||||
'CN': ['Tushare'],
|
||||
'HK': ['iFinD'],
|
||||
'US': ['Alpha Vantage', 'iFinD'],
|
||||
'JP': ['iFinD'],
|
||||
'VN': ['iFinD']
|
||||
}
|
||||
|
||||
return (
|
||||
<Card className="w-full max-w-2xl">
|
||||
<CardHeader>
|
||||
<CardTitle>开始新的分析</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="flex gap-2">
|
||||
<Input
|
||||
placeholder="输入公司名称(例如:腾讯)或代码(例如:700)"
|
||||
value={query}
|
||||
onChange={(e) => setQuery(e.target.value)}
|
||||
onKeyDown={(e) => e.key === "Enter" && handleSearch()}
|
||||
/>
|
||||
<Button onClick={handleSearch} disabled={loading}>
|
||||
{loading ? <Loader2 className="animate-spin" /> : <Search />}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{error && <div className="text-red-500 text-sm">{error}</div>}
|
||||
|
||||
{results.length > 0 && (
|
||||
<div className="space-y-2">
|
||||
{results.length > 1 && (
|
||||
<div className="text-sm text-muted-foreground">找到 {results.length} 个结果,请选择:</div>
|
||||
)}
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
|
||||
{results.map((result, index) => (
|
||||
<div key={index} className="bg-muted p-3 rounded-md space-y-2 border hover:border-primary transition-colors">
|
||||
<div className="font-medium">{result.company_name}</div>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
{result.market} | {result.symbol}
|
||||
</div>
|
||||
<Button onClick={() => handleAnalyze(result)} disabled={loading} className="w-full" size="sm">
|
||||
{loading ? "正在启动分析..." : "运行分析"}
|
||||
</Button>
|
||||
</div>
|
||||
))}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-3 gap-6 w-full max-w-6xl">
|
||||
{/* Left Column: Search & Results */}
|
||||
<div className="lg:col-span-2 space-y-6">
|
||||
<Card className="h-full flex flex-col">
|
||||
<CardHeader>
|
||||
<CardTitle>开始新的分析</CardTitle>
|
||||
<CardDescription>输入公司名称或股票代码开始搜索</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4 flex-grow">
|
||||
<div className="flex gap-2">
|
||||
<Input
|
||||
placeholder="输入公司名称(例如:腾讯)或代码(例如:700)"
|
||||
value={query}
|
||||
onChange={(e) => setQuery(e.target.value)}
|
||||
onKeyDown={(e) => e.key === "Enter" && handleSearch()}
|
||||
/>
|
||||
<Button onClick={handleSearch} disabled={loading} size="default" className="px-4">
|
||||
{loading ? <Loader2 className="animate-spin" /> : <Search className="h-4 w-4" />}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
{error && <div className="text-red-500 text-sm">{error}</div>}
|
||||
|
||||
{results.length > 0 && (
|
||||
<div className="mt-6 space-y-3">
|
||||
<div className="text-sm font-medium text-muted-foreground flex items-center justify-between">
|
||||
<span>找到 {results.length} 个结果</span>
|
||||
<span className="text-xs">点击下方卡片运行分析</span>
|
||||
</div>
|
||||
<div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{results.map((result, index) => (
|
||||
<div
|
||||
key={index}
|
||||
className={`group relative flex flex-col justify-between p-4 rounded-lg border transition-all cursor-pointer shadow-sm hover:shadow active:scale-95 duration-200 ${activeIndex === index ? 'border-primary bg-primary/5 ring-1 ring-primary' : 'bg-card hover:bg-accent hover:border-primary/50'}`}
|
||||
onClick={() => setActiveIndex(index)}
|
||||
>
|
||||
<div className="space-y-2 mb-4">
|
||||
<div className="font-semibold text-base line-clamp-2 leading-tight" title={result.company_name}>
|
||||
{result.company_name}
|
||||
</div>
|
||||
<div className="flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<Badge variant="secondary" className="px-1.5 py-0 text-[10px] h-5">{result.market}</Badge>
|
||||
<span className="font-mono">{result.symbol}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
handleAnalyze(result);
|
||||
}}
|
||||
disabled={loading}
|
||||
size="sm"
|
||||
variant={activeIndex === index ? "default" : "secondary"}
|
||||
className="w-full mt-auto"
|
||||
>
|
||||
{loading ? <Loader2 className="animate-spin h-3 w-3 mr-2" /> : null}
|
||||
运行分析
|
||||
</Button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
|
||||
{/* Right Column: Configuration */}
|
||||
<div className="lg:col-span-1 space-y-6">
|
||||
<Card className="h-full border-dashed shadow-sm bg-muted/30 flex flex-col">
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center gap-2 text-base">
|
||||
<Bot className="h-5 w-5 text-primary" />
|
||||
分析配置
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-6">
|
||||
{/* Section 1: AI Model */}
|
||||
<div className="space-y-3">
|
||||
<Label className="text-sm font-medium">选择 AI 模型</Label>
|
||||
<Select
|
||||
value={selectedModel}
|
||||
onValueChange={setSelectedModel}
|
||||
>
|
||||
<SelectTrigger className="w-full bg-background">
|
||||
<SelectValue placeholder="Select model" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
<SelectItem value="gemini-2.0-flash">Gemini 2.0 Flash</SelectItem>
|
||||
<SelectItem value="gemini-2.5-flash">Gemini 2.5 Flash</SelectItem>
|
||||
<SelectItem value="gemini-3-flash-preview">Gemini 3 Flash Preview</SelectItem>
|
||||
<SelectItem value="gemini-3-pro-preview">Gemini 3 Pro Preview</SelectItem>
|
||||
|
||||
{/* If the current selected model is custom and not in the list above, show it */}
|
||||
{selectedModel &&
|
||||
!["gemini-2.0-flash", "gemini-2.5-flash", "gemini-3-flash-preview", "gemini-3-pro-preview"].includes(selectedModel) && (
|
||||
<SelectItem value={selectedModel}>{selectedModel} (Custom)</SelectItem>
|
||||
)}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
选择用于分析财报和生成报告的大语言模型。
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Section 2: Data Sources - Only show when results are available and for relevant markets */}
|
||||
{results.length > 0 && (
|
||||
<>
|
||||
<div className="h-[1px] bg-border w-full my-4" />
|
||||
<div className="space-y-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<Database className="h-4 w-4 text-primary" />
|
||||
<Label className="text-sm font-medium">数据源偏好</Label>
|
||||
</div>
|
||||
|
||||
<div className="space-y-4">
|
||||
{(activeIndex !== null && results[activeIndex] ? [results[activeIndex].market] : Array.from(new Set(results.map(r => r.market)))).map((market) => (
|
||||
<div key={market} className="space-y-2 animate-in fade-in slide-in-from-right-4 duration-300">
|
||||
<div className="flex items-center gap-2">
|
||||
<Badge variant="outline" className="h-5 px-1.5 text-[10px] uppercase">{market}</Badge>
|
||||
<span className="text-xs text-muted-foreground">选择数据源:</span>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-2">
|
||||
{(dataSourceOptions[market] || ['Default']).map((opt) => {
|
||||
const isSelected = dataSourcePrefs[market] === opt;
|
||||
return (
|
||||
<div
|
||||
key={opt}
|
||||
onClick={() => setDataSourcePrefs(prev => ({ ...prev, [market]: opt }))}
|
||||
className={`
|
||||
cursor-pointer relative flex flex-col items-center justify-center p-2 rounded-md border text-xs font-medium transition-all
|
||||
${isSelected
|
||||
? 'border-primary bg-primary/10 text-primary ring-1 ring-primary/20'
|
||||
: 'bg-background hover:bg-accent hover:border-primary/30 text-muted-foreground'}
|
||||
`}
|
||||
>
|
||||
{opt}
|
||||
{isSelected && <div className="absolute top-1 right-1 w-1.5 h-1.5 rounded-full bg-primary" />}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
<p className="text-xs text-muted-foreground mt-2">
|
||||
根据当前搜索结果的市场,选择首选的数据提供商。
|
||||
</p>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
@ -10,11 +10,11 @@ export async function searchStock(query: string) {
|
||||
return res.json() as Promise<{ market: string; symbol: string; company_name: string }[]>;
|
||||
}
|
||||
|
||||
export async function startAnalysis(market: string, symbol: string, company_name: string) {
|
||||
export async function startAnalysis(market: string, symbol: string, company_name: string, model?: string, data_source?: string) {
|
||||
const res = await fetch(`${API_BASE}/analyze`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ market, symbol, company_name }),
|
||||
body: JSON.stringify({ market, symbol, company_name, model, data_source }),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const error = await res.json();
|
||||
|
||||
@ -9,14 +9,14 @@ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
||||
# from strategies.hk_strategy import HK_Strategy
|
||||
# from strategies.jp_strategy import JP_Strategy
|
||||
|
||||
def get_strategy(market, stock_code, tushare_token=None, av_key=None):
|
||||
def get_strategy(market, stock_code, tushare_token=None, av_key=None, data_source=None):
|
||||
market = market.upper()
|
||||
if market == 'CN':
|
||||
from strategies.cn_strategy import CN_Strategy
|
||||
return CN_Strategy(stock_code, tushare_token)
|
||||
elif market == 'US':
|
||||
from strategies.us_strategy import US_Strategy
|
||||
return US_Strategy(stock_code, av_key)
|
||||
return US_Strategy(stock_code, av_key, data_source)
|
||||
elif market == 'HK':
|
||||
from strategies.hk_strategy import HK_Strategy
|
||||
ifind_token = os.getenv('IFIND_REFRESH_TOKEN')
|
||||
@ -37,10 +37,19 @@ def main():
|
||||
tushare_token = os.getenv('TUSHARE_TOKEN')
|
||||
av_key = os.getenv('ALPHA_VANTAGE_KEY')
|
||||
|
||||
if len(sys.argv) > 2:
|
||||
market = sys.argv[1]
|
||||
symbol = sys.argv[2]
|
||||
strategy = get_strategy(market, symbol, tushare_token, av_key)
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description='Run Stock Analysis Data Fetcher')
|
||||
parser.add_argument('market', help='Market (CN, US, HK, JP, VN)')
|
||||
parser.add_argument('symbol', help='Stock Symbol')
|
||||
parser.add_argument('--data-source', help='Data Source Preference', default=None)
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
args = parser.parse_args()
|
||||
market = args.market
|
||||
symbol = args.symbol
|
||||
data_source = args.data_source
|
||||
|
||||
strategy = get_strategy(market, symbol, tushare_token, av_key, data_source)
|
||||
strategy.execute()
|
||||
else:
|
||||
print("Usage: python run_fetcher.py <MARKET> <SYMBOL>")
|
||||
|
||||
BIN
server.log
Normal file
BIN
server.log
Normal file
Binary file not shown.
172
src/fetchers/alphavantage_us_client.py
Normal file
172
src/fetchers/alphavantage_us_client.py
Normal file
@ -0,0 +1,172 @@
|
||||
import requests
|
||||
import pandas as pd
|
||||
import time
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class AlphaVantageUsClient:
|
||||
BASE_URL = "https://www.alphavantage.co/query"
|
||||
|
||||
def __init__(self, api_key: str):
|
||||
self.api_key = api_key
|
||||
self.storage = DataStorage()
|
||||
|
||||
def _save_raw_data(self, data, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
|
||||
df = pd.DataFrame()
|
||||
if isinstance(data, list):
|
||||
df = pd.DataFrame(data)
|
||||
elif isinstance(data, dict):
|
||||
# For single-record JSON objects, convert to a DataFrame
|
||||
df = pd.DataFrame([data])
|
||||
|
||||
if not df.empty:
|
||||
self.storage.save_data(df, 'US', symbol, f"raw_{name}")
|
||||
|
||||
def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
|
||||
params = {
|
||||
"function": function,
|
||||
"symbol": symbol,
|
||||
"apikey": self.api_key
|
||||
}
|
||||
try:
|
||||
time.sleep(15)
|
||||
response = requests.get(self.BASE_URL, params=params)
|
||||
data = response.json()
|
||||
except Exception as e:
|
||||
print(f"Error requesting {function}: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
if data:
|
||||
self._save_raw_data(data.get("annualReports"), symbol, f"{function.lower()}_annual")
|
||||
|
||||
df_annual = pd.DataFrame()
|
||||
|
||||
if "annualReports" in data and data["annualReports"]:
|
||||
df_annual = pd.DataFrame(data["annualReports"])
|
||||
if "fiscalDateEnding" in df_annual.columns:
|
||||
df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
|
||||
df_annual = df_annual.head(5)
|
||||
else:
|
||||
print(f"Error fetching {function} for {symbol}: {data}")
|
||||
return pd.DataFrame()
|
||||
|
||||
return df_annual
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
|
||||
# 1. Get Overview for PE, PB, MarketCap, Employees
|
||||
overview_data = {}
|
||||
try:
|
||||
time.sleep(15)
|
||||
params = {"function": "OVERVIEW", "symbol": symbol, "apikey": self.api_key}
|
||||
r = requests.get(self.BASE_URL, params=params)
|
||||
overview_data = r.json()
|
||||
# Clean up 'None' strings from API response before processing
|
||||
if isinstance(overview_data, dict):
|
||||
for key, value in overview_data.items():
|
||||
if value == 'None':
|
||||
overview_data[key] = None
|
||||
self._save_raw_data(overview_data, symbol, "market_metrics_overview")
|
||||
except Exception as e:
|
||||
print(f"Error fetching OVERVIEW for {symbol}: {e}")
|
||||
|
||||
market_cap = float(overview_data.get("MarketCapitalization") or 0)
|
||||
shares_outstanding = float(overview_data.get("SharesOutstanding") or 0)
|
||||
|
||||
price = 0
|
||||
if shares_outstanding > 0:
|
||||
price = market_cap / shares_outstanding
|
||||
|
||||
return {
|
||||
"price": price,
|
||||
"name": overview_data.get("Name"),
|
||||
"fiscal_year_end": overview_data.get("FiscalYearEnd"),
|
||||
"dividend_yield": float(overview_data.get("DividendYield") or 0),
|
||||
"market_cap": market_cap,
|
||||
"pe": float(overview_data.get("PERatio") or 0),
|
||||
"pb": float(overview_data.get("PriceToBookRatio") or 0),
|
||||
"employee_count": int(float(overview_data.get("FullTimeEmployees") or 0)),
|
||||
"total_share_holders": 0 # Not typically provided in basic AV Overview
|
||||
}
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
|
||||
df = self._fetch_data("INCOME_STATEMENT", symbol)
|
||||
cols_map = {
|
||||
"fiscalDateEnding": "date",
|
||||
"totalRevenue": "revenue",
|
||||
"netIncome": "net_income",
|
||||
"grossProfit": "gross_profit",
|
||||
"costOfRevenue": "cogs",
|
||||
"researchAndDevelopment": "rd_exp",
|
||||
"sellingGeneralAndAdministrative": "sga_exp",
|
||||
"interestExpense": "fin_exp",
|
||||
"incomeBeforeTax": "total_profit",
|
||||
"incomeTaxExpense": "income_tax",
|
||||
"ebit": "ebit"
|
||||
}
|
||||
df = df.rename(columns=cols_map)
|
||||
|
||||
numeric_cols = [
|
||||
"revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
|
||||
"fin_exp", "total_profit", "income_tax", "ebit",
|
||||
"depreciation", "depreciationAndAmortization"
|
||||
]
|
||||
for col in numeric_cols:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
return df
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
|
||||
df = self._fetch_data("BALANCE_SHEET", symbol)
|
||||
cols_map = {
|
||||
"fiscalDateEnding": "date",
|
||||
"totalShareholderEquity": "total_equity",
|
||||
"totalLiabilities": "total_liabilities",
|
||||
"totalCurrentAssets": "current_assets",
|
||||
"totalCurrentLiabilities": "current_liabilities",
|
||||
"cashAndCashEquivalentsAtCarryingValue": "cash",
|
||||
"currentNetReceivables": "receivables",
|
||||
"inventory": "inventory",
|
||||
"propertyPlantEquipment": "fixed_assets",
|
||||
"totalAssets": "total_assets",
|
||||
"goodwill": "goodwill",
|
||||
"longTermInvestments": "lt_invest",
|
||||
"shortTermDebt": "short_term_debt",
|
||||
"currentLongTermDebt": "short_term_debt_part",
|
||||
"longTermDebt": "long_term_debt",
|
||||
"currentAccountsPayable": "accounts_payable",
|
||||
"otherCurrentAssets": "prepayment",
|
||||
"otherNonCurrentAssets": "other_assets",
|
||||
"deferredRevenue": "adv_receipts"
|
||||
}
|
||||
df = df.rename(columns=cols_map)
|
||||
|
||||
numeric_cols = [
|
||||
"total_equity", "total_liabilities", "current_assets", "current_liabilities",
|
||||
"cash", "receivables", "inventory", "fixed_assets", "total_assets",
|
||||
"goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
|
||||
"long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts"
|
||||
]
|
||||
|
||||
for col in numeric_cols:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
return df
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
|
||||
df = self._fetch_data("CASH_FLOW", symbol)
|
||||
cols_map = {
|
||||
"fiscalDateEnding": "date",
|
||||
"operatingCashflow": "ocf",
|
||||
"capitalExpenditures": "capex",
|
||||
"dividendPayout": "dividends",
|
||||
"depreciationDepletionAndAmortization": "depreciation"
|
||||
}
|
||||
df = df.rename(columns=cols_map)
|
||||
|
||||
numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
|
||||
for col in numeric_cols:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
return df
|
||||
@ -1,243 +1,39 @@
|
||||
import tushare as ts
|
||||
import pandas as pd
|
||||
from .base import DataFetcher
|
||||
import time
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class CnFetcher(DataFetcher):
|
||||
def __init__(self, api_key: str):
|
||||
def __init__(self, api_key: str, data_source: str = 'Tushare'):
|
||||
super().__init__(api_key)
|
||||
ts.set_token(self.api_key)
|
||||
self.pro = ts.pro_api()
|
||||
self.storage = DataStorage()
|
||||
self.data_source = data_source
|
||||
|
||||
def _save_raw_data(self, df: pd.DataFrame, symbol: str, name: str):
|
||||
if df is None or df.empty:
|
||||
return
|
||||
market = 'CN'
|
||||
self.storage.save_data(df, market, symbol, f"raw_{name}")
|
||||
|
||||
def _get_ts_code(self, symbol: str) -> str:
|
||||
return symbol
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
if df.empty:
|
||||
return df
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
is_annual = df['end_date'].astype(str).str.endswith('1231')
|
||||
annual_records = df[is_annual]
|
||||
combined = pd.concat([latest_record, comparable_record, annual_records])
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
if self.data_source == 'Tushare':
|
||||
from .tushare_cn_client import TushareCnClient
|
||||
self.client = TushareCnClient(api_key)
|
||||
else:
|
||||
# Default to Tushare if unknown, or raise error.
|
||||
# For robustness, we can default to Tushare or handle Akshare later.
|
||||
if self.data_source == 'Akshare':
|
||||
raise NotImplementedError("Akshare client not yet implemented")
|
||||
from .tushare_cn_client import TushareCnClient
|
||||
self.client = TushareCnClient(api_key)
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.income(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "income_statement")
|
||||
rename_map = {
|
||||
'end_date': 'date',
|
||||
'revenue': 'revenue',
|
||||
'n_income_attr_p': 'net_income'
|
||||
}
|
||||
df = self._filter_data(df)
|
||||
df = df.rename(columns=rename_map)
|
||||
return df
|
||||
return self.client.get_income_statement(symbol)
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.balancesheet(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "balance_sheet")
|
||||
rename_map = {
|
||||
'end_date': 'date',
|
||||
'total_hldr_eqy_exc_min_int': 'total_equity',
|
||||
'total_liab': 'total_liabilities',
|
||||
'total_cur_assets': 'current_assets',
|
||||
'total_cur_liab': 'current_liabilities'
|
||||
}
|
||||
df = self._filter_data(df)
|
||||
df = df.rename(columns=rename_map)
|
||||
return df
|
||||
return self.client.get_balance_sheet(symbol)
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.cashflow(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "cash_flow")
|
||||
df = self._filter_data(df)
|
||||
df = df.rename(columns={
|
||||
'end_date': 'date',
|
||||
'n_cashflow_act': 'net_cash_flow',
|
||||
'depr_fa_coga_dpba': 'depreciation'
|
||||
})
|
||||
return df
|
||||
return self.client.get_cash_flow(symbol)
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
metrics = {
|
||||
"price": 0.0,
|
||||
"market_cap": 0.0,
|
||||
"pe": 0.0,
|
||||
"pb": 0.0,
|
||||
"total_share_holders": 0,
|
||||
"employee_count": 0
|
||||
}
|
||||
|
||||
try:
|
||||
df_daily = self.pro.daily_basic(ts_code=ts_code, limit=1)
|
||||
self._save_raw_data(df_daily, ts_code, "market_metrics_daily_basic")
|
||||
if not df_daily.empty:
|
||||
row = df_daily.iloc[0]
|
||||
metrics["price"] = row.get('close', 0.0)
|
||||
metrics["pe"] = row.get('pe', 0.0)
|
||||
metrics["pb"] = row.get('pb', 0.0)
|
||||
metrics["market_cap"] = row.get('total_mv', 0.0) * 10000
|
||||
metrics["dividend_yield"] = row.get('dv_ttm', 0.0)
|
||||
|
||||
df_basic = self.pro.stock_basic(ts_code=ts_code, fields='name,list_date')
|
||||
self._save_raw_data(df_basic, ts_code, "market_metrics_stock_basic")
|
||||
if not df_basic.empty:
|
||||
metrics['name'] = df_basic.iloc[0]['name']
|
||||
metrics['list_date'] = df_basic.iloc[0]['list_date']
|
||||
|
||||
df_comp = self.pro.stock_company(ts_code=ts_code)
|
||||
if not df_comp.empty:
|
||||
metrics["employee_count"] = int(df_comp.iloc[0].get('employees', 0) or 0)
|
||||
|
||||
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, limit=1)
|
||||
self._save_raw_data(df_holder, ts_code, "market_metrics_shareholder_number")
|
||||
if not df_holder.empty:
|
||||
metrics["total_share_holders"] = int(df_holder.iloc[0].get('holder_num', 0) or 0)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error fetching market metrics for {symbol}: {e}")
|
||||
|
||||
return metrics
|
||||
return self.client.get_market_metrics(symbol)
|
||||
|
||||
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
results = []
|
||||
|
||||
if not dates:
|
||||
return pd.DataFrame()
|
||||
|
||||
unique_dates = sorted(list(set([str(d).replace('-', '') for d in dates])), reverse=True)
|
||||
|
||||
try:
|
||||
import datetime
|
||||
min_date = min(unique_dates)
|
||||
max_date = max(unique_dates)
|
||||
|
||||
df_daily = self.pro.daily_basic(ts_code=ts_code, start_date=min_date, end_date=max_date)
|
||||
self._save_raw_data(df_daily, ts_code, "historical_metrics_daily_basic")
|
||||
if not df_daily.empty:
|
||||
df_daily = df_daily.sort_values('trade_date', ascending=False)
|
||||
|
||||
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, start_date=min_date, end_date=max_date)
|
||||
self._save_raw_data(df_holder, ts_code, "historical_metrics_shareholder_number")
|
||||
if not df_holder.empty:
|
||||
df_holder = df_holder.sort_values('end_date', ascending=False)
|
||||
|
||||
for date_str in unique_dates:
|
||||
metrics = {'date_str': date_str}
|
||||
|
||||
if not df_daily.empty:
|
||||
closest_daily = df_daily[df_daily['trade_date'] <= date_str]
|
||||
if not closest_daily.empty:
|
||||
row = closest_daily.iloc[0]
|
||||
metrics['Price'] = row.get('close')
|
||||
metrics['PE'] = row.get('pe')
|
||||
metrics['PB'] = row.get('pb')
|
||||
metrics['MarketCap'] = row.get('total_mv', 0) * 10000
|
||||
|
||||
if not df_holder.empty:
|
||||
closest_holder = df_holder[df_holder['end_date'] <= date_str]
|
||||
if not closest_holder.empty:
|
||||
metrics['Shareholders'] = closest_holder.iloc[0].get('holder_num')
|
||||
|
||||
results.append(metrics)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error fetching historical metrics for {symbol}: {e}")
|
||||
|
||||
return pd.DataFrame(results)
|
||||
return self.client.get_historical_metrics(symbol, dates)
|
||||
|
||||
def get_dividends(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df_div = self.pro.dividend(ts_code=ts_code, fields='end_date,ex_date,div_proc,cash_div')
|
||||
self._save_raw_data(df_div, ts_code, "dividends_raw")
|
||||
|
||||
if df_div.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Filter for implemented cash dividends
|
||||
df_div = df_div[(df_div['div_proc'] == '实施') & (df_div['cash_div'] > 0)]
|
||||
|
||||
if df_div.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_div['total_cash_div'] = 0.0
|
||||
|
||||
# Get total shares for each ex_date
|
||||
for index, row in df_div.iterrows():
|
||||
ex_date = row['ex_date']
|
||||
if not ex_date or pd.isna(ex_date):
|
||||
continue
|
||||
|
||||
try:
|
||||
time.sleep(0.2) # Sleep for 200ms to avoid hitting API limits
|
||||
df_daily = self.pro.daily_basic(ts_code=ts_code, trade_date=ex_date, fields='total_share')
|
||||
|
||||
if not df_daily.empty and not df_daily['total_share'].empty:
|
||||
total_share = df_daily.iloc[0]['total_share'] # total_share is in 万股 (10k shares)
|
||||
cash_div_per_share = row['cash_div'] # This is per-share
|
||||
|
||||
# Total dividend in Yuan
|
||||
total_cash_dividend = (cash_div_per_share * total_share * 10000)
|
||||
df_div.loc[index, 'total_cash_div'] = total_cash_dividend
|
||||
except Exception as e:
|
||||
print(f"Could not fetch daily basic for {ts_code} on {ex_date}: {e}")
|
||||
|
||||
df_div['year'] = pd.to_datetime(df_div['end_date']).dt.year
|
||||
dividends_by_year = df_div.groupby('year')['total_cash_div'].sum().reset_index()
|
||||
|
||||
dividends_by_year['date_str'] = dividends_by_year['year'].astype(str) + '1231'
|
||||
dividends_by_year.rename(columns={'total_cash_div': 'dividends'}, inplace=True)
|
||||
|
||||
return dividends_by_year[['date_str', 'dividends']]
|
||||
return self.client.get_dividends(symbol)
|
||||
|
||||
def get_repurchases(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.repurchase(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "repurchases")
|
||||
|
||||
if df.empty or 'ann_date' not in df.columns or 'amount' not in df.columns:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Filter for repurchases with a valid amount
|
||||
df = df[df['amount'] > 0]
|
||||
|
||||
if df.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Extract year and group by it
|
||||
df['year'] = pd.to_datetime(df['ann_date']).dt.year
|
||||
repurchases_by_year = df.groupby('year')['amount'].sum().reset_index()
|
||||
|
||||
# Create date_str for merging (YYYY1231)
|
||||
repurchases_by_year['date_str'] = repurchases_by_year['year'].astype(str) + '1231'
|
||||
|
||||
# Rename for merging.
|
||||
# Based on user feedback, it appears the unit from the API is Yuan, so no conversion is needed.
|
||||
repurchases_by_year.rename(columns={'amount': 'repurchases'}, inplace=True)
|
||||
|
||||
return repurchases_by_year[['date_str', 'repurchases']]
|
||||
return self.client.get_repurchases(symbol)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
class FetcherFactory:
|
||||
@staticmethod
|
||||
def get_fetcher(market: str, tushare_token: str = None, av_key: str = None, **kwargs):
|
||||
def get_fetcher(market: str, tushare_token: str = None, av_key: str = None, data_source: str = None, **kwargs):
|
||||
from .base import DataFetcher
|
||||
market = market.upper()
|
||||
if market == 'CN':
|
||||
@ -18,10 +18,24 @@ class FetcherFactory:
|
||||
from .hk_fetcher import HkFetcher
|
||||
return HkFetcher(ifind_token)
|
||||
elif market == 'US':
|
||||
# Default to Alpha Vantage if not specified or explicit
|
||||
if data_source == 'iFinD':
|
||||
ifind_token = kwargs.get('ifind_refresh_token')
|
||||
if not ifind_token:
|
||||
import os
|
||||
ifind_token = os.getenv('IFIND_REFRESH_TOKEN')
|
||||
if not ifind_token:
|
||||
# Fallback or error? Let's error if specifically requested
|
||||
raise ValueError("iFinD Refresh Token is required for US market when iFinD is selected")
|
||||
from .us_fetcher import UsFetcher
|
||||
# We need to update UsFetcher to accept data_source or handle it internally
|
||||
# For now, let's assume UsFetcher handles switching if we pass data_source
|
||||
return UsFetcher(ifind_token, data_source='iFinD')
|
||||
|
||||
if not av_key:
|
||||
raise ValueError("Alpha Vantage key is required for US market")
|
||||
from .us_fetcher import UsFetcher
|
||||
return UsFetcher(av_key)
|
||||
return UsFetcher(av_key, data_source='Alpha Vantage')
|
||||
elif market == 'JP':
|
||||
ifind_token = kwargs.get('ifind_refresh_token') or kwargs.get('jquants_refresh_token')
|
||||
if not ifind_token:
|
||||
|
||||
@ -1,746 +1,61 @@
|
||||
import pandas as pd
|
||||
import time
|
||||
from .base import DataFetcher
|
||||
from .ifind_client import IFindClient
|
||||
from .ifind_hk_client import IFindHKClient
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class HkFetcher(DataFetcher):
|
||||
def __init__(self, api_key: str):
|
||||
# api_key is the iFinD Refresh Token
|
||||
super().__init__(api_key)
|
||||
self.cli = IFindClient(refresh_token=api_key)
|
||||
self.data_source = 'iFinD'
|
||||
self.client = IFindHKClient(api_key)
|
||||
self.storage = DataStorage()
|
||||
self._basic_info_cache = {}
|
||||
|
||||
def _get_ifind_code(self, symbol: str) -> str:
|
||||
"""保持逻辑一致性,如果是纯数字则补齐后缀 .HK,否则直接传"""
|
||||
# Strip .HK suffix if present to handle input like '00700.HK'
|
||||
clean_symbol = symbol.replace('.HK', '').replace('.hk', '')
|
||||
|
||||
if clean_symbol.isdigit():
|
||||
# Force 4 digits for HK (e.g., 700 -> 0700.HK, 00700 -> 0700.HK)
|
||||
# e.g. 01651 -> 1651.HK
|
||||
code_int = int(clean_symbol)
|
||||
normalized_code = str(code_int).zfill(4)
|
||||
return f"{normalized_code}.HK"
|
||||
# HK stock codes are 4-5 digits, often 0 padded to 5 or 4 in other systems
|
||||
# iFinD usually expects 4 digits like '0700.HK', '0005.HK'
|
||||
# Input symbol might be '700', '0700', '5', '0005'
|
||||
if symbol.isdigit():
|
||||
padded = symbol.zfill(4)
|
||||
return f"{padded}.HK"
|
||||
return symbol
|
||||
|
||||
def _fetch_basic_info(self, symbol: str) -> dict:
|
||||
"""获取公司的基本信息:中文名称、会计年结日、上市日期"""
|
||||
def _fetch_basic_info(self, symbol: str):
|
||||
code = self._get_ifind_code(symbol)
|
||||
if code in self._basic_info_cache:
|
||||
return self._basic_info_cache[code]
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "corp_cn_name", "indiparams": []},
|
||||
{"indicator": "accounting_date", "indiparams": []},
|
||||
{"indicator": "ipo_date", "indiparams": []}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
if not df.empty:
|
||||
self._save_raw_data(df, symbol, "basic_info_raw")
|
||||
|
||||
info = {
|
||||
"name": "",
|
||||
"accounting_date": "1231", # 默认 12-31
|
||||
"ipo_date": ""
|
||||
}
|
||||
|
||||
if not df.empty:
|
||||
row = df.iloc[0]
|
||||
info["name"] = str(row.get("corp_cn_name", ""))
|
||||
info["acc_date"] = str(row.get("accounting_date", "1231"))
|
||||
info["accounting_date"] = "1231"
|
||||
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
|
||||
|
||||
self._basic_info_cache[code] = info
|
||||
return info
|
||||
|
||||
def _save_raw_data(self, data: any, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
df = pd.DataFrame([data])
|
||||
else:
|
||||
df = data
|
||||
self.storage.save_data(df, 'HK', symbol, f"raw_{name}")
|
||||
|
||||
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
|
||||
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
|
||||
if not res:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Default to 0 if not present (for lenient mocking) or check properly
|
||||
error_code = res.get("errorcode", 0)
|
||||
if error_code != 0:
|
||||
print(f"iFinD API Error: {res.get('errmsg')} (code: {error_code})")
|
||||
return pd.DataFrame()
|
||||
|
||||
tables = res.get("tables", [])
|
||||
if not tables:
|
||||
return pd.DataFrame()
|
||||
|
||||
table_info = tables[0]
|
||||
table_data = table_info.get("table", {})
|
||||
times = table_info.get("time", [])
|
||||
|
||||
if not table_data:
|
||||
return pd.DataFrame()
|
||||
|
||||
processed_table_data = {}
|
||||
for k, v in table_data.items():
|
||||
if not isinstance(v, list):
|
||||
processed_table_data[k] = [v]
|
||||
else:
|
||||
processed_table_data[k] = v
|
||||
|
||||
df = pd.DataFrame(processed_table_data)
|
||||
if times and len(times) == len(df):
|
||||
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
|
||||
elif times and len(df) == 1:
|
||||
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
|
||||
|
||||
if 'end_date' not in df.columns:
|
||||
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
|
||||
if col in df.columns:
|
||||
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
|
||||
break
|
||||
|
||||
return df
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
|
||||
if comparable_record.empty:
|
||||
dfs_to_concat = [latest_record, df]
|
||||
else:
|
||||
dfs_to_concat = [latest_record, comparable_record, df]
|
||||
|
||||
# HK typically has 1231 or 0331 or 0630 etc. but annual is annual.
|
||||
combined = pd.concat(dfs_to_concat) # Include all for now and dedup
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
|
||||
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
|
||||
"""通用获取历年财务数据 (HKD 为主,但 iFinD 支持转 CNY)"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
current_year = int(time.strftime("%Y"))
|
||||
|
||||
# 1. First, determine the most recent valid year by trying backwards from current year
|
||||
last_valid_year = None
|
||||
|
||||
# Try up to 3 years back to find the latest available report
|
||||
# e.g., in Jan 2026, try 2026 -> fail, 2025 -> success
|
||||
for offset in range(3):
|
||||
test_year = current_year - offset
|
||||
test_date = f"{test_year}1231"
|
||||
|
||||
# Use the first indicator to test availability
|
||||
first_indicator = indicator_configs[0]
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
# Check for non-null values
|
||||
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
|
||||
if pd.notna(valid_val) and valid_val != 0:
|
||||
last_valid_year = test_year
|
||||
break
|
||||
|
||||
if last_valid_year is None:
|
||||
# Fallback to current year if nothing found (will likely return empty/zeros, but keeps logic flowing)
|
||||
last_valid_year = current_year
|
||||
|
||||
# 2. Fetch 5 years starting from the last valid year
|
||||
all_dfs = []
|
||||
for i in range(5):
|
||||
target_year = last_valid_year - i
|
||||
target_date = f"{target_year}1231"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
|
||||
for item in indicator_configs
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
valid_cols = [c for c in df.columns if c not in ['end_date', 'date']]
|
||||
if not df[valid_cols].isnull().all().all():
|
||||
df['end_date'] = target_date
|
||||
df = df.dropna(axis=1, how='all')
|
||||
all_dfs.append(df)
|
||||
|
||||
if not all_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
return pd.concat(all_dfs, ignore_index=True)
|
||||
return self.client._fetch_basic_info(symbol, code)
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
|
||||
indicators = [
|
||||
{"indicator": "total_oi"},
|
||||
{"indicator": "prime_oi"},
|
||||
{"indicator": "other_oi"},
|
||||
{"indicator": "operating_cost"},
|
||||
{"indicator": "operating_expense"},
|
||||
{"indicator": "operating_fee"},
|
||||
{"indicator": "p_depreciation_and_amortization"},
|
||||
{"indicator": "gross_profit"},
|
||||
{"indicator": "sales_ad_and_ga"},
|
||||
{"indicator": "rad_cost"},
|
||||
{"indicator": "sales_fee"},
|
||||
{"indicator": "financial_expense"},
|
||||
{"indicator": "sales_income"},
|
||||
{"indicator": "sales_cost"},
|
||||
{"indicator": "other_income"},
|
||||
{"indicator": "manage_fee"},
|
||||
{"indicator": "deprec_and_amorti"},
|
||||
{"indicator": "total_other_opearting_expense"},
|
||||
{"indicator": "p_total_cost"},
|
||||
{"indicator": "operating_profit"},
|
||||
{"indicator": "total_gal"},
|
||||
{"indicator": "interest_income"},
|
||||
{"indicator": "interest_net_pay"},
|
||||
{"indicator": "interest_expense"},
|
||||
{"indicator": "income_from_asso_and_joint"},
|
||||
{"indicator": "other_gal_effct_profit_pre_tax"},
|
||||
{"indicator": "conti_op_before_tax"},
|
||||
{"indicator": "profit_before_noncurrent_items"},
|
||||
{"indicator": "profit_and_loss_of_noncurrent_items"},
|
||||
{"indicator": "profit_before_tax"},
|
||||
{"indicator": "income_tax"},
|
||||
{"indicator": "profit_after_tax"},
|
||||
{"indicator": "minoritygal"},
|
||||
{"indicator": "continue_operate_net_profit"},
|
||||
{"indicator": "noncontinue_operate_net_profit"},
|
||||
{"indicator": "other_special_items"},
|
||||
{"indicator": "ni_attr_to_cs"},
|
||||
{"indicator": "np_atms"},
|
||||
{"indicator": "preferred_divid_and_other_adjust"},
|
||||
{"indicator": "oci"},
|
||||
{"indicator": "total_oci"},
|
||||
{"indicator": "oci_from_parent"},
|
||||
{"indicator": "oci_from_minority"},
|
||||
{"indicator": "invest_property_fv_chg"},
|
||||
{"indicator": "operating_amt"},
|
||||
{"indicator": "oi_si"},
|
||||
{"indicator": "operating_premium_profit_si"},
|
||||
{"indicator": "to_toallied_corp_perf"},
|
||||
{"indicator": "to_joint_control_entity_perf"},
|
||||
{"indicator": "pre_tax_profit_si"},
|
||||
{"indicator": "after_tax_profit_si"},
|
||||
{"indicator": "profit_attrbt_to_nonholders"},
|
||||
{"indicator": "total_income_atncs"}
|
||||
]
|
||||
|
||||
df = self._fetch_financial_data_annual(symbol, indicators)
|
||||
if df.empty: return df
|
||||
self._save_raw_data(df, symbol, "income_statement_raw")
|
||||
|
||||
rename_map = {
|
||||
'total_oi': 'revenue',
|
||||
'operating_amt': 'turnover', # Backup for revenue
|
||||
'gross_profit': 'gross_profit',
|
||||
'sales_ad_and_ga': 'sga_exp',
|
||||
'sales_fee': 'selling_marketing_exp',
|
||||
'manage_fee': 'ga_exp',
|
||||
'rad_cost': 'rd_exp',
|
||||
'income_tax': 'income_tax',
|
||||
'ni_attr_to_cs': 'net_income',
|
||||
'operating_profit': 'operating_profit',
|
||||
'depreciation': 'depreciation',
|
||||
'deprec_and_amorti': 'depreciation', # Backup
|
||||
'p_depreciation_and_amortization': 'depreciation' # Another backup
|
||||
}
|
||||
|
||||
df_filtered = df.rename(columns=rename_map)
|
||||
|
||||
# Calculate EBIT if not present but operating_profit is there
|
||||
if 'ebit' not in df_filtered.columns and 'operating_profit' in df_filtered.columns:
|
||||
# Simple approximation: Operating Profit is often used as EBIT
|
||||
df_filtered['ebit'] = df_filtered['operating_profit']
|
||||
|
||||
for col in df_filtered.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
|
||||
|
||||
return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_income_statement(symbol, code)
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
|
||||
indicators = [
|
||||
{"indicator": "cce"},
|
||||
{"indicator": "st_investment"},
|
||||
{"indicator": "total_cash"},
|
||||
{"indicator": "account_receivable"},
|
||||
{"indicator": "tradable_fnncl_asset"},
|
||||
{"indicator": "derivative_fnncl_assets"},
|
||||
{"indicator": "restriv_fund"},
|
||||
{"indicator": "other_short_term_investment"},
|
||||
{"indicator": "ar_nr"},
|
||||
{"indicator": "total_ar"},
|
||||
{"indicator": "or"},
|
||||
{"indicator": "inventory"},
|
||||
{"indicator": "flow_assets_dit"},
|
||||
{"indicator": "pre_payment"},
|
||||
{"indicator": "other_cunrrent_assets_si"},
|
||||
{"indicator": "other_ca"},
|
||||
{"indicator": "total_ca"},
|
||||
{"indicator": "receivables_from_allied_corp"},
|
||||
{"indicator": "current_assets_si"},
|
||||
{"indicator": "prepay_deposits_etc"},
|
||||
{"indicator": "receivables_from_jce"},
|
||||
{"indicator": "receivables_from_ac"},
|
||||
{"indicator": "recoverable_tax"},
|
||||
{"indicator": "total_fixed_assets"},
|
||||
{"indicator": "depreciation"},
|
||||
{"indicator": "equity_and_lt_invest"},
|
||||
{"indicator": "net_fixed_assets"},
|
||||
{"indicator": "invest_property"},
|
||||
{"indicator": "equity_investment"},
|
||||
{"indicator": "investment_in_associate"},
|
||||
{"indicator": "investment_in_joints"},
|
||||
{"indicator": "held_to_maturity_invest"},
|
||||
{"indicator": "goodwill_and_intangible_asset"},
|
||||
{"indicator": "intangible_assets"},
|
||||
{"indicator": "accum_amortized"},
|
||||
{"indicator": "noncurrent_assets_dit"},
|
||||
{"indicator": "other_noncurrent_assets_si"},
|
||||
{"indicator": "dt_assets"},
|
||||
{"indicator": "total_noncurrent_assets"},
|
||||
{"indicator": "total_assets"},
|
||||
{"indicator": "ac_equity"},
|
||||
{"indicator": "lease_prepay"},
|
||||
{"indicator": "noncurrent_assets_si"},
|
||||
{"indicator": "st_lt_current_loan"},
|
||||
{"indicator": "trade_financial_lia"},
|
||||
{"indicator": "derivative_financial_lia"},
|
||||
{"indicator": "ap_np"},
|
||||
{"indicator": "accounts_payable"},
|
||||
{"indicator": "advance_payment"},
|
||||
{"indicator": "st_debt"},
|
||||
{"indicator": "contra_liab"},
|
||||
{"indicator": "tax_payable"},
|
||||
{"indicator": "accrued_liab"},
|
||||
{"indicator": "flow_debt_deferred_income"},
|
||||
{"indicator": "other_cl"},
|
||||
{"indicator": "other_cunrrent_liab_si"},
|
||||
{"indicator": "total_cl"},
|
||||
{"indicator": "accrued_expenses_etc"},
|
||||
{"indicator": "money_payable_toac"},
|
||||
{"indicator": "joint_control_entity_payable"},
|
||||
{"indicator": "payable_to_associated_corp"},
|
||||
{"indicator": "lt_debt"},
|
||||
{"indicator": "long_term_loan"},
|
||||
{"indicator": "other_noncurrent_liabi"},
|
||||
{"indicator": "deferred_tax_liability"},
|
||||
{"indicator": "ncl_deferred_income"},
|
||||
{"indicator": "other_noncurrent_liab_si"},
|
||||
{"indicator": "noncurrent_liab_si"},
|
||||
{"indicator": "total_noncurrent_liab"},
|
||||
{"indicator": "total_liab"},
|
||||
{"indicator": "common_shares"},
|
||||
{"indicator": "capital_reserve"},
|
||||
{"indicator": "equity_premium"},
|
||||
{"indicator": "treasury_stock"},
|
||||
{"indicator": "accumgal"},
|
||||
{"indicator": "equity_atsopc_sbi"},
|
||||
{"indicator": "preferred_stock"},
|
||||
{"indicator": "perpetual_debt"},
|
||||
{"indicator": "reserve"},
|
||||
{"indicator": "other_reserves"},
|
||||
{"indicator": "retained_earnings"},
|
||||
{"indicator": "oci_bs"},
|
||||
{"indicator": "total_common_equity"},
|
||||
{"indicator": "equity_belong_to_parent"},
|
||||
{"indicator": "minority_interests"},
|
||||
{"indicator": "other_equity_si"},
|
||||
{"indicator": "total_equity"},
|
||||
{"indicator": "total_lib_and_equity"},
|
||||
{"indicator": "equity_si"},
|
||||
{"indicator": "equity_atncs"}
|
||||
]
|
||||
|
||||
df = self._fetch_financial_data_annual(symbol, indicators)
|
||||
if df.empty: return df
|
||||
self._save_raw_data(df, symbol, "balance_sheet_raw")
|
||||
|
||||
rename_map = {
|
||||
'cce': 'cash',
|
||||
'ar_nr': 'receivables',
|
||||
'inventory': 'inventory',
|
||||
'net_fixed_assets': 'fixed_assets',
|
||||
'equity_and_lt_invest': 'long_term_investments',
|
||||
'goodwill_and_intangible_asset': 'goodwill',
|
||||
'st_debt': 'short_term_debt',
|
||||
'st_lt_current_loan': 'short_term_borrowings',
|
||||
'ap_np': 'accounts_payable',
|
||||
'contra_liab': 'contract_liabilities',
|
||||
'advance_payment': 'advances_from_customers',
|
||||
'flow_debt_deferred_income': 'deferred_revenue',
|
||||
'lt_debt': 'long_term_debt',
|
||||
'long_term_loan': 'long_term_borrowings',
|
||||
'total_assets': 'total_assets',
|
||||
'equity_belong_to_parent': 'total_equity',
|
||||
'pre_payment': 'prepayment'
|
||||
}
|
||||
|
||||
df_filtered = df.rename(columns=rename_map)
|
||||
|
||||
# Deduplicate columns just in case
|
||||
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
|
||||
|
||||
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
|
||||
if 'total_liab' in df_filtered.columns:
|
||||
df_filtered['total_liabilities'] = df_filtered['total_liab']
|
||||
elif 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
|
||||
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
|
||||
|
||||
# Deduplicate again in case total_liabilities logic added a dupe (unlikely)
|
||||
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()]
|
||||
|
||||
for col in df_filtered.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
|
||||
|
||||
return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_balance_sheet(symbol, code)
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
|
||||
indicators = [
|
||||
{"indicator": "ni"},
|
||||
{"indicator": "depreciation_and_amortization"},
|
||||
{"indicator": "operating_capital_change"},
|
||||
{"indicator": "ncf_from_oa"},
|
||||
{"indicator": "capital_cost"},
|
||||
{"indicator": "invest_buy"},
|
||||
{"indicator": "ncf_from_ia"},
|
||||
{"indicator": "increase_in_share_capital"},
|
||||
{"indicator": "decrease_in_share_capital"},
|
||||
{"indicator": "total_dividends_paid"},
|
||||
{"indicator": "ncf_from_fa"}
|
||||
]
|
||||
|
||||
df = self._fetch_financial_data_annual(symbol, indicators)
|
||||
if df.empty: return df
|
||||
self._save_raw_data(df, symbol, "cash_flow_raw")
|
||||
|
||||
rename_map = {
|
||||
'ncf_from_oa': 'ocf',
|
||||
'capital_cost': 'capex',
|
||||
'total_dividends_paid': 'dividends'
|
||||
}
|
||||
|
||||
df_filtered = df.rename(columns=rename_map)
|
||||
for col in df_filtered.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
|
||||
|
||||
if 'capex' in df_filtered.columns:
|
||||
df_filtered['capex'] = df_filtered['capex'].abs()
|
||||
|
||||
return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_cash_flow(symbol, code)
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
|
||||
"""获取公司基本信息(通过 ths_*_stock 基础指标)"""
|
||||
basic_info = self._fetch_basic_info(symbol)
|
||||
code = self._get_ifind_code(symbol)
|
||||
|
||||
metrics = {
|
||||
"name": basic_info.get("name", ""),
|
||||
"list_date": basic_info.get("ipo_date", ""),
|
||||
"accounting_date": basic_info.get("accounting_date", ""),
|
||||
"acc_date": basic_info.get("acc_date", ""),
|
||||
"price": 0,
|
||||
"market_cap": 0,
|
||||
"pe": 0,
|
||||
"pb": 0,
|
||||
"dividend_yield": 0
|
||||
}
|
||||
|
||||
# Fetch current market data using ths_* indicators confirmed for HK
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "ths_close_price_stock", "indiparams": []},
|
||||
{"indicator": "ths_market_value_stock", "indiparams": []},
|
||||
{"indicator": "ths_pe_ttm_stock", "indiparams": []},
|
||||
{"indicator": "ths_pb_stock", "indiparams": []},
|
||||
{"indicator": "ths_dividend_ratio_stock", "indiparams": []}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
if not df.empty:
|
||||
row = df.iloc[0]
|
||||
metrics["price"] = float(row.get("ths_close_price_stock") or 0)
|
||||
metrics["market_cap"] = float(row.get("ths_market_value_stock") or 0)
|
||||
metrics["pe"] = float(row.get("ths_pe_ttm_stock") or 0)
|
||||
metrics["pb"] = float(row.get("ths_pb_stock") or 0)
|
||||
metrics["dividend_yield"] = float(row.get("ths_dividend_ratio_stock") or 0)
|
||||
|
||||
return metrics
|
||||
return self.client.get_market_metrics(symbol, code)
|
||||
|
||||
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
|
||||
code = self._get_ifind_code(symbol)
|
||||
if not dates: return pd.DataFrame()
|
||||
|
||||
results = []
|
||||
for d in dates:
|
||||
d_str = str(d).replace('-', '').replace('/', '')
|
||||
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"startdate": fmt_d,
|
||||
"enddate": fmt_d,
|
||||
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
|
||||
"indipara": [
|
||||
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
|
||||
{"indicator": "market_value", "indiparams": ["", "CNY"]}
|
||||
]
|
||||
}
|
||||
|
||||
res = self.cli.post("date_sequence", params)
|
||||
df_seq = self._parse_ifind_tables(res)
|
||||
|
||||
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
|
||||
|
||||
if not df_seq.empty:
|
||||
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
|
||||
if not match.empty:
|
||||
if 'pre_close' in match.columns:
|
||||
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
|
||||
if 'market_value' in match.columns:
|
||||
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
|
||||
results.append(metrics)
|
||||
|
||||
df_hist = pd.DataFrame(results)
|
||||
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
|
||||
return df_hist
|
||||
return self.client.get_historical_metrics(symbol, code, dates)
|
||||
|
||||
def get_dividends(self, symbol: str) -> pd.DataFrame:
|
||||
code = self._get_ifind_code(symbol)
|
||||
current_year = int(time.strftime("%Y"))
|
||||
results = []
|
||||
|
||||
for i in range(5):
|
||||
year_str = str(current_year - i)
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty and 'annual_cum_dividend' in df.columns:
|
||||
val = df['annual_cum_dividend'].iloc[0]
|
||||
if pd.notna(val) and val != 0:
|
||||
results.append({
|
||||
'date_str': f"{year_str}1231", # Assume yearend for dividends
|
||||
'dividends': float(val)
|
||||
})
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_div = pd.DataFrame(results)
|
||||
self._save_raw_data(df_div, symbol, "dividends_raw")
|
||||
return df_div
|
||||
return self.client.get_dividends(symbol, code)
|
||||
|
||||
def get_repurchases(self, symbol: str) -> pd.DataFrame:
|
||||
code = self._get_ifind_code(symbol)
|
||||
current_year = int(time.strftime("%Y"))
|
||||
results = []
|
||||
|
||||
for i in range(5):
|
||||
target_year = current_year - i
|
||||
start_date = f"{target_year - 1}-12-31"
|
||||
end_date = f"{target_year}-12-31"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty and 'repur_num_new' in df.columns:
|
||||
val = df['repur_num_new'].iloc[0]
|
||||
if pd.notna(val) and val != 0:
|
||||
results.append({
|
||||
'date_str': f"{target_year}1231",
|
||||
'repurchases': float(val)
|
||||
})
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_repur = pd.DataFrame(results)
|
||||
self._save_raw_data(df_repur, symbol, "repurchases_raw")
|
||||
return df_repur
|
||||
return self.client.get_repurchases(symbol, code)
|
||||
|
||||
def get_employee_count(self, symbol: str) -> pd.DataFrame:
|
||||
code = self._get_ifind_code(symbol)
|
||||
current_year = int(time.strftime("%Y"))
|
||||
results = []
|
||||
|
||||
for i in range(5):
|
||||
target_year = current_year - i
|
||||
target_date = f"{target_year}-12-31"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "staff_num", "indiparams": [target_date]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty and 'staff_num' in df.columns:
|
||||
val = df['staff_num'].iloc[0]
|
||||
if pd.notna(val) and val != 0:
|
||||
results.append({
|
||||
'date_str': f"{target_year}1231",
|
||||
'employee_count': float(val)
|
||||
})
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_emp = pd.DataFrame(results)
|
||||
self._save_raw_data(df_emp, symbol, "employee_count_raw")
|
||||
return df_emp
|
||||
return self.client.get_employee_count(symbol, code)
|
||||
|
||||
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
|
||||
"""获取官方计算的财务指标(比率、周转天数等)"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
current_year = int(time.strftime("%Y"))
|
||||
|
||||
# 1. Determine the latest valid year
|
||||
last_valid_year = None
|
||||
for offset in range(3):
|
||||
test_year = current_year - offset
|
||||
# Try getting ROE as a proxy for data availability
|
||||
test_date = f"{test_year}1231"
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [{"indicator": "roe", "indiparams": [test_date]}]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
|
||||
if pd.notna(val) and val != 0:
|
||||
last_valid_year = test_year
|
||||
break
|
||||
|
||||
if last_valid_year is None:
|
||||
last_valid_year = current_year
|
||||
|
||||
all_dfs = []
|
||||
|
||||
# 2. Fetch 5 years starting from last valid year
|
||||
for i in range(5):
|
||||
target_year = last_valid_year - i
|
||||
date_str = f"{target_year}1231"
|
||||
year_str = str(target_year)
|
||||
|
||||
indipara = []
|
||||
|
||||
# 1. 人均指标 (参数: Year, "100")
|
||||
for key in ["salary_pp", "revenue_pp", "profit_pp"]:
|
||||
indipara.append({"indicator": key, "indiparams": [year_str, "100"]})
|
||||
|
||||
# 2. 财务比率与周转率 (参数: Date YYYYMMDD)
|
||||
ratio_keys = [
|
||||
"roe", "roa", "roic",
|
||||
"sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
|
||||
"operating_revenue_yoy", "np_atsopc_yoy",
|
||||
"ibdebt_ratio_asset_base",
|
||||
"inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
|
||||
"fixed_asset_turnover_ratio", "total_capital_turnover"
|
||||
]
|
||||
for key in ratio_keys:
|
||||
indipara.append({"indicator": key, "indiparams": [date_str]})
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": indipara
|
||||
}
|
||||
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
if 'end_date' not in df.columns:
|
||||
df['end_date'] = date_str
|
||||
|
||||
# Filter out columns that are all NaN
|
||||
df = df.dropna(axis=1, how='all')
|
||||
|
||||
# Identify if we have meaningful data (at least one valid metric)
|
||||
valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
|
||||
if not df[valid_cols].isnull().all().all():
|
||||
all_dfs.append(df)
|
||||
|
||||
if not all_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
combined = pd.concat(all_dfs, ignore_index=True)
|
||||
self._save_raw_data(combined, symbol, "financial_ratios_raw")
|
||||
|
||||
rename_map = {
|
||||
"salary_pp": "salary_per_employee",
|
||||
"revenue_pp": "revenue_per_employee",
|
||||
"profit_pp": "profit_per_employee",
|
||||
"sales_fee_to_or": "selling_expense_ratio",
|
||||
"manage_fee_to_revenue": "admin_expense_ratio",
|
||||
"rad_expense_to_total_income": "rd_expense_ratio",
|
||||
"operating_revenue_yoy": "revenue_growth",
|
||||
"np_atsopc_yoy": "net_profit_growth",
|
||||
"ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
|
||||
"fixed_asset_turnover_ratio": "fixed_asset_turnover",
|
||||
"total_capital_turnover": "total_asset_turnover"
|
||||
}
|
||||
|
||||
df_final = combined.rename(columns=rename_map)
|
||||
|
||||
for col in df_final.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_final[col] = pd.to_numeric(df_final[col], errors='coerce')
|
||||
|
||||
return self._filter_data(df_final)
|
||||
return self.client.get_financial_ratios(symbol, code)
|
||||
|
||||
699
src/fetchers/ifind_hk_client.py
Normal file
699
src/fetchers/ifind_hk_client.py
Normal file
@ -0,0 +1,699 @@
|
||||
import pandas as pd
|
||||
import time
|
||||
from .ifind_client import IFindClient
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class IFindHKClient:
|
||||
"""
|
||||
iFinD Client specifically for Hong Kong Market.
|
||||
Uses 'THS' indicators and Chinese accounting standard mappings often used for HK stocks in iFinD.
|
||||
"""
|
||||
def __init__(self, api_key: str):
    """Set up the HK-market wrapper around the shared iFinD HTTP client.

    api_key: iFinD refresh token used to authenticate every request.
    """
    # Per-code cache so repeated basic-info lookups hit the API only once.
    self._basic_info_cache = {}
    self.market = 'HK'
    self.storage = DataStorage()
    self.cli = IFindClient(refresh_token=api_key)
|
||||
|
||||
def _save_raw_data(self, data: any, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
df = pd.DataFrame([data])
|
||||
else:
|
||||
df = data
|
||||
self.storage.save_data(df, self.market, symbol, f"raw_{name}")
|
||||
|
||||
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
|
||||
if not res:
|
||||
return pd.DataFrame()
|
||||
|
||||
error_code = res.get("errorcode", 0)
|
||||
if error_code != 0:
|
||||
print(f"iFinD API Error: {res.get('errmsg')} (code: {error_code})")
|
||||
return pd.DataFrame()
|
||||
|
||||
tables = res.get("tables", [])
|
||||
if not tables:
|
||||
return pd.DataFrame()
|
||||
|
||||
table_info = tables[0]
|
||||
table_data = table_info.get("table", {})
|
||||
times = table_info.get("time", [])
|
||||
|
||||
if not table_data:
|
||||
return pd.DataFrame()
|
||||
|
||||
processed_table_data = {}
|
||||
for k, v in table_data.items():
|
||||
if not isinstance(v, list):
|
||||
processed_table_data[k] = [v]
|
||||
else:
|
||||
processed_table_data[k] = v
|
||||
|
||||
df = pd.DataFrame(processed_table_data)
|
||||
if times and len(times) == len(df):
|
||||
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
|
||||
elif times and len(df) == 1:
|
||||
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
|
||||
|
||||
if 'end_date' not in df.columns:
|
||||
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
|
||||
if col in df.columns:
|
||||
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
|
||||
break
|
||||
|
||||
return df
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
|
||||
if comparable_record.empty:
|
||||
dfs_to_concat = [latest_record, df]
|
||||
else:
|
||||
dfs_to_concat = [latest_record, comparable_record, df]
|
||||
|
||||
combined = pd.concat(dfs_to_concat)
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
|
||||
def _fetch_basic_info(self, symbol: str, code: str) -> dict:
|
||||
if code in self._basic_info_cache:
|
||||
return self._basic_info_cache[code]
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "corp_cn_name", "indiparams": []},
|
||||
{"indicator": "accounting_date", "indiparams": []},
|
||||
{"indicator": "ipo_date", "indiparams": []}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
if not df.empty:
|
||||
self._save_raw_data(df, symbol, "basic_info_raw")
|
||||
|
||||
info = {
|
||||
"name": "",
|
||||
"accounting_date": "1231",
|
||||
"ipo_date": ""
|
||||
}
|
||||
|
||||
if not df.empty:
|
||||
row = df.iloc[0]
|
||||
info["name"] = str(row.get("corp_cn_name", ""))
|
||||
# HK logic typically defaults to 1231, ignoring accounting_date output in HkFetcher
|
||||
info["acc_date"] = str(row.get("accounting_date", "1231"))
|
||||
info["accounting_date"] = "1231"
|
||||
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
|
||||
|
||||
self._basic_info_cache[code] = info
|
||||
return info
|
||||
|
||||
def _fetch_financial_data_annual(self, symbol: str, code: str, indicator_configs: list) -> pd.DataFrame:
|
||||
current_year = int(time.strftime("%Y"))
|
||||
|
||||
last_valid_year = None
|
||||
for offset in range(3):
|
||||
test_year = current_year - offset
|
||||
test_date = f"{test_year}1231"
|
||||
first_indicator = indicator_configs[0]
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
|
||||
if pd.notna(valid_val) and valid_val != 0:
|
||||
last_valid_year = test_year
|
||||
break
|
||||
|
||||
if last_valid_year is None:
|
||||
last_valid_year = current_year
|
||||
|
||||
all_dfs = []
|
||||
for i in range(5):
|
||||
target_year = last_valid_year - i
|
||||
target_date = f"{target_year}1231"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
|
||||
for item in indicator_configs
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
valid_cols = [c for c in df.columns if c not in ['end_date', 'date']]
|
||||
if not df[valid_cols].isnull().all().all():
|
||||
df['end_date'] = target_date
|
||||
df = df.dropna(axis=1, how='all')
|
||||
all_dfs.append(df)
|
||||
|
||||
if not all_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
return pd.concat(all_dfs, ignore_index=True)
|
||||
|
||||
def get_income_statement(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of annual HK income-statement data.

    Returns a DataFrame keyed by 'end_date' with standardised column names
    (revenue, gross_profit, net_income, ...), numeric values, and an 'ebit'
    column mirroring operating_profit when no explicit EBIT exists.
    """
    indicator_names = [
        "total_oi", "prime_oi", "other_oi", "operating_cost",
        "operating_expense", "operating_fee", "p_depreciation_and_amortization",
        "gross_profit", "sales_ad_and_ga", "rad_cost", "sales_fee",
        "financial_expense", "sales_income", "sales_cost", "other_income",
        "manage_fee", "deprec_and_amorti", "total_other_opearting_expense",
        "p_total_cost", "operating_profit", "total_gal", "interest_income",
        "interest_net_pay", "interest_expense", "income_from_asso_and_joint",
        "other_gal_effct_profit_pre_tax", "conti_op_before_tax",
        "profit_before_noncurrent_items", "profit_and_loss_of_noncurrent_items",
        "profit_before_tax", "income_tax", "profit_after_tax", "minoritygal",
        "continue_operate_net_profit", "noncontinue_operate_net_profit",
        "other_special_items", "ni_attr_to_cs", "np_atms",
        "preferred_divid_and_other_adjust", "oci", "total_oci",
        "oci_from_parent", "oci_from_minority", "invest_property_fv_chg",
        "operating_amt", "oi_si", "operating_premium_profit_si",
        "to_toallied_corp_perf", "to_joint_control_entity_perf",
        "pre_tax_profit_si", "after_tax_profit_si",
        "profit_attrbt_to_nonholders", "total_income_atncs",
    ]

    df = self._fetch_financial_data_annual(
        symbol, code, [{"indicator": name} for name in indicator_names])
    if df.empty:
        return df
    self._save_raw_data(df, symbol, "income_statement_raw")

    # Map iFinD field names onto the project's standard schema; several
    # depreciation variants intentionally collapse onto one target name.
    rename_map = {
        'total_oi': 'revenue',
        'operating_amt': 'turnover',
        'gross_profit': 'gross_profit',
        'sales_ad_and_ga': 'sga_exp',
        'sales_fee': 'selling_marketing_exp',
        'manage_fee': 'ga_exp',
        'rad_cost': 'rd_exp',
        'income_tax': 'income_tax',
        'ni_attr_to_cs': 'net_income',
        'operating_profit': 'operating_profit',
        'depreciation': 'depreciation',
        'deprec_and_amorti': 'depreciation',
        'p_depreciation_and_amortization': 'depreciation'
    }
    result = df.rename(columns=rename_map)

    if 'ebit' not in result.columns and 'operating_profit' in result.columns:
        result['ebit'] = result['operating_profit']

    for col in result.columns:
        if col not in ['date', 'end_date']:
            result[col] = pd.to_numeric(result[col], errors='coerce')

    return self._filter_data(result)
|
||||
|
||||
def get_balance_sheet(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of annual HK balance-sheet data.

    Returns a DataFrame keyed by 'end_date' with standardised column names
    (cash, receivables, total_assets, ...). 'total_liabilities' is filled
    from 'total_liab' or derived as assets minus equity when absent.
    """
    indicator_names = [
        "cce", "st_investment", "total_cash", "account_receivable",
        "tradable_fnncl_asset", "derivative_fnncl_assets", "restriv_fund",
        "other_short_term_investment", "ar_nr", "total_ar", "or", "inventory",
        "flow_assets_dit", "pre_payment", "other_cunrrent_assets_si",
        "other_ca", "total_ca", "receivables_from_allied_corp",
        "current_assets_si", "prepay_deposits_etc", "receivables_from_jce",
        "receivables_from_ac", "recoverable_tax", "total_fixed_assets",
        "depreciation", "equity_and_lt_invest", "net_fixed_assets",
        "invest_property", "equity_investment", "investment_in_associate",
        "investment_in_joints", "held_to_maturity_invest",
        "goodwill_and_intangible_asset", "intangible_assets",
        "accum_amortized", "noncurrent_assets_dit",
        "other_noncurrent_assets_si", "dt_assets", "total_noncurrent_assets",
        "total_assets", "ac_equity", "lease_prepay", "noncurrent_assets_si",
        "st_lt_current_loan", "trade_financial_lia", "derivative_financial_lia",
        "ap_np", "accounts_payable", "advance_payment", "st_debt",
        "contra_liab", "tax_payable", "accrued_liab",
        "flow_debt_deferred_income", "other_cl", "other_cunrrent_liab_si",
        "total_cl", "accrued_expenses_etc", "money_payable_toac",
        "joint_control_entity_payable", "payable_to_associated_corp",
        "lt_debt", "long_term_loan", "other_noncurrent_liabi",
        "deferred_tax_liability", "ncl_deferred_income",
        "other_noncurrent_liab_si", "noncurrent_liab_si",
        "total_noncurrent_liab", "total_liab", "common_shares",
        "capital_reserve", "equity_premium", "treasury_stock", "accumgal",
        "equity_atsopc_sbi", "preferred_stock", "perpetual_debt", "reserve",
        "other_reserves", "retained_earnings", "oci_bs", "total_common_equity",
        "equity_belong_to_parent", "minority_interests", "other_equity_si",
        "total_equity", "total_lib_and_equity", "equity_si", "equity_atncs",
    ]

    df = self._fetch_financial_data_annual(
        symbol, code, [{"indicator": name} for name in indicator_names])
    if df.empty:
        return df
    self._save_raw_data(df, symbol, "balance_sheet_raw")

    rename_map = {
        'cce': 'cash',
        'ar_nr': 'receivables',
        'inventory': 'inventory',
        'net_fixed_assets': 'fixed_assets',
        'equity_and_lt_invest': 'long_term_investments',
        'goodwill_and_intangible_asset': 'goodwill',
        'st_debt': 'short_term_debt',
        'st_lt_current_loan': 'short_term_borrowings',
        'ap_np': 'accounts_payable',
        'contra_liab': 'contract_liabilities',
        'advance_payment': 'advances_from_customers',
        'flow_debt_deferred_income': 'deferred_revenue',
        'lt_debt': 'long_term_debt',
        'long_term_loan': 'long_term_borrowings',
        'total_assets': 'total_assets',
        'equity_belong_to_parent': 'total_equity',
        'pre_payment': 'prepayment'
    }
    result = df.rename(columns=rename_map)
    # Renaming can collide with pre-existing columns; keep the first of each.
    result = result.loc[:, ~result.columns.duplicated()]

    if 'total_liabilities' not in result.columns or result['total_liabilities'].isnull().all():
        if 'total_liab' in result.columns:
            result['total_liabilities'] = result['total_liab']
        elif 'total_assets' in result.columns and 'total_equity' in result.columns:
            result['total_liabilities'] = result['total_assets'] - result['total_equity']

    result = result.loc[:, ~result.columns.duplicated()]

    for col in result.columns:
        if col not in ['date', 'end_date']:
            result[col] = pd.to_numeric(result[col], errors='coerce')

    return self._filter_data(result)
|
||||
|
||||
def get_cash_flow(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of annual HK cash-flow data.

    Returns a DataFrame with standardised columns (ocf, capex, dividends);
    capex is reported as a positive magnitude.
    """
    indicator_names = [
        "ni", "depreciation_and_amortization", "operating_capital_change",
        "ncf_from_oa", "capital_cost", "invest_buy", "ncf_from_ia",
        "increase_in_share_capital", "decrease_in_share_capital",
        "total_dividends_paid", "ncf_from_fa",
    ]

    df = self._fetch_financial_data_annual(
        symbol, code, [{"indicator": name} for name in indicator_names])
    if df.empty:
        return df
    self._save_raw_data(df, symbol, "cash_flow_raw")

    rename_map = {
        'ncf_from_oa': 'ocf',
        'capital_cost': 'capex',
        'total_dividends_paid': 'dividends'
    }
    result = df.rename(columns=rename_map)
    for col in result.columns:
        if col not in ['date', 'end_date']:
            result[col] = pd.to_numeric(result[col], errors='coerce')

    # iFinD reports capital expenditure as an outflow (negative); normalise.
    if 'capex' in result.columns:
        result['capex'] = result['capex'].abs()

    return self._filter_data(result)
|
||||
|
||||
def get_market_metrics(self, symbol: str, code: str) -> dict:
    """Return a snapshot of current market metrics plus basic company info.

    Falsy quote values (None/0/'') become 0; all quote fields are floats.
    """
    basic_info = self._fetch_basic_info(symbol, code)

    metrics = {
        "name": basic_info.get("name", ""),
        "list_date": basic_info.get("ipo_date", ""),
        "accounting_date": basic_info.get("accounting_date", ""),
        "price": 0,
        "market_cap": 0,
        "pe": 0,
        "pb": 0,
        "dividend_yield": 0
    }

    # (metrics key, iFinD THS indicator) pairs for the live quote snapshot.
    quote_fields = [
        ("price", "ths_close_price_stock"),
        ("market_cap", "ths_market_value_stock"),
        ("pe", "ths_pe_ttm_stock"),
        ("pb", "ths_pb_stock"),
        ("dividend_yield", "ths_dividend_ratio_stock"),
    ]
    params = {
        "codes": code,
        "indipara": [{"indicator": field, "indiparams": []} for _, field in quote_fields]
    }
    df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
    if not df.empty:
        row = df.iloc[0]
        for target, field in quote_fields:
            metrics[target] = float(row.get(field) or 0)

    return metrics
|
||||
|
||||
def get_historical_metrics(self, symbol: str, code: str, dates: list) -> pd.DataFrame:
    """Fetch close price and market cap as of each requested date.

    Each date yields one row with 'date_str', 'Price', 'MarketCap' and
    zero-filled PE/PB placeholders (not provided by this endpoint).
    """
    if not dates:
        return pd.DataFrame()

    rows = []
    for raw_date in dates:
        d_str = str(raw_date).replace('-', '').replace('/', '')
        # iFinD wants dashed dates; anything not 8 digits is passed through.
        fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str

        params = {
            "codes": code,
            "startdate": fmt_d,
            "enddate": fmt_d,
            "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
            "indipara": [
                {"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
                {"indicator": "market_value", "indiparams": ["", "CNY"]}
            ]
        }
        df_seq = self._parse_ifind_tables(self.cli.post("date_sequence", params))

        record = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
        if not df_seq.empty:
            # Use the last trading day at or before the requested date.
            if 'end_date' in df_seq.columns:
                match = df_seq[df_seq['end_date'] <= d_str].tail(1)
            else:
                match = df_seq.tail(1)
            if not match.empty:
                if 'pre_close' in match.columns:
                    record['Price'] = float(match['pre_close'].iloc[0] or 0.0)
                if 'market_value' in match.columns:
                    record['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
        rows.append(record)

    df_hist = pd.DataFrame(rows)
    self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
    return df_hist
|
||||
|
||||
def get_dividends(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch cumulative annual dividends (CNY) for the last five years.

    Years with no (or zero) dividend are omitted; an empty DataFrame is
    returned when nothing was paid at all.
    """
    current_year = int(time.strftime("%Y"))
    rows = []

    for back in range(5):
        year_str = str(current_year - back)
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))

        if df.empty or 'annual_cum_dividend' not in df.columns:
            continue
        amount = df['annual_cum_dividend'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({'date_str': f"{year_str}1231", 'dividends': float(amount)})

    if not rows:
        return pd.DataFrame()

    df_div = pd.DataFrame(rows)
    self._save_raw_data(df_div, symbol, "dividends_raw")
    return df_div
|
||||
|
||||
def get_repurchases(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch annual share-repurchase totals for the last five years.

    Each year is queried over the window (prev Dec-31, Dec-31]; years with
    zero or missing values are dropped.
    """
    current_year = int(time.strftime("%Y"))
    rows = []

    for back in range(5):
        target_year = current_year - back
        window_start = f"{target_year - 1}-12-31"
        window_end = f"{target_year}-12-31"

        params = {
            "codes": code,
            "indipara": [
                {"indicator": "repur_num_new", "indiparams": [window_start, window_end, "1"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))

        if df.empty or 'repur_num_new' not in df.columns:
            continue
        amount = df['repur_num_new'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({'date_str': f"{target_year}1231", 'repurchases': float(amount)})

    if not rows:
        return pd.DataFrame()

    df_repur = pd.DataFrame(rows)
    self._save_raw_data(df_repur, symbol, "repurchases_raw")
    return df_repur
|
||||
|
||||
def get_employee_count(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch year-end headcount for the last five years.

    Years with zero or missing staff numbers are dropped; empty DataFrame
    when no year has data.
    """
    current_year = int(time.strftime("%Y"))
    rows = []

    for back in range(5):
        target_year = current_year - back
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "staff_num", "indiparams": [f"{target_year}-12-31"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))

        if df.empty or 'staff_num' not in df.columns:
            continue
        headcount = df['staff_num'].iloc[0]
        if pd.notna(headcount) and headcount != 0:
            rows.append({'date_str': f"{target_year}1231", 'employee_count': float(headcount)})

    if not rows:
        return pd.DataFrame()

    df_emp = pd.DataFrame(rows)
    self._save_raw_data(df_emp, symbol, "employee_count_raw")
    return df_emp
|
||||
|
||||
def get_financial_ratios(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of officially computed ratios (ROE, turnover days, ...).

    Probes the last three Dec-31 dates with ROE to locate the newest year
    that has data, then pulls per-employee metrics (year + "100" parameters)
    and date-parameterised ratios for that year and the four before it.
    Columns are renamed to the project's standard schema and coerced numeric.
    """
    current_year = int(time.strftime("%Y"))

    # 1. Determine the latest valid year
    last_valid_year = None
    for offset in range(3):
        test_year = current_year - offset
        test_date = f"{test_year}1231"
        params = {
            "codes": code,
            "indipara": [{"indicator": "roe", "indiparams": [test_date]}]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        if not df.empty:
            val = df.iloc[0, 0] if df.shape[1] > 0 else None
            # BUGFIX: this previously tested the undefined name `valid_val`
            # (copied from _fetch_financial_data_annual) and raised NameError
            # whenever the probe returned data.
            if pd.notna(val) and val != 0:
                last_valid_year = test_year
                break

    if last_valid_year is None:
        last_valid_year = current_year

    all_dfs = []

    # 2. Fetch 5 years starting from last valid year
    for i in range(5):
        target_year = last_valid_year - i
        date_str = f"{target_year}1231"
        year_str = str(target_year)

        indipara = []

        # Per-capita metrics take (year, "100") parameters.
        for key in ["salary_pp", "revenue_pp", "profit_pp"]:
            indipara.append({"indicator": key, "indiparams": [year_str, "100"]})

        # Ratio/turnover metrics take a single YYYYMMDD date parameter.
        ratio_keys = [
            "roe", "roa", "roic",
            "sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
            "operating_revenue_yoy", "np_atsopc_yoy",
            "ibdebt_ratio_asset_base",
            "inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
            "fixed_asset_turnover_ratio", "total_capital_turnover"
        ]
        for key in ratio_keys:
            indipara.append({"indicator": key, "indiparams": [date_str]})

        params = {
            "codes": code,
            "indipara": indipara
        }

        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        if not df.empty:
            if 'end_date' not in df.columns:
                df['end_date'] = date_str

            df = df.dropna(axis=1, how='all')

            # Keep the year only if at least one metric has a value.
            valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
            if not df[valid_cols].isnull().all().all():
                all_dfs.append(df)

    if not all_dfs:
        return pd.DataFrame()

    combined = pd.concat(all_dfs, ignore_index=True)
    self._save_raw_data(combined, symbol, "financial_ratios_raw")

    rename_map = {
        "salary_pp": "salary_per_employee",
        "revenue_pp": "revenue_per_employee",
        "profit_pp": "profit_per_employee",
        "sales_fee_to_or": "selling_expense_ratio",
        "manage_fee_to_revenue": "admin_expense_ratio",
        "rad_expense_to_total_income": "rd_expense_ratio",
        "operating_revenue_yoy": "revenue_growth",
        "np_atsopc_yoy": "net_profit_growth",
        "ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
        "fixed_asset_turnover_ratio": "fixed_asset_turnover",
        "total_capital_turnover": "total_asset_turnover"
    }

    df_final = combined.rename(columns=rename_map)

    for col in df_final.columns:
        if col not in ['date', 'end_date']:
            df_final[col] = pd.to_numeric(df_final[col], errors='coerce')

    return self._filter_data(df_final)
|
||||
556
src/fetchers/ifind_int_client.py
Normal file
556
src/fetchers/ifind_int_client.py
Normal file
@ -0,0 +1,556 @@
|
||||
import pandas as pd
|
||||
import time
|
||||
from .ifind_client import IFindClient
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class IFindIntClient:
|
||||
"""
|
||||
Generic iFinD Client for International Markets (JP, VN, US).
|
||||
Uses 'OAS' (Original Accounting Standards?) or similar standardized indicators
|
||||
typically available for international stocks in iFinD.
|
||||
"""
|
||||
def __init__(self, api_key: str, market: str):
    """Set up an international-market client (JP/VN/US) on the shared iFinD API.

    api_key: iFinD refresh token; market: market code used for storage paths.
    """
    self._basic_info_cache = {}
    self.market = market
    self.storage = DataStorage()
    self.cli = IFindClient(refresh_token=api_key)
|
||||
|
||||
def _save_raw_data(self, data: any, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
df = pd.DataFrame([data])
|
||||
else:
|
||||
df = data
|
||||
self.storage.save_data(df, self.market, symbol, f"raw_{name}")
|
||||
|
||||
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
|
||||
if not res:
|
||||
return pd.DataFrame()
|
||||
|
||||
if res.get("errorcode") != 0:
|
||||
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
|
||||
return pd.DataFrame()
|
||||
|
||||
tables = res.get("tables", [])
|
||||
if not tables:
|
||||
return pd.DataFrame()
|
||||
|
||||
table_info = tables[0]
|
||||
table_data = table_info.get("table", {})
|
||||
times = table_info.get("time", [])
|
||||
|
||||
if not table_data:
|
||||
return pd.DataFrame()
|
||||
|
||||
processed_table_data = {}
|
||||
for k, v in table_data.items():
|
||||
if not isinstance(v, list):
|
||||
processed_table_data[k] = [v]
|
||||
else:
|
||||
processed_table_data[k] = v
|
||||
|
||||
df = pd.DataFrame(processed_table_data)
|
||||
if times and len(times) == len(df):
|
||||
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
|
||||
elif times and len(df) == 1:
|
||||
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
|
||||
|
||||
if 'end_date' not in df.columns:
|
||||
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
|
||||
if col in df.columns:
|
||||
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
|
||||
break
|
||||
|
||||
return df
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
|
||||
# Try to include standard fiscal year ends or just all annuals?
|
||||
# JP/VN/US usually have annual reports.
|
||||
# iFinD often returns data for specific requested dates.
|
||||
# We will keep it simple and just dedup.
|
||||
# But for consistency with existing logic which concatenates records:
|
||||
is_annual = df['end_date'].astype(str).str.endswith('1231') | df['end_date'].astype(str).str.endswith('0331')
|
||||
annual_records = df[is_annual]
|
||||
|
||||
combined = pd.concat([latest_record, comparable_record, annual_records])
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
|
||||
def _fetch_basic_info(self, symbol: str, code: str) -> dict:
    """Fetch (and cache per code) company name, fiscal year-end and IPO date.

    Returns:
        dict with keys ``name``, ``accounting_date`` (MMDD, default
        ``"1231"``) and ``ipo_date`` (digits-only date or empty string).
    """
    cached = self._basic_info_cache.get(code)
    if cached is not None:
        return cached

    params = {
        "codes": code,
        "indipara": [
            {"indicator": "corp_cn_name", "indiparams": []},
            {"indicator": "accounting_date", "indiparams": []},
            {"indicator": "ipo_date", "indiparams": []}
        ]
    }
    df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
    if not df.empty:
        self._save_raw_data(df, symbol, "basic_info_raw")

    info = {
        "name": "",
        "accounting_date": "1231",
        "ipo_date": ""
    }

    if not df.empty:
        first_row = df.iloc[0]
        info["name"] = str(first_row.get("corp_cn_name", ""))
        # accounting_date may arrive as "12-31" / "12/31"; keep digits only.
        acc_date = str(first_row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
        if acc_date:
            info["accounting_date"] = acc_date
        info["ipo_date"] = str(first_row.get("ipo_date", "")).replace("-", "").replace("/", "")

    self._basic_info_cache[code] = info
    return info
|
||||
|
||||
def _fetch_financial_data_annual(self, symbol: str, code: str, indicator_configs: list) -> pd.DataFrame:
    """Fetch up to 5 fiscal years of the given indicators in CNY, anchored
    on the most recent year that actually has published data.

    Args:
        symbol: Local symbol, used only for raw-data persistence naming.
        code: Full iFinD code sent to the API.
        indicator_configs: list of ``{"indicator": ..., "type": ...}``;
            ``type`` defaults to ``"1"``.

    Returns:
        Concatenation of all non-empty yearly responses with ``end_date``
        forced to the requested YYYYMMDD period, or an empty frame.
    """
    basic_info = self._fetch_basic_info(symbol, code)
    acc_date = basic_info.get("accounting_date", "1231")

    current_year = int(time.strftime("%Y"))
    last_valid_year = None

    # 1. Determine most recent valid year: probe up to 3 years back with
    # only the first indicator, since the latest annual report may not be
    # published yet for the current year.
    for offset in range(3):
        test_year = current_year - offset
        test_date = f"{test_year}{acc_date}"
        first_indicator = indicator_configs[0]
        params = {
            "codes": code,
            "indipara": [
                {"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        if not df.empty:
            # A non-null, non-zero first value means the report exists.
            valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
            if pd.notna(valid_val) and valid_val != 0:
                last_valid_year = test_year
                break

    if last_valid_year is None:
        # No probe succeeded; fall back to the current year anyway.
        last_valid_year = current_year

    all_dfs = []
    # 2. Fetch 5 consecutive fiscal years backwards from the anchor year.
    for i in range(5):
        target_year = last_valid_year - i
        target_date = f"{target_year}{acc_date}"

        params = {
            "codes": code,
            "indipara": [
                {"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
                for item in indicator_configs
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        if not df.empty:
            # Force the requested period so downstream filtering is consistent
            # even if the API echoes a different date.
            df['end_date'] = target_date
            all_dfs.append(df)

    # Filter and concat: drop empty / all-NA frames to avoid pandas'
    # concat FutureWarning about all-NA entries.
    all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]

    if not all_dfs:
        return pd.DataFrame()

    return pd.concat(all_dfs, ignore_index=True)
|
||||
|
||||
def get_income_statement(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of income-statement items (``*_oas`` indicators)
    and map them to the project's canonical column names."""
    rename_map = {
        'revenue_oas': 'revenue',
        'gross_profit_oas': 'gross_profit',
        'sga_expenses_oas': 'sga_exp',
        'selling_marketing_expenses_oas': 'selling_marketing_exp',
        'ga_expenses_oas': 'ga_exp',
        'rd_expenses_oas': 'rd_exp',
        'income_tax_expense_oas': 'income_tax',
        'net_income_attri_to_common_sh_oas': 'net_income',
        'operating_income_oas': 'operating_profit'
    }

    # The request list is exactly the keys of the rename map, in order.
    indicators = [{"indicator": name} for name in rename_map]

    df = self._fetch_financial_data_annual(symbol, code, indicators)
    if df.empty:
        return df
    self._save_raw_data(df, symbol, "income_statement_raw")

    renamed = df.rename(columns=rename_map)

    # Coerce everything except the date columns to numeric.
    for col in renamed.columns:
        if col in ('date', 'end_date'):
            continue
        renamed[col] = pd.to_numeric(renamed[col], errors='coerce')

    return self._filter_data(renamed)
|
||||
|
||||
def get_balance_sheet(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of balance-sheet items (``*_oas`` indicators),
    derive total liabilities when absent, and map to canonical names."""
    rename_map = {
        'cash_equi_short_term_inve_oas': 'cash',
        'accou_and_notes_recei_oas': 'receivables',
        'inventories_oas': 'inventory',
        'ppe_net_oas': 'fixed_assets',
        'long_term_inv_and_receiv_oas': 'long_term_investments',
        'goodwill_and_intasset_oas': 'goodwill',
        'short_term_debt_oas': 'short_term_debt',
        'short_term_borrowings_oas': 'short_term_borrowings',
        'account_and_note_payable_oas': 'accounts_payable',
        'contra_liabilities_current_oas': 'contract_liabilities',
        'advance_from_cust_current_oas': 'advances_from_customers',
        'defer_revenue_current_oas': 'deferred_revenue',
        'long_term_debt_oas': 'long_term_debt',
        'long_term_borrowings_oas': 'long_term_borrowings',
        'total_assets_oas': 'total_assets',
        'equity_attri_to_companyowner_oas': 'total_equity',
        'prepaid_expenses_current_oas': 'prepayment'
    }

    # The request list is exactly the keys of the rename map, in order.
    indicators = [{"indicator": name} for name in rename_map]

    df = self._fetch_financial_data_annual(symbol, code, indicators)
    if df.empty:
        return df
    self._save_raw_data(df, symbol, "balance_sheet_raw")

    renamed = df.rename(columns=rename_map)

    # No total-liabilities indicator is requested above, so derive it from
    # the accounting identity: liabilities = assets - equity.
    liabilities_missing = (
        'total_liabilities' not in renamed.columns
        or renamed['total_liabilities'].isnull().all()
    )
    if liabilities_missing:
        if 'total_assets' in renamed.columns and 'total_equity' in renamed.columns:
            renamed['total_liabilities'] = renamed['total_assets'] - renamed['total_equity']

    # Coerce everything except the date columns to numeric.
    for col in renamed.columns:
        if col in ('date', 'end_date'):
            continue
        renamed[col] = pd.to_numeric(renamed[col], errors='coerce')

    return self._filter_data(renamed)
|
||||
|
||||
def get_cash_flow(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five years of cash-flow items; capex is normalized to a
    positive magnitude."""
    rename_map = {
        'net_cash_flows_from_oa_oas': 'ocf',
        'purchase_of_ppe_and_ia_oas': 'capex',
        'dividends_paid_oas': 'dividends'
    }
    indicators = [{"indicator": name} for name in rename_map]

    df = self._fetch_financial_data_annual(symbol, code, indicators)
    if df.empty:
        return df
    self._save_raw_data(df, symbol, "cash_flow_raw")

    renamed = df.rename(columns=rename_map)

    # Coerce everything except the date columns to numeric.
    for col in renamed.columns:
        if col in ('date', 'end_date'):
            continue
        renamed[col] = pd.to_numeric(renamed[col], errors='coerce')

    # Cash-flow statements report capex as an outflow (negative); the
    # downstream charts expect its absolute size.
    if 'capex' in renamed.columns:
        renamed['capex'] = renamed['capex'].abs()

    return self._filter_data(renamed)
|
||||
|
||||
def get_market_metrics(self, symbol: str, code: str) -> dict:
    """Return static company metadata: name and listing date."""
    info = self._fetch_basic_info(symbol, code)
    return {
        "name": info.get("name", ""),
        "list_date": info.get("ipo_date", "")
    }
|
||||
|
||||
def get_historical_metrics(self, symbol: str, code: str, dates: list) -> pd.DataFrame:
    """Fetch close price and market cap for each requested date.

    Queries the ``date_sequence`` endpoint one date at a time. PE/PB are
    not provided by this endpoint and stay at 0.0.

    Args:
        symbol: Local symbol, used only for raw-data persistence naming.
        code: Full iFinD code sent to the API.
        dates: Date strings (YYYYMMDD / YYYY-MM-DD / YYYY/MM/DD).

    Returns:
        DataFrame with columns date_str, PE, PB, MarketCap, Price.
    """
    if not dates:
        return pd.DataFrame()

    results = []
    for d in dates:
        d_str = str(d).replace('-', '').replace('/', '')
        # API wants ISO-style dates; re-hyphenate 8-digit strings.
        fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str

        params = {
            "codes": code,
            "startdate": fmt_d,
            "enddate": fmt_d,
            "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
            "indipara": [
                {"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
                {"indicator": "market_value", "indiparams": ["", "CNY"]}
            ]
        }
        res = self.cli.post("date_sequence", params)
        df_seq = self._parse_ifind_tables(res)

        metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}

        if not df_seq.empty:
            # Most recent record on or before the requested date.
            match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
            if not match.empty:
                if 'pre_close' in match.columns:
                    price = match['pre_close'].iloc[0]
                    # BUGFIX: `float(x or 0.0)` leaked NaN into Price, because
                    # NaN is truthy; guard with pd.isna before converting.
                    metrics['Price'] = 0.0 if pd.isna(price) else float(price or 0.0)
                if 'market_value' in match.columns:
                    mcap = match['market_value'].iloc[0]
                    metrics['MarketCap'] = 0.0 if pd.isna(mcap) else float(mcap or 0.0)
        results.append(metrics)

    df_hist = pd.DataFrame(results)
    self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
    return df_hist
|
||||
|
||||
def get_dividends(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch annual cumulative dividends (CNY) for the last five calendar years.

    Returns a DataFrame with columns ``date_str`` (fiscal year-end,
    YYYYMMDD) and ``dividends``; empty frame when no year paid anything.
    """
    info = self._fetch_basic_info(symbol, code)
    acc_date = info.get("accounting_date", "1231")
    this_year = int(time.strftime("%Y"))

    rows = []
    for back in range(5):
        year_str = str(this_year - back)
        params = {
            "codes": code,
            "indipara": [
                {"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))

        if df.empty or 'annual_cum_dividend' not in df.columns:
            continue
        val = df['annual_cum_dividend'].iloc[0]
        # Keep only years with a real, non-zero payout.
        if pd.notna(val) and val != 0:
            rows.append({
                'date_str': f"{year_str}{acc_date}",
                'dividends': float(val)
            })

    if not rows:
        return pd.DataFrame()

    df_div = pd.DataFrame(rows)
    self._save_raw_data(df_div, symbol, "dividends_raw")
    return df_div
|
||||
|
||||
def get_repurchases(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch share-repurchase totals per fiscal year for the last five years.

    Each fiscal year is queried as the window from the previous year-end to
    this year-end. Returns columns ``date_str`` and ``repurchases``; empty
    frame when none are found.
    """
    info = self._fetch_basic_info(symbol, code)
    acc_date = info.get("accounting_date", "1231")

    # acc_date is assumed to be MMDD (default "1231") — NOTE(review):
    # confirm the API never returns a full YYYYMMDD here.
    fmt_mm_dd = f"{acc_date[:2]}-{acc_date[2:]}"

    this_year = int(time.strftime("%Y"))
    rows = []

    for back in range(5):
        target_year = this_year - back
        window_start = f"{target_year - 1}-{fmt_mm_dd}"
        window_end = f"{target_year}-{fmt_mm_dd}"

        params = {
            "codes": code,
            "indipara": [
                {"indicator": "repur_num_new", "indiparams": [window_start, window_end, "1"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))

        if df.empty or 'repur_num_new' not in df.columns:
            continue
        val = df['repur_num_new'].iloc[0]
        # Keep only years with a real, non-zero repurchase amount.
        if pd.notna(val) and val != 0:
            rows.append({
                'date_str': f"{target_year}{acc_date}",
                'repurchases': float(val)
            })

    if not rows:
        return pd.DataFrame()

    df_repur = pd.DataFrame(rows)
    self._save_raw_data(df_repur, symbol, "repurchases_raw")
    return df_repur
|
||||
|
||||
def get_employee_count(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch fiscal year-end headcount for the last five years.

    Returns columns ``date_str`` (fiscal year-end, YYYYMMDD) and
    ``employee_count``; empty frame when nothing is reported.
    """
    info = self._fetch_basic_info(symbol, code)
    acc_date = info.get("accounting_date", "1231")
    # acc_date is assumed to be MMDD (default "1231").
    month, day = acc_date[:2], acc_date[2:]

    this_year = int(time.strftime("%Y"))
    rows = []

    for back in range(5):
        target_year = this_year - back
        target_date = f"{target_year}-{month}-{day}"

        params = {
            "codes": code,
            "indipara": [
                {"indicator": "staff_num", "indiparams": [target_date]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))

        if df.empty or 'staff_num' not in df.columns:
            continue
        val = df['staff_num'].iloc[0]
        # Keep only years with a real, non-zero headcount.
        if pd.notna(val) and val != 0:
            rows.append({
                'date_str': f"{target_year}{acc_date}",
                'employee_count': float(val)
            })

    if not rows:
        return pd.DataFrame()

    df_emp = pd.DataFrame(rows)
    self._save_raw_data(df_emp, symbol, "employee_count_raw")
    return df_emp
|
||||
|
||||
def get_financial_ratios(self, symbol: str, code: str) -> pd.DataFrame:
    """Fetch five fiscal years of profitability, expense, growth, leverage
    and turnover ratios, anchored on the most recent year with ROE data.

    Per-employee indicators are queried by calendar year; the remaining
    ratios are queried by fiscal year-end date. Column names are mapped to
    the project's canonical names before filtering.
    """
    # Generic Implementation if available.
    # JP fetcher has it, VN fetcher did not show it but might support it.
    # We will implement it based on JP fetcher.
    current_year = int(time.strftime("%Y"))
    basic_info = self._fetch_basic_info(symbol, code)
    acc_date = basic_info.get("accounting_date", "1231")

    # Probe up to 3 years back with ROE to find the latest published year.
    last_valid_year = None
    for offset in range(3):
        test_year = current_year - offset
        test_date = f"{test_year}{acc_date}"
        params = {
            "codes": code,
            "indipara": [{"indicator": "roe", "indiparams": [test_date]}]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)
        if not df.empty:
            # A non-null, non-zero ROE means the report exists.
            val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
            if pd.notna(val) and val != 0:
                last_valid_year = test_year
                break

    if last_valid_year is None:
        # No probe succeeded; fall back to the current year anyway.
        last_valid_year = current_year

    all_dfs = []
    for i in range(5):
        target_year = last_valid_year - i
        date_str = f"{target_year}{acc_date}"
        year_str = str(target_year)

        # Per-employee indicators take (year, "100") parameters.
        indipara = []
        for key in ["salary_pp", "revenue_pp", "profit_pp"]:
            indipara.append({"indicator": key, "indiparams": [year_str, "100"]})

        # Ratio indicators take a single report-date parameter.
        ratio_keys = [
            "roe", "roa", "roic",
            "sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income",
            "operating_revenue_yoy", "np_atsopc_yoy",
            "ibdebt_ratio_asset_base",
            "inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days",
            "fixed_asset_turnover_ratio", "total_capital_turnover"
        ]
        for key in ratio_keys:
            indipara.append({"indicator": key, "indiparams": [date_str]})

        params = {
            "codes": code,
            "indipara": indipara
        }

        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        if not df.empty:
            if 'end_date' not in df.columns:
                df['end_date'] = date_str
            # Drop all-NA columns, then keep the frame only if at least one
            # non-metadata column has a value.
            df = df.dropna(axis=1, how='all')
            valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']]
            if not df[valid_cols].isnull().all().all():
                all_dfs.append(df)

    if not all_dfs:
        return pd.DataFrame()

    combined = pd.concat(all_dfs, ignore_index=True)
    self._save_raw_data(combined, symbol, "financial_ratios_raw")

    rename_map = {
        "salary_pp": "salary_per_employee",
        "revenue_pp": "revenue_per_employee",
        "profit_pp": "profit_per_employee",
        "sales_fee_to_or": "selling_expense_ratio",
        "manage_fee_to_revenue": "admin_expense_ratio",
        "rad_expense_to_total_income": "rd_expense_ratio",
        "operating_revenue_yoy": "revenue_growth",
        "np_atsopc_yoy": "net_profit_growth",
        "ibdebt_ratio_asset_base": "interest_bearing_debt_ratio",
        "fixed_asset_turnover_ratio": "fixed_asset_turnover",
        "total_capital_turnover": "total_asset_turnover"
    }

    df_final = combined.rename(columns=rename_map)

    # Coerce everything except the date columns to numeric.
    for col in df_final.columns:
        if col not in ['date', 'end_date']:
            df_final[col] = pd.to_numeric(df_final[col], errors='coerce')

    return self._filter_data(df_final)
|
||||
@ -1,515 +1,60 @@
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import time
|
||||
from .base import DataFetcher
|
||||
from .ifind_client import IFindClient
|
||||
from .ifind_int_client import IFindIntClient
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class JpFetcher(DataFetcher):
|
||||
def __init__(self, api_key: str):
|
||||
# api_key is the iFinD Refresh Token
|
||||
super().__init__(api_key)
|
||||
self.cli = IFindClient(refresh_token=api_key)
|
||||
self.data_source = 'iFinD'
|
||||
self.client = IFindIntClient(api_key, 'JP')
|
||||
self.storage = DataStorage()
|
||||
self._basic_info_cache = {}
|
||||
|
||||
def _get_ifind_code(self, symbol: str) -> str:
|
||||
"""保持逻辑一致性,如果是纯数字则补齐后缀 .T,否则直接传"""
|
||||
# Simple logic: if pure digits, append .T (Tokyo SE).
|
||||
# Otherwise assume it's already a code or handled.
|
||||
if symbol.isdigit():
|
||||
return f"{symbol}.T"
|
||||
return symbol
|
||||
|
||||
def _fetch_basic_info(self, symbol: str) -> dict:
|
||||
"""获取公司的基本信息:中文名称、会计年结日、上市日期"""
|
||||
def _fetch_basic_info(self, symbol: str):
|
||||
# Delegate to client
|
||||
code = self._get_ifind_code(symbol)
|
||||
if code in self._basic_info_cache:
|
||||
return self._basic_info_cache[code]
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "corp_cn_name", "indiparams": []},
|
||||
{"indicator": "accounting_date", "indiparams": []},
|
||||
{"indicator": "ipo_date", "indiparams": []}
|
||||
]
|
||||
}
|
||||
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
if not df.empty:
|
||||
self._save_raw_data(df, symbol, "basic_info_raw")
|
||||
|
||||
info = {
|
||||
"name": "",
|
||||
"accounting_date": "1231", # 默认 12-31
|
||||
"ipo_date": ""
|
||||
}
|
||||
|
||||
if not df.empty:
|
||||
row = df.iloc[0]
|
||||
info["name"] = str(row.get("corp_cn_name", ""))
|
||||
|
||||
# accounting_date 通常返回类似 "03-31" 或 "1231"
|
||||
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
|
||||
# 好像是ifind的API有问题,明明财报是0331,但如果去读20240331,就是空数据
|
||||
# if acc_date:
|
||||
# info["accounting_date"] = acc_date
|
||||
|
||||
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
|
||||
|
||||
self._basic_info_cache[code] = info
|
||||
return info
|
||||
|
||||
def _save_raw_data(self, data: any, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
# 如果是字典(API 响应),直接保存
|
||||
if isinstance(data, dict):
|
||||
df = pd.DataFrame([data]) # 包装成单行 DF 或简单处理
|
||||
else:
|
||||
df = data
|
||||
self.storage.save_data(df, 'JP', symbol, f"raw_{name}")
|
||||
|
||||
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
|
||||
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
|
||||
if not res:
|
||||
return pd.DataFrame()
|
||||
|
||||
if res.get("errorcode") != 0:
|
||||
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
|
||||
return pd.DataFrame()
|
||||
|
||||
tables = res.get("tables", [])
|
||||
if not tables:
|
||||
print("iFinD API Warning: No tables found in response.")
|
||||
return pd.DataFrame()
|
||||
|
||||
# 提取第一个 table
|
||||
table_info = tables[0]
|
||||
table_data = table_info.get("table", {})
|
||||
times = table_info.get("time", [])
|
||||
|
||||
if not table_data:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Ensure all values are lists to avoid pd.DataFrame ValueError with scalars
|
||||
processed_table_data = {}
|
||||
for k, v in table_data.items():
|
||||
if not isinstance(v, list):
|
||||
processed_table_data[k] = [v]
|
||||
else:
|
||||
processed_table_data[k] = v
|
||||
|
||||
df = pd.DataFrame(processed_table_data)
|
||||
if times and len(times) == len(df):
|
||||
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
|
||||
elif times and len(df) == 1:
|
||||
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
|
||||
|
||||
# If still no end_date, look for it in columns
|
||||
if 'end_date' not in df.columns:
|
||||
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
|
||||
if col in df.columns:
|
||||
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
|
||||
break
|
||||
|
||||
return df
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
# Handle YoY logic: YYYYMMDD -> (YYYY-1)MMDD
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
|
||||
# 对齐 CN 逻辑,日本公司虽然多是 0331 截止
|
||||
is_annual = df['end_date'].astype(str).str.endswith('0331') | df['end_date'].astype(str).str.endswith('1231')
|
||||
annual_records = df[is_annual]
|
||||
|
||||
combined = pd.concat([latest_record, comparable_record, annual_records])
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
|
||||
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
|
||||
"""通用获取历年会计年结日的财务数据 (CNY 结算)"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
basic_info = self._fetch_basic_info(symbol)
|
||||
acc_date = basic_info.get("accounting_date", "1231")
|
||||
|
||||
current_year = int(time.strftime("%Y"))
|
||||
|
||||
# 1. First, determine the most recent valid year by trying backwards from current year
|
||||
last_valid_year = None
|
||||
|
||||
# Try up to 3 years back to find the latest available report
|
||||
for offset in range(3):
|
||||
test_year = current_year - offset
|
||||
test_date = f"{test_year}{acc_date}"
|
||||
|
||||
# Use the first indicator to test availability
|
||||
first_indicator = indicator_configs[0]
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty:
|
||||
# Check for non-null values
|
||||
valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None
|
||||
if pd.notna(valid_val) and valid_val != 0:
|
||||
last_valid_year = test_year
|
||||
break
|
||||
|
||||
if last_valid_year is None:
|
||||
last_valid_year = current_year
|
||||
|
||||
all_dfs = []
|
||||
|
||||
# 2. Fetch 5 years starting from the last valid year
|
||||
for i in range(5):
|
||||
target_year = last_valid_year - i
|
||||
target_date = f"{target_year}{acc_date}"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]}
|
||||
for item in indicator_configs
|
||||
]
|
||||
}
|
||||
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
if not df.empty:
|
||||
# 强制设置 end_date 以防 API 返回不一致
|
||||
df['end_date'] = target_date
|
||||
all_dfs.append(df)
|
||||
|
||||
if not all_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Remove empty or Check for all-NA columns DataFrames (Fixing FutureWarning)
|
||||
all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()]
|
||||
|
||||
if not all_dfs:
|
||||
return pd.DataFrame()
|
||||
|
||||
return pd.concat(all_dfs, ignore_index=True)
|
||||
return self.client._fetch_basic_info(symbol, code)
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
|
||||
indicators = [
|
||||
{"indicator": "revenue_oas"},
|
||||
{"indicator": "gross_profit_oas"},
|
||||
{"indicator": "sga_expenses_oas"},
|
||||
{"indicator": "selling_marketing_expenses_oas"},
|
||||
{"indicator": "ga_expenses_oas"},
|
||||
{"indicator": "rd_expenses_oas"},
|
||||
{"indicator": "income_tax_expense_oas"},
|
||||
{"indicator": "net_income_attri_to_common_sh_oas"},
|
||||
{"indicator": "operating_income_oas"}
|
||||
]
|
||||
|
||||
df = self._fetch_financial_data_annual(symbol, indicators)
|
||||
if df.empty: return df
|
||||
self._save_raw_data(df, symbol, "income_statement_raw")
|
||||
|
||||
rename_map = {
|
||||
'revenue_oas': 'revenue',
|
||||
'gross_profit_oas': 'gross_profit',
|
||||
'sga_expenses_oas': 'sga_exp',
|
||||
'selling_marketing_expenses_oas': 'selling_marketing_exp',
|
||||
'ga_expenses_oas': 'ga_exp',
|
||||
'rd_expenses_oas': 'rd_exp',
|
||||
'income_tax_expense_oas': 'income_tax',
|
||||
'net_income_attri_to_common_sh_oas': 'net_income',
|
||||
'operating_income_oas': 'operating_profit'
|
||||
}
|
||||
|
||||
df_filtered = df.rename(columns=rename_map)
|
||||
|
||||
# 数值转换
|
||||
for col in df_filtered.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
|
||||
|
||||
return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_income_statement(symbol, code)
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
|
||||
indicators = [
|
||||
{"indicator": "cash_equi_short_term_inve_oas"},
|
||||
{"indicator": "accou_and_notes_recei_oas"},
|
||||
{"indicator": "inventories_oas"},
|
||||
{"indicator": "ppe_net_oas"},
|
||||
{"indicator": "long_term_inv_and_receiv_oas"},
|
||||
{"indicator": "goodwill_and_intasset_oas"},
|
||||
{"indicator": "short_term_debt_oas"},
|
||||
{"indicator": "short_term_borrowings_oas"},
|
||||
{"indicator": "account_and_note_payable_oas"},
|
||||
{"indicator": "contra_liabilities_current_oas"},
|
||||
{"indicator": "advance_from_cust_current_oas"},
|
||||
{"indicator": "defer_revenue_current_oas"},
|
||||
{"indicator": "long_term_debt_oas"},
|
||||
{"indicator": "long_term_borrowings_oas"},
|
||||
{"indicator": "total_assets_oas"},
|
||||
{"indicator": "equity_attri_to_companyowner_oas"},
|
||||
{"indicator": "prepaid_expenses_current_oas"}
|
||||
]
|
||||
|
||||
df = self._fetch_financial_data_annual(symbol, indicators)
|
||||
if df.empty: return df
|
||||
self._save_raw_data(df, symbol, "balance_sheet_raw")
|
||||
|
||||
rename_map = {
|
||||
'cash_equi_short_term_inve_oas': 'cash',
|
||||
'accou_and_notes_recei_oas': 'receivables',
|
||||
'inventories_oas': 'inventory',
|
||||
'ppe_net_oas': 'fixed_assets',
|
||||
'long_term_inv_and_receiv_oas': 'long_term_investments',
|
||||
'goodwill_and_intasset_oas': 'goodwill',
|
||||
'short_term_debt_oas': 'short_term_debt',
|
||||
'short_term_borrowings_oas': 'short_term_borrowings',
|
||||
'account_and_note_payable_oas': 'accounts_payable',
|
||||
'contra_liabilities_current_oas': 'contract_liabilities',
|
||||
'advance_from_cust_current_oas': 'advances_from_customers',
|
||||
'defer_revenue_current_oas': 'deferred_revenue',
|
||||
'long_term_debt_oas': 'long_term_debt',
|
||||
'long_term_borrowings_oas': 'long_term_borrowings',
|
||||
'total_assets_oas': 'total_assets',
|
||||
'equity_attri_to_companyowner_oas': 'total_equity',
|
||||
'prepaid_expenses_current_oas': 'prepayment'
|
||||
}
|
||||
|
||||
df_filtered = df.rename(columns=rename_map)
|
||||
|
||||
# 如果没有负债合计,用资产减权益
|
||||
if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
|
||||
if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
|
||||
df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']
|
||||
|
||||
for col in df_filtered.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
|
||||
|
||||
return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_balance_sheet(symbol, code)
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
|
||||
indicators = [
|
||||
{"indicator": "net_cash_flows_from_oa_oas"},
|
||||
{"indicator": "purchase_of_ppe_and_ia_oas"},
|
||||
{"indicator": "dividends_paid_oas"}
|
||||
]
|
||||
|
||||
df = self._fetch_financial_data_annual(symbol, indicators)
|
||||
if df.empty: return df
|
||||
self._save_raw_data(df, symbol, "cash_flow_raw")
|
||||
|
||||
rename_map = {
|
||||
'net_cash_flows_from_oa_oas': 'ocf',
|
||||
'purchase_of_ppe_and_ia_oas': 'capex',
|
||||
'dividends_paid_oas': 'dividends'
|
||||
}
|
||||
|
||||
df_filtered = df.rename(columns=rename_map)
|
||||
|
||||
for col in df_filtered.columns:
|
||||
if col not in ['date', 'end_date']:
|
||||
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')
|
||||
|
||||
if 'capex' in df_filtered.columns:
|
||||
df_filtered['capex'] = df_filtered['capex'].abs()
|
||||
|
||||
return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_cash_flow(symbol, code)
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
|
||||
"""获取公司基本信息(名称、上市日期等静态数据)"""
|
||||
basic_info = self._fetch_basic_info(symbol)
|
||||
|
||||
metrics = {
|
||||
"name": basic_info.get("name", ""),
|
||||
"list_date": basic_info.get("ipo_date", "")
|
||||
}
|
||||
|
||||
return metrics
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_market_metrics(symbol, code)
|
||||
|
||||
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
|
||||
"""获取历史日期的收盘价和市值 (通过 cmd_history_quotation)"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
if not dates: return pd.DataFrame()
|
||||
|
||||
results = []
|
||||
# get_historical_metrics里面不要拿所有日期数据了,而是一个一个数据拿
|
||||
for d in dates:
|
||||
d_str = str(d).replace('-', '').replace('/', '')
|
||||
fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"startdate": fmt_d,
|
||||
"enddate": fmt_d,
|
||||
"functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
|
||||
"indipara": [
|
||||
{"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
|
||||
{"indicator": "market_value", "indiparams": ["", "CNY"]}
|
||||
]
|
||||
}
|
||||
|
||||
# print(f"iFinD API Request: endpoint=date_sequence, params={params}")
|
||||
res = self.cli.post("date_sequence", params)
|
||||
df_seq = self._parse_ifind_tables(res)
|
||||
|
||||
metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
|
||||
|
||||
if not df_seq.empty:
|
||||
# 找到最接近该日期且不晚于该日期的记录
|
||||
match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1)
|
||||
if not match.empty:
|
||||
if 'pre_close' in match.columns:
|
||||
metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0)
|
||||
if 'market_value' in match.columns:
|
||||
metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0)
|
||||
results.append(metrics)
|
||||
|
||||
df_hist = pd.DataFrame(results)
|
||||
self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
|
||||
return df_hist
|
||||
return self.client.get_historical_metrics(symbol, code, dates)
|
||||
|
||||
def get_dividends(self, symbol: str) -> pd.DataFrame:
|
||||
"""获取历年年度累计分红记录 (逐年获取)"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
basic_info = self._fetch_basic_info(symbol)
|
||||
acc_date = basic_info.get("accounting_date", "1231")
|
||||
|
||||
current_year = int(time.strftime("%Y"))
|
||||
results = []
|
||||
|
||||
# 获取最近 5 年的数据
|
||||
for i in range(5):
|
||||
year_str = str(current_year - i)
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]}
|
||||
]
|
||||
}
|
||||
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty and 'annual_cum_dividend' in df.columns:
|
||||
val = df['annual_cum_dividend'].iloc[0]
|
||||
if pd.notna(val) and val != 0:
|
||||
results.append({
|
||||
'date_str': f"{year_str}{acc_date}",
|
||||
'dividends': float(val)
|
||||
})
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_div = pd.DataFrame(results)
|
||||
self._save_raw_data(df_div, symbol, "dividends_raw")
|
||||
return df_div
|
||||
return self.client.get_dividends(symbol, code)
|
||||
|
||||
def get_repurchases(self, symbol: str) -> pd.DataFrame:
|
||||
"""获取历年年度回购记录 (从 repur_num_new 获取)"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
basic_info = self._fetch_basic_info(symbol)
|
||||
acc_date = basic_info.get("accounting_date", "1231")
|
||||
mm = acc_date[:2]
|
||||
dd = acc_date[2:]
|
||||
|
||||
# 为了对应日期格式 YYYY-MM-DD
|
||||
fmt_mm_dd = f"{mm}-{dd}"
|
||||
|
||||
current_year = int(time.strftime("%Y"))
|
||||
results = []
|
||||
|
||||
# 获取最近 5 年的数据
|
||||
for i in range(5):
|
||||
target_year = current_year - i
|
||||
start_date = f"{target_year - 1}-{fmt_mm_dd}"
|
||||
end_date = f"{target_year}-{fmt_mm_dd}"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
|
||||
]
|
||||
}
|
||||
# print(f"iFinD API Request: endpoint=basic_data_service, params={params}")
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty and 'repur_num_new' in df.columns:
|
||||
val = df['repur_num_new'].iloc[0]
|
||||
if pd.notna(val) and val != 0:
|
||||
results.append({
|
||||
'date_str': f"{target_year}{acc_date}",
|
||||
'repurchases': float(val)
|
||||
})
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
df_repur = pd.DataFrame(results)
|
||||
self._save_raw_data(df_repur, symbol, "repurchases_raw")
|
||||
return df_repur
|
||||
return self.client.get_repurchases(symbol, code)
|
||||
|
||||
def get_employee_count(self, symbol: str) -> pd.DataFrame:
|
||||
"""获取历年员工人数"""
|
||||
code = self._get_ifind_code(symbol)
|
||||
basic_info = self._fetch_basic_info(symbol)
|
||||
acc_date = basic_info.get("accounting_date", "1231")
|
||||
mm = acc_date[:2]
|
||||
dd = acc_date[2:]
|
||||
return self.client.get_employee_count(symbol, code)
|
||||
|
||||
current_year = int(time.strftime("%Y"))
|
||||
results = []
|
||||
|
||||
# 获取最近 5 年的数据
|
||||
for i in range(5):
|
||||
target_year = current_year - i
|
||||
target_date = f"{target_year}-{mm}-{dd}"
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "staff_num", "indiparams": [target_date]}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
|
||||
if not df.empty and 'staff_num' in df.columns:
|
||||
val = df['staff_num'].iloc[0]
|
||||
if pd.notna(val) and val != 0:
|
||||
results.append({
|
||||
'date_str': f"{target_year}{acc_date}",
|
||||
'employee_count': float(val)
|
||||
})
|
||||
|
||||
if not results:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_emp = pd.DataFrame(results)
|
||||
self._save_raw_data(df_emp, symbol, "employee_count_raw")
|
||||
return df_emp
|
||||
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_financial_ratios(symbol, code)
|
||||
|
||||
242
src/fetchers/tushare_cn_client.py
Normal file
242
src/fetchers/tushare_cn_client.py
Normal file
@ -0,0 +1,242 @@
|
||||
import tushare as ts
|
||||
import pandas as pd
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class TushareCnClient:
|
||||
def __init__(self, api_key: str):
|
||||
ts.set_token(api_key)
|
||||
self.pro = ts.pro_api()
|
||||
self.storage = DataStorage()
|
||||
self.api_key = api_key
|
||||
|
||||
def _save_raw_data(self, df: pd.DataFrame, symbol: str, name: str):
|
||||
if df is None or df.empty:
|
||||
return
|
||||
market = 'CN'
|
||||
self.storage.save_data(df, market, symbol, f"raw_{name}")
|
||||
|
||||
def _get_ts_code(self, symbol: str) -> str:
|
||||
return symbol
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
if df.empty:
|
||||
return df
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
is_annual = df['end_date'].astype(str).str.endswith('1231')
|
||||
annual_records = df[is_annual]
|
||||
combined = pd.concat([latest_record, comparable_record, annual_records])
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.income(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "income_statement")
|
||||
rename_map = {
|
||||
'end_date': 'date',
|
||||
'revenue': 'revenue',
|
||||
'n_income_attr_p': 'net_income'
|
||||
}
|
||||
df = self._filter_data(df)
|
||||
df = df.rename(columns=rename_map)
|
||||
return df
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.balancesheet(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "balance_sheet")
|
||||
rename_map = {
|
||||
'end_date': 'date',
|
||||
'total_hldr_eqy_exc_min_int': 'total_equity',
|
||||
'total_liab': 'total_liabilities',
|
||||
'total_cur_assets': 'current_assets',
|
||||
'total_cur_liab': 'current_liabilities'
|
||||
}
|
||||
df = self._filter_data(df)
|
||||
df = df.rename(columns=rename_map)
|
||||
return df
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.cashflow(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "cash_flow")
|
||||
df = self._filter_data(df)
|
||||
df = df.rename(columns={
|
||||
'end_date': 'date',
|
||||
'n_cashflow_act': 'net_cash_flow',
|
||||
'depr_fa_coga_dpba': 'depreciation'
|
||||
})
|
||||
return df
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
metrics = {
|
||||
"price": 0.0,
|
||||
"market_cap": 0.0,
|
||||
"pe": 0.0,
|
||||
"pb": 0.0,
|
||||
"total_share_holders": 0,
|
||||
"employee_count": 0
|
||||
}
|
||||
|
||||
try:
|
||||
df_daily = self.pro.daily_basic(ts_code=ts_code, limit=1)
|
||||
self._save_raw_data(df_daily, ts_code, "market_metrics_daily_basic")
|
||||
if not df_daily.empty:
|
||||
row = df_daily.iloc[0]
|
||||
metrics["price"] = row.get('close', 0.0)
|
||||
metrics["pe"] = row.get('pe', 0.0)
|
||||
metrics["pb"] = row.get('pb', 0.0)
|
||||
metrics["market_cap"] = row.get('total_mv', 0.0) * 10000
|
||||
metrics["dividend_yield"] = row.get('dv_ttm', 0.0)
|
||||
|
||||
df_basic = self.pro.stock_basic(ts_code=ts_code, fields='name,list_date')
|
||||
self._save_raw_data(df_basic, ts_code, "market_metrics_stock_basic")
|
||||
if not df_basic.empty:
|
||||
metrics['name'] = df_basic.iloc[0]['name']
|
||||
metrics['list_date'] = df_basic.iloc[0]['list_date']
|
||||
|
||||
df_comp = self.pro.stock_company(ts_code=ts_code)
|
||||
if not df_comp.empty:
|
||||
metrics["employee_count"] = int(df_comp.iloc[0].get('employees', 0) or 0)
|
||||
|
||||
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, limit=1)
|
||||
self._save_raw_data(df_holder, ts_code, "market_metrics_shareholder_number")
|
||||
if not df_holder.empty:
|
||||
metrics["total_share_holders"] = int(df_holder.iloc[0].get('holder_num', 0) or 0)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error fetching market metrics for {symbol}: {e}")
|
||||
|
||||
return metrics
|
||||
|
||||
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
results = []
|
||||
|
||||
if not dates:
|
||||
return pd.DataFrame()
|
||||
|
||||
unique_dates = sorted(list(set([str(d).replace('-', '') for d in dates])), reverse=True)
|
||||
|
||||
try:
|
||||
import datetime
|
||||
min_date = min(unique_dates)
|
||||
max_date = max(unique_dates)
|
||||
|
||||
df_daily = self.pro.daily_basic(ts_code=ts_code, start_date=min_date, end_date=max_date)
|
||||
self._save_raw_data(df_daily, ts_code, "historical_metrics_daily_basic")
|
||||
if not df_daily.empty:
|
||||
df_daily = df_daily.sort_values('trade_date', ascending=False)
|
||||
|
||||
df_holder = self.pro.stk_holdernumber(ts_code=ts_code, start_date=min_date, end_date=max_date)
|
||||
self._save_raw_data(df_holder, ts_code, "historical_metrics_shareholder_number")
|
||||
if not df_holder.empty:
|
||||
df_holder = df_holder.sort_values('end_date', ascending=False)
|
||||
|
||||
for date_str in unique_dates:
|
||||
metrics = {'date_str': date_str}
|
||||
|
||||
if not df_daily.empty:
|
||||
closest_daily = df_daily[df_daily['trade_date'] <= date_str]
|
||||
if not closest_daily.empty:
|
||||
row = closest_daily.iloc[0]
|
||||
metrics['Price'] = row.get('close')
|
||||
metrics['PE'] = row.get('pe')
|
||||
metrics['PB'] = row.get('pb')
|
||||
metrics['MarketCap'] = row.get('total_mv', 0) * 10000
|
||||
|
||||
if not df_holder.empty:
|
||||
closest_holder = df_holder[df_holder['end_date'] <= date_str]
|
||||
if not closest_holder.empty:
|
||||
metrics['Shareholders'] = closest_holder.iloc[0].get('holder_num')
|
||||
|
||||
results.append(metrics)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error fetching historical metrics for {symbol}: {e}")
|
||||
|
||||
return pd.DataFrame(results)
|
||||
|
||||
def get_dividends(self, symbol: str) -> pd.DataFrame:
|
||||
import time
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df_div = self.pro.dividend(ts_code=ts_code, fields='end_date,ex_date,div_proc,cash_div')
|
||||
self._save_raw_data(df_div, ts_code, "dividends_raw")
|
||||
|
||||
if df_div.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Filter for implemented cash dividends
|
||||
df_div = df_div[(df_div['div_proc'] == '实施') & (df_div['cash_div'] > 0)]
|
||||
|
||||
if df_div.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
df_div['total_cash_div'] = 0.0
|
||||
|
||||
# Get total shares for each ex_date
|
||||
for index, row in df_div.iterrows():
|
||||
ex_date = row['ex_date']
|
||||
if not ex_date or pd.isna(ex_date):
|
||||
continue
|
||||
|
||||
try:
|
||||
time.sleep(0.2) # Sleep for 200ms to avoid hitting API limits
|
||||
df_daily = self.pro.daily_basic(ts_code=ts_code, trade_date=ex_date, fields='total_share')
|
||||
|
||||
if not df_daily.empty and not df_daily['total_share'].empty:
|
||||
total_share = df_daily.iloc[0]['total_share'] # total_share is in 万股 (10k shares)
|
||||
cash_div_per_share = row['cash_div'] # This is per-share
|
||||
|
||||
# Total dividend in Yuan
|
||||
total_cash_dividend = (cash_div_per_share * total_share * 10000)
|
||||
df_div.loc[index, 'total_cash_div'] = total_cash_dividend
|
||||
except Exception as e:
|
||||
print(f"Could not fetch daily basic for {ts_code} on {ex_date}: {e}")
|
||||
|
||||
df_div['year'] = pd.to_datetime(df_div['end_date']).dt.year
|
||||
dividends_by_year = df_div.groupby('year')['total_cash_div'].sum().reset_index()
|
||||
|
||||
dividends_by_year['date_str'] = dividends_by_year['year'].astype(str) + '1231'
|
||||
dividends_by_year.rename(columns={'total_cash_div': 'dividends'}, inplace=True)
|
||||
|
||||
return dividends_by_year[['date_str', 'dividends']]
|
||||
|
||||
def get_repurchases(self, symbol: str) -> pd.DataFrame:
|
||||
ts_code = self._get_ts_code(symbol)
|
||||
df = self.pro.repurchase(ts_code=ts_code)
|
||||
self._save_raw_data(df, ts_code, "repurchases")
|
||||
|
||||
if df.empty or 'ann_date' not in df.columns or 'amount' not in df.columns:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Filter for repurchases with a valid amount
|
||||
df = df[df['amount'] > 0]
|
||||
|
||||
if df.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Extract year and group by it
|
||||
df['year'] = pd.to_datetime(df['ann_date']).dt.year
|
||||
repurchases_by_year = df.groupby('year')['amount'].sum().reset_index()
|
||||
|
||||
# Create date_str for merging (YYYY1231)
|
||||
repurchases_by_year['date_str'] = repurchases_by_year['year'].astype(str) + '1231'
|
||||
|
||||
# Rename for merging.
|
||||
# Based on user feedback, it appears the unit from the API is Yuan, so no conversion is needed.
|
||||
repurchases_by_year.rename(columns={'amount': 'repurchases'}, inplace=True)
|
||||
|
||||
return repurchases_by_year[['date_str', 'repurchases']]
|
||||
@ -1,182 +1,93 @@
|
||||
import requests
|
||||
import pandas as pd
|
||||
import time
|
||||
from .base import DataFetcher
|
||||
# Import clients
|
||||
from .alphavantage_us_client import AlphaVantageUsClient
|
||||
from .ifind_int_client import IFindIntClient
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class UsFetcher(DataFetcher):
|
||||
BASE_URL = "https://www.alphavantage.co/query"
|
||||
|
||||
def __init__(self, api_key: str):
|
||||
def __init__(self, api_key: str, data_source: str = 'Alpha Vantage'):
|
||||
super().__init__(api_key)
|
||||
self.data_source = data_source
|
||||
self.storage = DataStorage()
|
||||
|
||||
def _save_raw_data(self, data, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
|
||||
df = pd.DataFrame()
|
||||
if isinstance(data, list):
|
||||
df = pd.DataFrame(data)
|
||||
elif isinstance(data, dict):
|
||||
# For single-record JSON objects, convert to a DataFrame
|
||||
df = pd.DataFrame([data])
|
||||
|
||||
if not df.empty:
|
||||
self.storage.save_data(df, 'US', symbol, f"raw_{name}")
|
||||
|
||||
def _fetch_data(self, function: str, symbol: str) -> pd.DataFrame:
|
||||
params = {
|
||||
"function": function,
|
||||
"symbol": symbol,
|
||||
"apikey": self.api_key
|
||||
}
|
||||
try:
|
||||
time.sleep(15)
|
||||
response = requests.get(self.BASE_URL, params=params)
|
||||
data = response.json()
|
||||
except Exception as e:
|
||||
print(f"Error requesting {function}: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
if data:
|
||||
self._save_raw_data(data.get("annualReports"), symbol, f"{function.lower()}_annual")
|
||||
|
||||
df_annual = pd.DataFrame()
|
||||
|
||||
if "annualReports" in data and data["annualReports"]:
|
||||
df_annual = pd.DataFrame(data["annualReports"])
|
||||
if "fiscalDateEnding" in df_annual.columns:
|
||||
df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False)
|
||||
|
||||
# Dynamic year filtering: Find the latest report with valid data and take surrounding 5 years
|
||||
# For Alpha Vantage, data is already sorted by date descending.
|
||||
# We simply check for the first row with non-None values in critical columns if possible,
|
||||
# but usually AV returns valid blocks. We'll just take the top 5.
|
||||
# Unlike iFinD, AV returns a list of available reports, so we don't need to probe year by year.
|
||||
|
||||
# Keep top 5 latest entries
|
||||
df_annual = df_annual.head(5)
|
||||
if self.data_source == 'iFinD':
|
||||
self.client = IFindIntClient(api_key, 'US')
|
||||
else:
|
||||
print(f"Error fetching {function} for {symbol}: {data}")
|
||||
return pd.DataFrame()
|
||||
self.client = AlphaVantageUsClient(api_key)
|
||||
|
||||
return df_annual
|
||||
def _get_ifind_code(self, symbol: str) -> str:
|
||||
# If using iFinD for US, what's the code?
|
||||
# Often ticker is enough or ticker + suffix.
|
||||
# Since IFindClient takes list of codes, maybe just 'AAPL'?
|
||||
# Let's return symbol for now.
|
||||
return symbol
|
||||
|
||||
def _save_raw_data(self, data, symbol: str, name: str):
|
||||
# Only used if strictly needed by fetcher itself, but now client handles it.
|
||||
# However, let's keep it for compatibility if something else calls it or legacy.
|
||||
pass
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
|
||||
# 1. Get Overview for PE, PB, MarketCap, Employees
|
||||
overview_data = {}
|
||||
try:
|
||||
time.sleep(15)
|
||||
params = {"function": "OVERVIEW", "symbol": symbol, "apikey": self.api_key}
|
||||
r = requests.get(self.BASE_URL, params=params)
|
||||
overview_data = r.json()
|
||||
# Clean up 'None' strings from API response before processing
|
||||
if isinstance(overview_data, dict):
|
||||
for key, value in overview_data.items():
|
||||
if value == 'None':
|
||||
overview_data[key] = None
|
||||
self._save_raw_data(overview_data, symbol, "market_metrics_overview")
|
||||
except Exception as e:
|
||||
print(f"Error fetching OVERVIEW for {symbol}: {e}")
|
||||
|
||||
market_cap = float(overview_data.get("MarketCapitalization") or 0)
|
||||
shares_outstanding = float(overview_data.get("SharesOutstanding") or 0)
|
||||
|
||||
price = 0
|
||||
if shares_outstanding > 0:
|
||||
price = market_cap / shares_outstanding
|
||||
|
||||
return {
|
||||
"price": price,
|
||||
"name": overview_data.get("Name"),
|
||||
"fiscal_year_end": overview_data.get("FiscalYearEnd"),
|
||||
"dividend_yield": float(overview_data.get("DividendYield") or 0),
|
||||
"market_cap": market_cap,
|
||||
"pe": float(overview_data.get("PERatio") or 0),
|
||||
"pb": float(overview_data.get("PriceToBookRatio") or 0),
|
||||
"employee_count": int(float(overview_data.get("FullTimeEmployees") or 0)),
|
||||
"total_share_holders": 0 # Not typically provided in basic AV Overview
|
||||
}
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_market_metrics(symbol, code)
|
||||
else:
|
||||
return self.client.get_market_metrics(symbol)
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
|
||||
df = self._fetch_data("INCOME_STATEMENT", symbol)
|
||||
cols_map = {
|
||||
"fiscalDateEnding": "date",
|
||||
"totalRevenue": "revenue",
|
||||
"netIncome": "net_income",
|
||||
"grossProfit": "gross_profit",
|
||||
"costOfRevenue": "cogs",
|
||||
"researchAndDevelopment": "rd_exp",
|
||||
"sellingGeneralAndAdministrative": "sga_exp",
|
||||
"interestExpense": "fin_exp",
|
||||
"incomeBeforeTax": "total_profit",
|
||||
"incomeTaxExpense": "income_tax",
|
||||
"ebit": "ebit"
|
||||
}
|
||||
df = df.rename(columns=cols_map)
|
||||
|
||||
# Convert numeric columns for analysis, keep others as is
|
||||
numeric_cols = [
|
||||
"revenue", "net_income", "gross_profit", "cogs", "rd_exp", "sga_exp",
|
||||
"fin_exp", "total_profit", "income_tax", "ebit",
|
||||
"depreciation", "depreciationAndAmortization"
|
||||
]
|
||||
for col in numeric_cols:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
return df
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_income_statement(symbol, code)
|
||||
else:
|
||||
return self.client.get_income_statement(symbol)
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
|
||||
df = self._fetch_data("BALANCE_SHEET", symbol)
|
||||
cols_map = {
|
||||
"fiscalDateEnding": "date",
|
||||
"totalShareholderEquity": "total_equity",
|
||||
"totalLiabilities": "total_liabilities",
|
||||
"totalCurrentAssets": "current_assets",
|
||||
"totalCurrentLiabilities": "current_liabilities",
|
||||
"cashAndCashEquivalentsAtCarryingValue": "cash",
|
||||
"currentNetReceivables": "receivables",
|
||||
"inventory": "inventory",
|
||||
"propertyPlantEquipment": "fixed_assets",
|
||||
"totalAssets": "total_assets",
|
||||
"goodwill": "goodwill",
|
||||
"longTermInvestments": "lt_invest",
|
||||
"shortTermDebt": "short_term_debt",
|
||||
"currentLongTermDebt": "short_term_debt_part",
|
||||
"longTermDebt": "long_term_debt",
|
||||
"currentAccountsPayable": "accounts_payable",
|
||||
"otherCurrentAssets": "prepayment",
|
||||
"otherNonCurrentAssets": "other_assets",
|
||||
"deferredRevenue": "adv_receipts"
|
||||
}
|
||||
df = df.rename(columns=cols_map)
|
||||
|
||||
numeric_cols = [
|
||||
"total_equity", "total_liabilities", "current_assets", "current_liabilities",
|
||||
"cash", "receivables", "inventory", "fixed_assets", "total_assets",
|
||||
"goodwill", "lt_invest", "short_term_debt", "short_term_debt_part",
|
||||
"long_term_debt", "accounts_payable", "prepayment", "other_assets", "adv_receipts"
|
||||
]
|
||||
|
||||
for col in numeric_cols:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
return df
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_balance_sheet(symbol, code)
|
||||
else:
|
||||
return self.client.get_balance_sheet(symbol)
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
|
||||
df = self._fetch_data("CASH_FLOW", symbol)
|
||||
cols_map = {
|
||||
"fiscalDateEnding": "date",
|
||||
"operatingCashflow": "ocf",
|
||||
"capitalExpenditures": "capex",
|
||||
"dividendPayout": "dividends",
|
||||
"depreciationDepletionAndAmortization": "depreciation"
|
||||
}
|
||||
df = df.rename(columns=cols_map)
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_cash_flow(symbol, code)
|
||||
else:
|
||||
return self.client.get_cash_flow(symbol)
|
||||
|
||||
numeric_cols = ["ocf", "capex", "dividends", "depreciation"]
|
||||
for col in numeric_cols:
|
||||
if col in df.columns:
|
||||
df[col] = pd.to_numeric(df[col], errors='coerce')
|
||||
return df
|
||||
# Optional methods not originally in UsFetcher but available in iFinD client
|
||||
# If using Alpha Vantage, these might not be supported or need adding to AV client.
|
||||
# We will only expose if iFinD or if AV client supports it (AV client currently doesn't implement these)
|
||||
# So we can create empty/dummy implementations for AV or check data_source.
|
||||
|
||||
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_historical_metrics(symbol, code, dates)
|
||||
return pd.DataFrame()
|
||||
|
||||
def get_dividends(self, symbol: str) -> pd.DataFrame:
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_dividends(symbol, code)
|
||||
return pd.DataFrame()
|
||||
|
||||
def get_repurchases(self, symbol: str) -> pd.DataFrame:
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_repurchases(symbol, code)
|
||||
return pd.DataFrame()
|
||||
|
||||
def get_employee_count(self, symbol: str) -> pd.DataFrame:
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_employee_count(symbol, code)
|
||||
# AV Metrics has employee count in market metrics, but not historical series yet.
|
||||
return pd.DataFrame()
|
||||
|
||||
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
|
||||
if self.data_source == 'iFinD':
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_financial_ratios(symbol, code)
|
||||
return pd.DataFrame()
|
||||
|
||||
@ -1,474 +1,56 @@
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import time
|
||||
from .base import DataFetcher
|
||||
from .ifind_client import IFindClient
|
||||
from .ifind_int_client import IFindIntClient
|
||||
from storage.file_io import DataStorage
|
||||
|
||||
class VnFetcher(DataFetcher):
|
||||
def __init__(self, api_key: str):
|
||||
# api_key is the iFinD Refresh Token
|
||||
super().__init__(api_key)
|
||||
self.cli = IFindClient(refresh_token=api_key)
|
||||
self.data_source = 'iFinD'
|
||||
self.client = IFindIntClient(api_key, 'VN')
|
||||
self.storage = DataStorage()
|
||||
self._basic_info_cache = {}
|
||||
|
||||
def _get_ifind_code(self, symbol: str) -> str:
|
||||
# Vietnam stocks usually have 3 letter codes.
|
||||
# We assume the user provides the correct code (e.g. VNM, or VNM.VN).
|
||||
# We can add simple logic: if it's 3 letters, maybe append nothing?
|
||||
# iFinD codes often need suffix. But without documentation, safest is to pass through.
|
||||
# VN stocks in iFinD usually just symbol (e.g. VNM)
|
||||
return symbol
|
||||
|
||||
def _fetch_basic_info(self, symbol: str) -> dict:
|
||||
"""获取公司的基本信息:中文名称、会计年结日、上市日期"""
|
||||
def _fetch_basic_info(self, symbol: str):
|
||||
code = self._get_ifind_code(symbol)
|
||||
if code in self._basic_info_cache:
|
||||
return self._basic_info_cache[code]
|
||||
|
||||
params = {
|
||||
"codes": code,
|
||||
"indipara": [
|
||||
{"indicator": "corp_cn_name", "indiparams": []},
|
||||
{"indicator": "accounting_date", "indiparams": []},
|
||||
{"indicator": "ipo_date", "indiparams": []}
|
||||
]
|
||||
}
|
||||
res = self.cli.post("basic_data_service", params)
|
||||
df = self._parse_ifind_tables(res)
|
||||
if not df.empty:
|
||||
self._save_raw_data(df, symbol, "basic_info_raw")
|
||||
|
||||
info = {
|
||||
"name": "",
|
||||
"accounting_date": "1231", # Default 12-31
|
||||
"ipo_date": ""
|
||||
}
|
||||
|
||||
if not df.empty:
|
||||
row = df.iloc[0]
|
||||
info["name"] = str(row.get("corp_cn_name", ""))
|
||||
|
||||
acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "")
|
||||
if acc_date:
|
||||
info["accounting_date"] = acc_date
|
||||
|
||||
info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "")
|
||||
|
||||
self._basic_info_cache[code] = info
|
||||
return info
|
||||
|
||||
def _save_raw_data(self, data: any, symbol: str, name: str):
|
||||
if data is None:
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
df = pd.DataFrame([data])
|
||||
else:
|
||||
df = data
|
||||
self.storage.save_data(df, 'VN', symbol, f"raw_{name}")
|
||||
|
||||
def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
|
||||
"""通用解析 iFinD 返回结果的 tables 结构为 DataFrame"""
|
||||
if not res:
|
||||
return pd.DataFrame()
|
||||
|
||||
if res.get("errorcode") != 0:
|
||||
print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
|
||||
return pd.DataFrame()
|
||||
|
||||
tables = res.get("tables", [])
|
||||
if not tables:
|
||||
# print("iFinD API Warning: No tables found in response.")
|
||||
return pd.DataFrame()
|
||||
|
||||
table_info = tables[0]
|
||||
table_data = table_info.get("table", {})
|
||||
times = table_info.get("time", [])
|
||||
|
||||
if not table_data:
|
||||
return pd.DataFrame()
|
||||
|
||||
processed_table_data = {}
|
||||
for k, v in table_data.items():
|
||||
if not isinstance(v, list):
|
||||
processed_table_data[k] = [v]
|
||||
else:
|
||||
processed_table_data[k] = v
|
||||
|
||||
df = pd.DataFrame(processed_table_data)
|
||||
if times and len(times) == len(df):
|
||||
df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times]
|
||||
elif times and len(df) == 1:
|
||||
df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0]
|
||||
|
||||
if 'end_date' not in df.columns:
|
||||
for col in ['time', 'date', 'trade_date', 'REPORT_DATE']:
|
||||
if col in df.columns:
|
||||
df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0]
|
||||
break
|
||||
|
||||
return df
|
||||
|
||||
def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
if df.empty or 'end_date' not in df.columns:
|
||||
return df
|
||||
|
||||
df = df.sort_values(by='end_date', ascending=False)
|
||||
df = df.drop_duplicates(subset=['end_date'], keep='first')
|
||||
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
latest_record = df.iloc[[0]]
|
||||
try:
|
||||
latest_date_str = str(latest_record['end_date'].values[0])
|
||||
last_year_date_str = str(int(latest_date_str) - 10000)
|
||||
comparable_record = df[df['end_date'].astype(str) == last_year_date_str]
|
||||
except:
|
||||
comparable_record = pd.DataFrame()
|
||||
|
||||
# VN usually ends in 1231
|
||||
is_annual = df['end_date'].astype(str).str.endswith('1231')
|
||||
annual_records = df[is_annual]
|
||||
|
||||
combined = pd.concat([latest_record, comparable_record, annual_records])
|
||||
combined = combined.drop_duplicates(subset=['end_date'])
|
||||
combined = combined.sort_values(by='end_date', ascending=False)
|
||||
return combined
|
||||
|
||||
def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
    """Fetch up to five annual periods of the configured indicators.

    Probes the most recent three fiscal year-ends (first indicator
    only) to locate the latest year with published data, then pulls
    every indicator for that year and the four preceding ones.
    Returns an empty DataFrame when nothing usable comes back.
    """
    code = self._get_ifind_code(symbol)
    acc_date = self._fetch_basic_info(symbol).get("accounting_date", "1231")
    this_year = int(time.strftime("%Y"))

    # Probe backwards for the newest year with published data.
    probe = indicator_configs[0]
    anchor_year = None
    for back in range(3):
        year = this_year - back
        probe_params = {
            "codes": code,
            "indipara": [
                {"indicator": probe["indicator"],
                 "indiparams": [f"{year}{acc_date}", probe.get("type", "1"), "CNY"]}
            ]
        }
        probe_df = self._parse_ifind_tables(self.cli.post("basic_data_service", probe_params))

        if not probe_df.empty:
            first_cell = probe_df.iloc[0, 0] if probe_df.shape[1] > 0 else None
            if pd.notna(first_cell) and first_cell != 0:
                anchor_year = year
                break

    if anchor_year is None:
        # Nothing confirmed; fall back to the current calendar year.
        anchor_year = this_year

    # Pull five consecutive annual periods ending at the anchor year.
    frames = []
    for back in range(5):
        period = f"{anchor_year - back}{acc_date}"
        year_params = {
            "codes": code,
            "indipara": [
                {"indicator": item["indicator"],
                 "indiparams": [period, item.get("type", "1"), "CNY"]}
                for item in indicator_configs
            ]
        }
        year_df = self._parse_ifind_tables(self.cli.post("basic_data_service", year_params))
        if not year_df.empty:
            year_df['end_date'] = period
            frames.append(year_df)

    # Drop frames that carry no actual values before concatenating.
    frames = [f for f in frames if not f.empty and not f.isna().all().all()]
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)
|
||||
return self.client._fetch_basic_info(symbol, code)
|
||||
|
||||
def get_income_statement(self, symbol: str) -> pd.DataFrame:
    """Annual income-statement items for *symbol*, mapped to canonical names."""
    indicator_list = [{"indicator": name} for name in (
        "revenue_oas",
        "gross_profit_oas",
        "sga_expenses_oas",
        "selling_marketing_expenses_oas",
        "ga_expenses_oas",
        "rd_expenses_oas",
        "income_tax_expense_oas",
        "net_income_attri_to_common_sh_oas",
        "operating_income_oas",
    )]

    raw = self._fetch_financial_data_annual(symbol, indicator_list)
    if raw.empty:
        return raw
    self._save_raw_data(raw, symbol, "income_statement_raw")

    # Map iFinD "_oas" column names to the canonical schema used downstream.
    renamed = raw.rename(columns={
        'revenue_oas': 'revenue',
        'gross_profit_oas': 'gross_profit',
        'sga_expenses_oas': 'sga_exp',
        'selling_marketing_expenses_oas': 'selling_marketing_exp',
        'ga_expenses_oas': 'ga_exp',
        'rd_expenses_oas': 'rd_exp',
        'income_tax_expense_oas': 'income_tax',
        'net_income_attri_to_common_sh_oas': 'net_income',
        'operating_income_oas': 'operating_profit',
    })

    # Coerce value columns to numeric; date columns stay as strings.
    for column in renamed.columns:
        if column not in ('date', 'end_date'):
            renamed[column] = pd.to_numeric(renamed[column], errors='coerce')

    return self._filter_data(renamed)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_income_statement(symbol, code)
|
||||
|
||||
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
    """Annual balance-sheet items for *symbol*, mapped to canonical names.

    Derives 'total_liabilities' as assets minus equity when the API
    does not supply it directly.  Returns an empty DataFrame when no
    data is available.
    """
    indicators = [
        {"indicator": "cash_equi_short_term_inve_oas"},
        {"indicator": "accou_and_notes_recei_oas"},
        {"indicator": "inventories_oas"},
        {"indicator": "ppe_net_oas"},
        {"indicator": "long_term_inv_and_receiv_oas"},
        {"indicator": "goodwill_and_intasset_oas"},
        {"indicator": "short_term_debt_oas"},
        {"indicator": "short_term_borrowings_oas"},
        {"indicator": "account_and_note_payable_oas"},
        {"indicator": "contra_liabilities_current_oas"},
        {"indicator": "advance_from_cust_current_oas"},
        {"indicator": "defer_revenue_current_oas"},
        {"indicator": "long_term_debt_oas"},
        {"indicator": "long_term_borrowings_oas"},
        {"indicator": "total_assets_oas"},
        {"indicator": "equity_attri_to_companyowner_oas"},
        {"indicator": "prepaid_expenses_current_oas"}
    ]

    df = self._fetch_financial_data_annual(symbol, indicators)
    if df.empty: return df
    self._save_raw_data(df, symbol, "balance_sheet_raw")

    rename_map = {
        'cash_equi_short_term_inve_oas': 'cash',
        'accou_and_notes_recei_oas': 'receivables',
        'inventories_oas': 'inventory',
        'ppe_net_oas': 'fixed_assets',
        'long_term_inv_and_receiv_oas': 'long_term_investments',
        'goodwill_and_intasset_oas': 'goodwill',
        'short_term_debt_oas': 'short_term_debt',
        'short_term_borrowings_oas': 'short_term_borrowings',
        'account_and_note_payable_oas': 'accounts_payable',
        'contra_liabilities_current_oas': 'contract_liabilities',
        'advance_from_cust_current_oas': 'advances_from_customers',
        'defer_revenue_current_oas': 'deferred_revenue',
        'long_term_debt_oas': 'long_term_debt',
        'long_term_borrowings_oas': 'long_term_borrowings',
        'total_assets_oas': 'total_assets',
        'equity_attri_to_companyowner_oas': 'total_equity',
        'prepaid_expenses_current_oas': 'prepayment'
    }

    df_filtered = df.rename(columns=rename_map)

    # Fix: coerce value columns to numeric BEFORE deriving totals, so
    # the subtraction below never runs on raw string values from the
    # API (which previously could raise TypeError or concatenate).
    for col in df_filtered.columns:
        if col not in ['date', 'end_date']:
            df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')

    # The endpoint does not expose total liabilities directly; derive it.
    if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all():
        if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns:
            df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity']

    return self._filter_data(df_filtered)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_balance_sheet(symbol, code)
|
||||
|
||||
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
    """Annual cash-flow items (OCF, capex, dividends) for *symbol*."""
    fields = (
        "net_cash_flows_from_oa_oas",
        "purchase_of_ppe_and_ia_oas",
        "dividends_paid_oas",
    )
    raw = self._fetch_financial_data_annual(symbol, [{"indicator": f} for f in fields])
    if raw.empty:
        return raw
    self._save_raw_data(raw, symbol, "cash_flow_raw")

    renamed = raw.rename(columns={
        'net_cash_flows_from_oa_oas': 'ocf',
        'purchase_of_ppe_and_ia_oas': 'capex',
        'dividends_paid_oas': 'dividends',
    })

    # Coerce value columns to numeric; date columns stay as strings.
    for column in renamed.columns:
        if column not in ('date', 'end_date'):
            renamed[column] = pd.to_numeric(renamed[column], errors='coerce')

    # Capex arrives as a cash outflow (negative); report its magnitude.
    if 'capex' in renamed.columns:
        renamed['capex'] = renamed['capex'].abs()

    return self._filter_data(renamed)
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_cash_flow(symbol, code)
|
||||
|
||||
def get_market_metrics(self, symbol: str) -> dict:
    """Return basic market metadata (company name, listing date) for *symbol*."""
    info = self._fetch_basic_info(symbol)
    return {
        "name": info.get("name", ""),
        "list_date": info.get("ipo_date", ""),
    }
|
||||
code = self._get_ifind_code(symbol)
|
||||
return self.client.get_market_metrics(symbol, code)
|
||||
|
||||
def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
    """Closing price and market cap on each requested date.

    PE/PB are not supplied by this endpoint and are emitted as 0.0
    placeholders.  Returns an empty DataFrame when *dates* is empty.
    """
    code = self._get_ifind_code(symbol)
    if not dates:
        return pd.DataFrame()

    rows = []
    for raw_date in dates:
        compact = str(raw_date).replace('-', '').replace('/', '')
        # iFinD's date_sequence endpoint wants dashed YYYY-MM-DD dates.
        dashed = f"{compact[:4]}-{compact[4:6]}-{compact[6:]}" if len(compact) == 8 else compact

        request = {
            "codes": code,
            "startdate": dashed,
            "enddate": dashed,
            "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
            "indipara": [
                {"indicator": "pre_close", "indiparams": ["", "0", "CNY"]},
                {"indicator": "market_value", "indiparams": ["", "CNY"]},
            ],
        }
        seq_df = self._parse_ifind_tables(self.cli.post("date_sequence", request))

        row = {'date_str': compact, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
        if not seq_df.empty:
            # Take the most recent observation at or before the target date.
            if 'end_date' in seq_df.columns:
                candidate = seq_df[seq_df['end_date'] <= compact].tail(1)
            else:
                candidate = seq_df.tail(1)
            if not candidate.empty:
                if 'pre_close' in candidate.columns:
                    row['Price'] = float(candidate['pre_close'].iloc[0] or 0.0)
                if 'market_value' in candidate.columns:
                    row['MarketCap'] = float(candidate['market_value'].iloc[0] or 0.0)
        rows.append(row)

    hist = pd.DataFrame(rows)
    self._save_raw_data(hist, symbol, "historical_metrics_raw")
    return hist
|
||||
return self.client.get_historical_metrics(symbol, code, dates)
|
||||
|
||||
def get_dividends(self, symbol: str) -> pd.DataFrame:
    """Cumulative annual dividends for the last five calendar years.

    Years with no (or zero) dividend data are skipped; returns an
    empty DataFrame when nothing is found.
    """
    code = self._get_ifind_code(symbol)
    acc_date = self._fetch_basic_info(symbol).get("accounting_date", "1231")
    this_year = int(time.strftime("%Y"))

    rows = []
    for offset in range(5):
        year = str(this_year - offset)
        payload = {
            "codes": code,
            "indipara": [
                {"indicator": "annual_cum_dividend", "indiparams": [year, "CNY"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", payload))

        if df.empty or 'annual_cum_dividend' not in df.columns:
            continue
        amount = df['annual_cum_dividend'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({
                'date_str': f"{year}{acc_date}",
                'dividends': float(amount)
            })

    if not rows:
        return pd.DataFrame()

    df_div = pd.DataFrame(rows)
    self._save_raw_data(df_div, symbol, "dividends_raw")
    return df_div
|
||||
return self.client.get_dividends(symbol, code)
|
||||
|
||||
def get_repurchases(self, symbol: str) -> pd.DataFrame:
    """Share-repurchase amounts per fiscal year for the last five years.

    Each query window runs from the prior fiscal year-end to the
    target fiscal year-end.  Years with no (or zero) repurchase data
    are skipped; returns an empty DataFrame when nothing is found.
    """
    code = self._get_ifind_code(symbol)
    acc_date = self._fetch_basic_info(symbol).get("accounting_date", "1231")
    month_day = f"{acc_date[:2]}-{acc_date[2:]}"
    this_year = int(time.strftime("%Y"))

    rows = []
    for offset in range(5):
        year = this_year - offset
        window_start = f"{year - 1}-{month_day}"
        window_end = f"{year}-{month_day}"

        payload = {
            "codes": code,
            "indipara": [
                {"indicator": "repur_num_new", "indiparams": [window_start, window_end, "1"]}
            ]
        }
        df = self._parse_ifind_tables(self.cli.post("basic_data_service", payload))

        if df.empty or 'repur_num_new' not in df.columns:
            continue
        amount = df['repur_num_new'].iloc[0]
        if pd.notna(amount) and amount != 0:
            rows.append({
                'date_str': f"{year}{acc_date}",
                'repurchases': float(amount)
            })

    if not rows:
        return pd.DataFrame()

    df_repur = pd.DataFrame(rows)
    self._save_raw_data(df_repur, symbol, "repurchases_raw")
    return df_repur
|
||||
return self.client.get_repurchases(symbol, code)
|
||||
|
||||
def get_employee_count(self, symbol: str) -> pd.DataFrame:
    """Year-end employee headcount for the last five fiscal years.

    Returns an empty DataFrame when no headcount data is found.

    Fix: removed a stray `return self.client.get_employee_count(...)`
    left mid-method by an earlier delegation-based implementation; it
    made the entire staff_num query below unreachable.  The method now
    actually queries iFinD, matching get_dividends/get_repurchases.
    """
    code = self._get_ifind_code(symbol)
    basic_info = self._fetch_basic_info(symbol)
    acc_date = basic_info.get("accounting_date", "1231")
    mm = acc_date[:2]
    dd = acc_date[2:]

    current_year = int(time.strftime("%Y"))
    results = []

    for i in range(5):
        target_year = current_year - i
        target_date = f"{target_year}-{mm}-{dd}"

        params = {
            "codes": code,
            "indipara": [
                {"indicator": "staff_num", "indiparams": [target_date]}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        if not df.empty and 'staff_num' in df.columns:
            val = df['staff_num'].iloc[0]
            # Only keep years with a real, non-zero headcount.
            if pd.notna(val) and val != 0:
                results.append({
                    'date_str': f"{target_year}{acc_date}",
                    'employee_count': float(val)
                })

    if not results:
        return pd.DataFrame()

    df_emp = pd.DataFrame(results)
    self._save_raw_data(df_emp, symbol, "employee_count_raw")
    return df_emp
|
||||
def get_financial_ratios(self, symbol: str) -> pd.DataFrame:
    """Delegate financial-ratio retrieval to the underlying client."""
    ifind_code = self._get_ifind_code(symbol)
    return self.client.get_financial_ratios(symbol, ifind_code)
|
||||
|
||||
@ -15,7 +15,7 @@ class BaseReporter(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
def _generate_markdown_content(self, df: pd.DataFrame, market: str, symbol: str, metrics: dict = {}) -> str:
|
||||
def _generate_markdown_content(self, df: pd.DataFrame, market: str, symbol: str, metrics: dict = {}, data_source: str = None) -> str:
|
||||
if df.empty:
|
||||
return f"No breakdown data available for {market} {symbol}"
|
||||
|
||||
@ -23,7 +23,7 @@ class BaseReporter(ABC):
|
||||
headers = self._get_headers(df)
|
||||
|
||||
md = []
|
||||
md.append(self._generate_md_company_info(symbol, metrics, market))
|
||||
md.append(self._generate_md_company_info(symbol, metrics, market, data_source))
|
||||
md.append("\n")
|
||||
|
||||
for group_name, items in self.indicators.items():
|
||||
@ -69,7 +69,7 @@ class BaseReporter(ABC):
|
||||
disp_val = f"{val}"
|
||||
return disp_val
|
||||
|
||||
def _generate_md_company_info(self, symbol, metrics, market):
|
||||
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
|
||||
return "" # Implemented in subclasses
|
||||
|
||||
def _preprocess_data(self, df, market):
|
||||
|
||||
@ -108,7 +108,7 @@ class CN_ReportGenerator(BaseReporter):
|
||||
def _get_headers(self, df):
|
||||
return [self._format_period_label(date_value) for date_value in df['date_str']]
|
||||
|
||||
def _generate_md_company_info(self, symbol, metrics, market):
|
||||
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name', '')
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -121,15 +121,19 @@ class CN_ReportGenerator(BaseReporter):
|
||||
div = metrics.get('dividend_yield', 0) or 0
|
||||
md = []
|
||||
md.append(f"# {name} ({symbol}) - Financial Report")
|
||||
md.append(f"*Report generated on: {today_str}*\n")
|
||||
md.append(f"*Report generated on: {today_str}*")
|
||||
if data_source:
|
||||
md.append(f"*Data Source: {data_source}*\n")
|
||||
else:
|
||||
md.append("\n")
|
||||
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
|
||||
md.append("|:---|:---|:---|:---|:---|:---|")
|
||||
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
|
||||
return "\n".join(md)
|
||||
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
|
||||
# 1. Generate Markdown content
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
|
||||
|
||||
# 2. Save Markdown file
|
||||
md_path = os.path.join(output_dir, "report.md")
|
||||
@ -144,7 +148,7 @@ class CN_ReportGenerator(BaseReporter):
|
||||
headers = self._get_headers(df_for_html)
|
||||
else:
|
||||
headers = []
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
|
||||
final_html = self.to_html(symbol, html_content)
|
||||
|
||||
html_path = os.path.join(output_dir, "report.html")
|
||||
@ -152,7 +156,7 @@ class CN_ReportGenerator(BaseReporter):
|
||||
f.write(final_html)
|
||||
print(f"HTML report saved to {html_path}")
|
||||
|
||||
def _build_html_content(self, symbol, metrics, headers, df):
|
||||
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name') or symbol
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -229,6 +233,7 @@ class CN_ReportGenerator(BaseReporter):
|
||||
html_sections = [
|
||||
f"<h1>{name} ({symbol}) - Financial Report</h1>",
|
||||
f"<p><em>Report generated on: {today_str}</em></p>",
|
||||
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
|
||||
company_table,
|
||||
'<div class="table-gap"></div>',
|
||||
metrics_table
|
||||
|
||||
@ -97,7 +97,7 @@ class HK_ReportGenerator(BaseReporter):
|
||||
return [self._format_period_label(date_value) for date_value in df['date_str']]
|
||||
return []
|
||||
|
||||
def _generate_md_company_info(self, symbol, metrics, market):
|
||||
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name', '')
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -113,15 +113,19 @@ class HK_ReportGenerator(BaseReporter):
|
||||
|
||||
md = []
|
||||
md.append(f"# {name} ({symbol}) - Financial Report")
|
||||
md.append(f"*Report generated on: {today_str}*\n")
|
||||
md.append(f"*Report generated on: {today_str}*")
|
||||
if data_source:
|
||||
md.append(f"*Data Source: {data_source}*\n")
|
||||
else:
|
||||
md.append("\n")
|
||||
md.append("| 代码 | 简称 | 上市日期 | 年结日 | 市值(亿) | PE | PB | 股息率(%) |")
|
||||
md.append("|:---|:---|:---|:---|:---|:---|:---|:---|")
|
||||
md.append(f"| {symbol} | {name} | {list_date} | {acc_date} | {mcap:.2f} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
|
||||
return "\n".join(md)
|
||||
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
|
||||
md_path = os.path.join(output_dir, "report.md")
|
||||
with open(md_path, "w", encoding='utf-8') as f:
|
||||
f.write(md_content)
|
||||
@ -132,14 +136,14 @@ class HK_ReportGenerator(BaseReporter):
|
||||
headers = self._get_headers(df_for_html)
|
||||
else:
|
||||
headers = []
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
|
||||
final_html = self.to_html(symbol, html_content)
|
||||
|
||||
html_path = os.path.join(output_dir, "report.html")
|
||||
with open(html_path, "w", encoding='utf-8') as f:
|
||||
f.write(final_html)
|
||||
|
||||
def _build_html_content(self, symbol, metrics, headers, df):
|
||||
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name') or symbol
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -222,6 +226,7 @@ class HK_ReportGenerator(BaseReporter):
|
||||
html_sections = [
|
||||
f"<h1>{name} ({symbol}) - Financial Report</h1>",
|
||||
f"<p><em>Report generated on: {today_str}</em></p>",
|
||||
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
|
||||
company_table,
|
||||
'<div class="table-gap"></div>',
|
||||
metrics_table
|
||||
|
||||
@ -109,7 +109,7 @@ class JP_ReportGenerator(BaseReporter):
|
||||
def _get_headers(self, df):
|
||||
return [self._format_period_label(date_value) for date_value in df['date_str']]
|
||||
|
||||
def _generate_md_company_info(self, symbol, metrics, market):
|
||||
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name', '')
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -122,14 +122,18 @@ class JP_ReportGenerator(BaseReporter):
|
||||
div = metrics.get('dividend_yield', 0) or 0
|
||||
md = []
|
||||
md.append(f"# {name} ({symbol}) - Financial Report")
|
||||
md.append(f"*Report generated on: {today_str}*\n")
|
||||
md.append(f"*Report generated on: {today_str}*")
|
||||
if data_source:
|
||||
md.append(f"*Data Source: {data_source}*\n")
|
||||
else:
|
||||
md.append("\n")
|
||||
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
|
||||
md.append("|:---|:---|:---|:---|:---|:---|")
|
||||
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
|
||||
return "\n".join(md)
|
||||
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
md_path = os.path.join(output_dir, "report.md")
|
||||
with open(md_path, "w", encoding='utf-8') as f:
|
||||
@ -141,7 +145,7 @@ class JP_ReportGenerator(BaseReporter):
|
||||
headers = self._get_headers(df_for_html)
|
||||
else:
|
||||
headers = []
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
|
||||
# Re-use the exact same styled HTML from CN_ReportGenerator
|
||||
final_html = self.to_html(symbol, html_content)
|
||||
|
||||
@ -149,7 +153,7 @@ class JP_ReportGenerator(BaseReporter):
|
||||
with open(html_path, "w", encoding='utf-8') as f:
|
||||
f.write(final_html)
|
||||
|
||||
def _build_html_content(self, symbol, metrics, headers, df):
|
||||
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
|
||||
# Implementation identical to CN_ReportGenerator for style consistency
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name') or symbol
|
||||
@ -227,6 +231,7 @@ class JP_ReportGenerator(BaseReporter):
|
||||
html_sections = [
|
||||
f"<h1>{name} ({symbol}) - Financial Report</h1>",
|
||||
f"<p><em>Report generated on: {today_str}</em></p>",
|
||||
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
|
||||
company_table,
|
||||
'<div class="table-gap"></div>',
|
||||
metrics_table
|
||||
|
||||
@ -58,7 +58,7 @@ class US_ReportGenerator(BaseReporter):
|
||||
]
|
||||
}
|
||||
|
||||
def _generate_md_company_info(self, symbol, metrics, market):
|
||||
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name', '')
|
||||
fiscal_year_end = metrics.get('fiscal_year_end', '')
|
||||
@ -68,14 +68,22 @@ class US_ReportGenerator(BaseReporter):
|
||||
|
||||
md = []
|
||||
md.append(f"# {name} ({symbol}) - Financial Report")
|
||||
md.append(f"*Report generated on: {today_str}*\n")
|
||||
md.append(f"*Report generated on: {today_str}*")
|
||||
if data_source:
|
||||
md.append(f"*Data Source: {data_source}*\n")
|
||||
else:
|
||||
md.append("\n")
|
||||
|
||||
md.append("| 代码 | 简称 | 财报日期 | PE | PB | 股息率(%) |")
|
||||
md.append("|:---|:---|:---|:---|:---|:---|")
|
||||
md.append(f"| {symbol} | {name} | {fiscal_year_end} | {pe:.2f} | {pb:.2f} | {div_yield:.2f}% |")
|
||||
return "\n".join(md)
|
||||
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
|
||||
# Override to pass data_source to _generate_md_company_info
|
||||
# Note: BaseReporter._generate_markdown_content calls _generate_md_company_info
|
||||
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
|
||||
|
||||
md_path = os.path.join(output_dir, "report.md")
|
||||
with open(md_path, "w", encoding='utf-8') as f:
|
||||
@ -88,7 +96,7 @@ class US_ReportGenerator(BaseReporter):
|
||||
headers = self._get_headers(df_for_html)
|
||||
else:
|
||||
headers = []
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
|
||||
final_html = self.to_html(symbol, html_content)
|
||||
|
||||
html_path = os.path.join(output_dir, "report.html")
|
||||
@ -96,7 +104,7 @@ class US_ReportGenerator(BaseReporter):
|
||||
f.write(final_html)
|
||||
print(f"HTML report saved to {html_path}")
|
||||
|
||||
def _build_html_content(self, symbol, metrics, headers, df):
|
||||
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name') or symbol
|
||||
fiscal_year_end = metrics.get('fiscal_year_end') or "-"
|
||||
@ -169,6 +177,7 @@ class US_ReportGenerator(BaseReporter):
|
||||
html_sections = [
|
||||
f"<h1>{name} ({symbol}) - Financial Report</h1>",
|
||||
f"<p><em>Report generated on: {today_str}</em></p>",
|
||||
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
|
||||
company_table,
|
||||
'<div class="table-gap"></div>',
|
||||
metrics_table
|
||||
|
||||
@ -99,7 +99,7 @@ class VN_ReportGenerator(BaseReporter):
|
||||
def _get_headers(self, df):
|
||||
return [self._format_period_label(date_value) for date_value in df['date_str']]
|
||||
|
||||
def _generate_md_company_info(self, symbol, metrics, market):
|
||||
def _generate_md_company_info(self, symbol, metrics, market, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name', '')
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -112,14 +112,18 @@ class VN_ReportGenerator(BaseReporter):
|
||||
div = metrics.get('dividend_yield', 0) or 0
|
||||
md = []
|
||||
md.append(f"# {name} ({symbol}) - Financial Report")
|
||||
md.append(f"*Report generated on: {today_str}*\n")
|
||||
md.append(f"*Report generated on: {today_str}*")
|
||||
if data_source:
|
||||
md.append(f"*Data Source: {data_source}*\n")
|
||||
else:
|
||||
md.append("\n")
|
||||
md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |")
|
||||
md.append("|:---|:---|:---|:---|:---|:---|")
|
||||
md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |")
|
||||
return "\n".join(md)
|
||||
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir):
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics)
|
||||
def generate_report(self, df_analysis, symbol, market, metrics, output_dir, data_source=None):
|
||||
md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics, data_source)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
md_path = os.path.join(output_dir, "report.md")
|
||||
with open(md_path, "w", encoding='utf-8') as f:
|
||||
@ -131,14 +135,15 @@ class VN_ReportGenerator(BaseReporter):
|
||||
headers = self._get_headers(df_for_html)
|
||||
else:
|
||||
headers = []
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html)
|
||||
html_content = self._build_html_content(symbol, metrics, headers, df_for_html, data_source)
|
||||
final_html = self.to_html(symbol, html_content)
|
||||
|
||||
html_path = os.path.join(output_dir, "report.html")
|
||||
with open(html_path, "w", encoding='utf-8') as f:
|
||||
f.write(final_html)
|
||||
print(f"HTML report saved to {html_path}")
|
||||
|
||||
def _build_html_content(self, symbol, metrics, headers, df):
|
||||
def _build_html_content(self, symbol, metrics, headers, df, data_source=None):
|
||||
today_str = datetime.date.today().strftime("%Y-%m-%d")
|
||||
name = metrics.get('name') or symbol
|
||||
raw_list_date = metrics.get('list_date', '')
|
||||
@ -215,6 +220,7 @@ class VN_ReportGenerator(BaseReporter):
|
||||
html_sections = [
|
||||
f"<h1>{name} ({symbol}) - Financial Report</h1>",
|
||||
f"<p><em>Report generated on: {today_str}</em></p>",
|
||||
f"<p><em>Data Source: {data_source}</em></p>" if data_source else "",
|
||||
company_table,
|
||||
'<div class="table-gap"></div>',
|
||||
metrics_table
|
||||
|
||||
@ -52,7 +52,9 @@ class CN_Strategy(BaseStrategy):
|
||||
symbol=self.stock_code,
|
||||
market='CN',
|
||||
metrics=self.raw_data['metrics'],
|
||||
output_dir=output_dir
|
||||
output_dir=output_dir,
|
||||
data_source=getattr(self.fetcher, 'data_source', 'Tushare')
|
||||
)
|
||||
else:
|
||||
print("No analysis result to generate report.")
|
||||
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
|
||||
|
||||
@ -76,7 +76,9 @@ class HK_Strategy(BaseStrategy):
|
||||
symbol=self.stock_code,
|
||||
market='HK',
|
||||
metrics=self.raw_data['metrics'],
|
||||
output_dir=output_dir
|
||||
output_dir=output_dir,
|
||||
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
|
||||
)
|
||||
else:
|
||||
print("No analysis result to generate report.")
|
||||
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
|
||||
|
||||
@ -71,9 +71,11 @@ class JP_Strategy(BaseStrategy):
|
||||
symbol=self.stock_code,
|
||||
market='JP',
|
||||
metrics=self.raw_data['metrics'],
|
||||
output_dir=output_dir
|
||||
output_dir=output_dir,
|
||||
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
|
||||
)
|
||||
else:
|
||||
print("No analysis result to generate report.")
|
||||
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
|
||||
|
||||
import pandas as pd # Import needed for the placeholder DataFrames
|
||||
|
||||
@ -6,10 +6,12 @@ from storage.file_io import DataStorage
|
||||
import os
|
||||
|
||||
class US_Strategy(BaseStrategy):
|
||||
def __init__(self, stock_code, av_key):
|
||||
def __init__(self, stock_code, av_key, data_source=None):
|
||||
super().__init__(stock_code)
|
||||
self.av_key = av_key
|
||||
self.fetcher = FetcherFactory.get_fetcher('US', av_key=self.av_key)
|
||||
# If using iFinD for US, we might need IFIND token, but factory handles retrieval from env if needed
|
||||
# We pass av_key as it's the required arg for get_fetcher's av_key param (or we can make it optional in factory call if unused)
|
||||
self.fetcher = FetcherFactory.get_fetcher('US', av_key=self.av_key, data_source=data_source)
|
||||
self.analyzer = US_Analyzer()
|
||||
self.reporter = US_ReportGenerator()
|
||||
self.storage = DataStorage()
|
||||
@ -42,7 +44,9 @@ class US_Strategy(BaseStrategy):
|
||||
symbol=self.stock_code,
|
||||
market='US',
|
||||
metrics=self.raw_data['metrics'],
|
||||
output_dir=output_dir
|
||||
output_dir=output_dir,
|
||||
data_source=getattr(self.fetcher, 'data_source', None)
|
||||
)
|
||||
else:
|
||||
print("No analysis result to generate report.")
|
||||
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
|
||||
|
||||
@ -70,7 +70,9 @@ class VN_Strategy(BaseStrategy):
|
||||
symbol=self.stock_code,
|
||||
market='VN',
|
||||
metrics=self.raw_data['metrics'],
|
||||
output_dir=output_dir
|
||||
output_dir=output_dir,
|
||||
data_source=getattr(self.fetcher, 'data_source', 'iFinD')
|
||||
)
|
||||
else:
|
||||
print("No analysis result to generate report.")
|
||||
raise ValueError(f"Analysis result is empty for {self.stock_code}. Data fetching might have failed.")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user