# Fundamental_Analysis/backend/app/data_providers/finnhub.py
#
# NOTE: this file was captured from a repository web viewer. The viewer reported
# it as a 311-line, 15 KiB Python file and warned that it contains ambiguous
# Unicode characters, i.e. characters that might be confused with other
# characters. If that is intentional, the warning can safely be ignored.

from .base import BaseDataProvider
from typing import Any, Dict, List, Optional
import finnhub
import pandas as pd
from datetime import datetime, timedelta
import asyncio
import logging
import httpx
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class FinnhubProvider(BaseDataProvider):
    """Data provider that reads company data from Finnhub.

    Each public coroutine wraps a blocking fetch (a Finnhub SDK call, with a
    direct HTTP request as fallback where one is implemented) and runs it in
    the running event loop's default executor. Failures are logged and
    reported to the caller as an empty result (``None`` or ``[]``) instead of
    being raised.
    """

    # Base URL of the REST API used by the direct-HTTP fallbacks.
    _API_BASE = 'https://finnhub.io/api/v1'

    # One entry per extracted statement field:
    # (output key, report section, concept candidates, label candidates).
    # Sections are 'ic', 'bs' and 'cf' as found under each record's 'report'.
    # Output keys follow the naming the original author aligned with the
    # project's financial_data_dictionary. Entry order defines output order.
    _STATEMENT_FIELDS = (
        (
            'revenue', 'ic',
            ('Revenues', 'RevenueFromContractWithCustomerExcludingAssessedTax',
             'SalesRevenueNet', 'Revenue', 'RevenuesNet', 'SalesRevenueGoodsNet'),
            ('Total revenue', 'Revenue', 'Sales revenue'),
        ),
        (
            'n_income', 'ic',
            ('NetIncomeLoss', 'ProfitLoss',
             'NetIncomeLossAvailableToCommonStockholdersBasic',
             'NetIncomeLossAvailableToCommonStockholdersDiluted'),
            ('Net income', 'Net income (loss)'),
        ),
        (
            'gross_profit', 'ic',
            ('GrossProfit',),
            ('Gross profit',),
        ),
        (
            'total_assets', 'bs',
            ('Assets', 'AssetsTotal', 'AssetsCurrentAndNoncurrent',
             'AssetsIncludingAssetsMeasuredAtFairValue'),
            ('Total assets',),
        ),
        (
            'total_hldr_eqy_exc_min_int', 'bs',
            ('StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
             'StockholdersEquity', 'StockholdersEquityTotal', 'Equity'),
            ('Total equity', "Stockholders' equity"),
        ),
        (
            'goodwill', 'bs',
            ('Goodwill', 'GoodwillAndIntangibleAssets'),
            ('Goodwill', 'Goodwill and intangible assets'),
        ),
        (
            'n_cashflow_act', 'cf',
            ('NetCashProvidedByUsedInOperatingActivities',
             'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations',
             'NetCashFlowOperating'),
            ('Net cash provided by operating activities',),
        ),
        (
            'c_pay_acq_const_fiolta', 'cf',
            ('CapitalExpenditures', 'PaymentsToAcquirePropertyPlantAndEquipment',
             'PaymentsToAcquireProductiveAssets'),
            ('Capital expenditures',),
        ),
    )

    def _initialize(self):
        """Create the Finnhub SDK client from ``self.token``.

        Raises:
            ValueError: If no API key has been configured.
        """
        if not self.token:
            raise ValueError("Finnhub API key not provided.")
        self.client = finnhub.Client(api_key=self.token)
        # Never log the full key: show at most its last four characters.
        if isinstance(self.token, str) and len(self.token) >= 4:
            masked = f"***{self.token[-4:]}"
        else:
            masked = "***"
        # Best effort: a logging failure must not break initialization.
        self._log_best_effort(logging.INFO, f"[Finnhub] client initialized (token={masked})")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _log_best_effort(level: int, message: str) -> None:
        """Emit ``message`` at ``level``, ignoring any failure while logging."""
        try:
            logger.log(level, message)
        except Exception:
            # Deliberate: diagnostics must never change the data path.
            pass

    @staticmethod
    def _utc_epoch(date_str: str, end_of_day: bool = False) -> int:
        """Convert a ``YYYYMMDD`` date to epoch seconds, interpreted as UTC.

        The naive ``datetime.timestamp()`` uses the host's local timezone, so
        the requested range would shift with the server's UTC offset, while
        the returned candle timestamps are decoded as UTC. Doing the
        arithmetic against the epoch directly keeps both sides in UTC.

        Args:
            date_str: Date in ``%Y%m%d`` format.
            end_of_day: When true, return the last second of that day so the
                whole day is inside an inclusive upper bound.

        Raises:
            ValueError: If ``date_str`` does not match ``%Y%m%d``.
        """
        moment = datetime.strptime(date_str, '%Y%m%d')
        if end_of_day:
            moment += timedelta(days=1) - timedelta(seconds=1)
        return int((moment - datetime(1970, 1, 1)).total_seconds())

    def _http_get_json(self, path: str, params: Dict[str, Any],
                       timeout: float, body_preview: int = 200) -> Optional[Any]:
        """GET a Finnhub REST endpoint directly and return its decoded JSON.

        Used as the fallback when the SDK call raised.

        Args:
            path: Endpoint path below the API base, e.g. ``/stock/profile2``.
            params: Query-string parameters.
            timeout: Request timeout in seconds.
            body_preview: Number of response-body characters to include in
                the error log for a non-200 response.

        Returns:
            The decoded JSON body on HTTP 200, otherwise ``None`` (non-200
            status, transport error or undecodable body; each is logged).
        """
        endpoint = path.rsplit('/', 1)[-1]
        try:
            resp = httpx.get(
                f"{self._API_BASE}{path}",
                params=params,
                headers={'X-Finnhub-Token': self.token},
                timeout=timeout,
            )
            logger.debug(f"[Finnhub] HTTP {endpoint} status={resp.status_code} len={len(resp.text)}")
            if resp.status_code == 200:
                return resp.json()
            logger.error(
                f"[Finnhub] HTTP {endpoint} failed status={resp.status_code} "
                f"body={resp.text[:body_preview]}"
            )
        except Exception as e:
            # Previously swallowed silently; keep the best-effort contract
            # but leave a trace of why the fallback produced nothing.
            logger.warning(f"[Finnhub] HTTP {endpoint} request failed: {e}")
        return None

    @staticmethod
    def _normalize_key(s: Optional[str]) -> str:
        """Lower-case ``s`` and drop every non-alphanumeric character.

        Non-string input yields an empty string.
        """
        if not isinstance(s, str):
            return ""
        return ''.join(ch.lower() for ch in s if ch.isalnum())

    @classmethod
    def _pick(cls, report_block: Any, concept_candidates: Any,
              label_candidates: Any = ()) -> Optional[float]:
        """Return the first candidate value found in one statement section.

        Lookup order:
          1. ``concept_candidates`` against each item's normalized ``concept``;
          2. ``label_candidates`` against each item's normalized ``label``;
          3. ``concept_candidates`` against the concept with anything up to
             its last ``_`` or ``:`` removed. This only runs when 1 and 2
             found nothing, so it cannot change a previously resolved value.
             NOTE(review): intended for taxonomy-prefixed concepts such as
             ``us-gaap_Revenues``; confirm against real Finnhub payloads.

        Within each lookup, candidates are tried in the given order, and a
        value that is ``None`` or cannot be converted to ``float`` is skipped.
        When several items share a key, the last one wins.

        Args:
            report_block: List of item dicts carrying ``concept``, ``label``
                and ``value`` keys; anything else yields ``None``.
            concept_candidates: Concept names to try, in priority order.
            label_candidates: Labels to try, in priority order.
        """
        if not report_block or not isinstance(report_block, (list, tuple)):
            return None

        by_concept: Dict[str, Any] = {}
        by_label: Dict[str, Any] = {}
        by_bare_concept: Dict[str, Any] = {}
        for item in report_block:
            if not isinstance(item, dict):
                continue
            value = item.get('value')
            concept = item.get('concept')
            by_concept[cls._normalize_key(concept)] = value
            by_label[cls._normalize_key(item.get('label'))] = value
            if isinstance(concept, str):
                bare = concept.replace(':', '_').rsplit('_', 1)[-1]
                by_bare_concept[cls._normalize_key(bare)] = value

        lookups = (
            (by_concept, concept_candidates),
            (by_label, label_candidates),
            (by_bare_concept, concept_candidates),
        )
        for table, candidates in lookups:
            for candidate in candidates:
                raw = table.get(cls._normalize_key(candidate))
                if raw is None:
                    continue
                try:
                    return float(raw)
                except (TypeError, ValueError, OverflowError):
                    continue
        return None

    def _fetch_reported_financials(self, stock_code: str) -> Optional[Any]:
        """Fetch annual ``financials_reported`` data, SDK first, HTTP second."""
        try:
            return self.client.financials_reported(symbol=stock_code, freq='annual')
        except Exception as e:
            logger.warning(f"[Finnhub] SDK financials_reported failed for {stock_code}: {e}")
        return self._http_get_json(
            '/stock/financials-reported',
            {'symbol': stock_code, 'freq': 'annual'},
            timeout=30.0,
            body_preview=300,
        )

    def _flatten_report(self, stock_code: str, row: Any) -> Dict[str, Any]:
        """Turn one reported-financials record into a flat, normalized dict.

        Args:
            stock_code: Symbol stored under ``ts_code``.
            row: Record exposing ``.get`` for ``report`` and ``endDate``
                (a DataFrame row in the caller).

        Returns:
            A dict with ``ts_code``, ``end_date``, one entry per
            ``_STATEMENT_FIELDS`` key (``float`` or ``None``),
            ``__free_cash_flow``, and whichever of ``grossprofit_margin``,
            ``netprofit_margin``, ``roa`` and ``roe`` could be computed.
        """
        report = row.get('report')
        if not isinstance(report, dict):
            # Covers None and NaN; NaN is truthy, so ``report or {}`` would
            # let it through and crash on ``.get``.
            report = {}
        end_date = str(row.get('endDate') or '')

        normalized: Dict[str, Any] = {
            'ts_code': stock_code,
            # Per the original author's note, DataManager extracts the year
            # from this field.
            'end_date': end_date,
        }
        for key, section, concepts, labels in self._STATEMENT_FIELDS:
            normalized[key] = self._pick(report.get(section) or [], concepts, labels)

        revenue = normalized['revenue']
        net_income = normalized['n_income']
        gross_profit = normalized['gross_profit']
        total_assets = normalized['total_assets']
        total_equity = normalized['total_hldr_eqy_exc_min_int']
        goodwill = normalized['goodwill']
        n_cashflow_act = normalized['n_cashflow_act']
        capex = normalized['c_pay_acq_const_fiolta']

        # Derived metric: operating cash flow minus capital expenditure.
        free_cash_flow = None
        if n_cashflow_act is not None and capex is not None:
            free_cash_flow = n_cashflow_act - capex
        normalized['__free_cash_flow'] = free_cash_flow

        # Common ratios, only when the denominator is strictly positive.
        if revenue is not None and revenue > 0:
            if gross_profit is not None:
                normalized['grossprofit_margin'] = gross_profit / revenue
            if net_income is not None:
                normalized['netprofit_margin'] = net_income / revenue
        if net_income is not None:
            if total_assets is not None and total_assets > 0:
                normalized['roa'] = net_income / total_assets
            if total_equity is not None and total_equity > 0:
                normalized['roe'] = net_income / total_equity

        self._log_best_effort(
            logging.DEBUG,
            f"[Finnhub] row endDate={end_date} revenue={revenue} net_income={net_income} gross_profit={gross_profit} "
            f"assets={total_assets} equity={total_equity} goodwill={goodwill} n_cfo={n_cashflow_act} capex={capex}",
        )
        return normalized

    @staticmethod
    def _reports_to_series(flat_reports: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
        """Pivot flat per-year reports into ``{metric: [{'period', 'value'}]}``.

        The period is ``<year>1231``, with the year taken from the first four
        characters of ``end_date``. Reports without a four-digit year are
        skipped, as are ``None`` and non-numeric values. For a given metric
        only the first value seen for a period is kept.
        """
        series: Dict[str, List[Dict[str, Any]]] = {}
        seen = set()  # (metric, period) pairs already emitted
        for report in flat_reports:
            year = str(report.get('end_date') or '')[:4]
            if len(year) != 4 or not year.isdigit():
                continue
            period = f"{year}1231"
            for key, value in report.items():
                if key in ('ts_code', 'end_date') or value is None:
                    continue
                try:
                    num = float(value)
                except (TypeError, ValueError, OverflowError):
                    continue
                points = series.setdefault(key, [])
                if (key, period) not in seen:
                    seen.add((key, period))
                    points.append({'period': period, 'value': num})
        return series

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        """Return basic profile information for ``stock_code``.

        Calls the SDK's ``company_profile2``; if that raises, retries once
        through the REST endpoint.

        Returns:
            A dict with ``ts_code``, ``name``, ``area``, ``industry``,
            ``exchange`` and ``ipo_date``, or ``None`` when no profile could
            be obtained.
        """
        def _fetch() -> Optional[Dict[str, Any]]:
            try:
                try:
                    profile = self.client.company_profile2(symbol=stock_code)
                    logger.debug(f"[Finnhub] SDK company_profile2 ok symbol={stock_code} name={profile.get('name') if isinstance(profile, dict) else None}")
                except Exception as e:
                    logger.warning(f"[Finnhub] SDK company_profile2 failed for {stock_code}: {e}")
                    profile = self._http_get_json(
                        '/stock/profile2', {'symbol': stock_code}, timeout=20.0
                    )
                if not profile or not isinstance(profile, dict):
                    return None
                # Normalize to the provider-independent field names.
                return {
                    "ts_code": stock_code,
                    "name": profile.get("name"),
                    "area": profile.get("country"),
                    "industry": profile.get("finnhubIndustry"),
                    "exchange": profile.get("exchange"),
                    "ipo_date": profile.get("ipo"),
                }
            except Exception as e:
                logger.error(f"Finnhub get_stock_basic failed for {stock_code}: {e}")
                return None

        # The SDK and httpx calls block, so keep them off the event loop.
        return await asyncio.get_running_loop().run_in_executor(None, _fetch)

    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        """Return daily candles for ``stock_code`` between two dates.

        Args:
            stock_code: Symbol to query.
            start_date: First day, ``YYYYMMDD``.
            end_date: Last day, ``YYYYMMDD``.

        Returns:
            A list of dicts with ``trade_date`` (``YYYYMMDD``), ``open``,
            ``high``, ``low``, ``close`` and ``vol``; ``[]`` when the
            response status is not ``ok``, the result is empty, or any step
            fails (including unparsable dates).
        """
        def _fetch() -> List[Dict[str, Any]]:
            try:
                # UTC bounds, with the end bound covering the whole end day,
                # so results do not depend on the server's timezone.
                start_ts = self._utc_epoch(start_date)
                end_ts = self._utc_epoch(end_date, end_of_day=True)
                logger.debug(f"[Finnhub] stock_candles symbol={stock_code} D {start_date}->{end_date}")
                res = self.client.stock_candles(stock_code, 'D', start_ts, end_ts)
                status = res.get('s') if isinstance(res, dict) else None
                if status != 'ok':
                    self._log_best_effort(
                        logging.WARNING,
                        f"[Finnhub] stock_candles not ok symbol={stock_code} status={status}",
                    )
                    return []
                df = pd.DataFrame(res)
                if df.empty:
                    return []
                # Candle timestamps are epoch seconds; format them as dates.
                df['trade_date'] = pd.to_datetime(df['t'], unit='s').dt.strftime('%Y%m%d')
                df = df.rename(columns={
                    'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'vol'
                })
                return df[['trade_date', 'open', 'high', 'low', 'close', 'vol']].to_dict('records')
            except Exception as e:
                logger.error(f"Finnhub get_daily_price failed for {stock_code}: {e}")
                return []

        return await asyncio.get_running_loop().run_in_executor(None, _fetch)

    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> List[Dict[str, Any]]:
        """Return annual financial metrics for the requested years.

        Steps: fetch annual reported financials (SDK, then HTTP fallback),
        keep the records whose year is requested, extract and derive metrics
        per record, then pivot them into per-metric series.

        Args:
            stock_code: Symbol to query.
            report_dates: Dates whose first four characters give the years to
                keep, e.g. ``"20231231"``.

        Returns:
            On success, a dict mapping metric name to a list of
            ``{'period': 'YYYY1231', 'value': float}`` points. When nothing
            is available or any step fails, an empty list.
            NOTE(review): the annotation says ``List[Dict[str, Any]]`` but
            the success value is a dict; the original comment says this is
            the format DataManager expects. Values are kept as they were so
            existing callers are unaffected; confirm and fix the annotation
            together with the base class.
        """
        def _fetch() -> Any:
            try:
                # 1) Fetch the annual reports.
                res = self._fetch_reported_financials(stock_code)
                if not isinstance(res, dict) or not res.get('data'):
                    logger.warning(f"[Finnhub] financials-reported empty for {stock_code}")
                    return []
                df = pd.DataFrame(res['data'])
                if df.empty:
                    logger.warning(f"[Finnhub] financials-reported dataframe empty for {stock_code}")
                    return []

                # 2) Keep only the requested years.
                years_to_fetch = {str(date)[:4] for date in (report_dates or [])}
                logger.debug(f"[Finnhub] filter years {sorted(years_to_fetch)} before={len(df)}")
                if 'year' in df.columns:
                    df = df[df['year'].astype(str).isin(years_to_fetch)]
                elif 'endDate' in df.columns:
                    # Fallback: infer the year from endDate when 'year' is absent.
                    df = df[df['endDate'].astype(str).str[:4].isin(years_to_fetch)]
                if df.empty:
                    logger.warning(f"[Finnhub] financials-reported no rows after filter for {stock_code}")
                    return []

                # 3) Flatten each annual record into standardized field names.
                flat_reports = [
                    self._flatten_report(stock_code, row) for _, row in df.iterrows()
                ]

                # 4) Pivot into per-metric series.
                series = self._reports_to_series(flat_reports)
                total_points = sum(len(points) for points in series.values())
                self._log_best_effort(
                    logging.INFO,
                    f"[Finnhub] built series for {stock_code} keys={len(series)} points={total_points}",
                )
                return series
            except Exception as e:
                logger.error(f"Finnhub get_financial_statements failed for {stock_code}: {e}")
                return []

        return await asyncio.get_running_loop().run_in_executor(None, _fetch)