- Covered by data-persistence-service tests (db/api). - No references or compose entries.
311 lines
15 KiB
Python
311 lines
15 KiB
Python
from .base import BaseDataProvider
|
||
from typing import Any, Dict, List, Optional
|
||
import finnhub
|
||
import pandas as pd
|
||
from datetime import datetime, timedelta
|
||
import asyncio
|
||
import logging
|
||
import httpx
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
class FinnhubProvider(BaseDataProvider):
    """Data provider that reads company data from Finnhub.

    Each public coroutine runs a blocking fetch in the event loop's default
    executor. Company profile and financial statements are requested through
    the ``finnhub`` SDK first; if the SDK call raises, the same endpoint is
    requested directly over HTTP with ``httpx``.

    The instance is expected to expose ``self.token`` (the API key) before
    ``_initialize`` runs; ``_initialize`` sets ``self.client``.
    """

    # Base URL used by the direct-HTTP fallback.
    _API_BASE_URL = 'https://finnhub.io/api/v1'

    # (output field, report section, concept candidates, label candidates).
    # The order here is the order of the fields in each normalized report and
    # therefore the key order of the returned series.
    _FIELD_SPECS = (
        (
            'revenue', 'ic',
            ['Revenues', 'RevenueFromContractWithCustomerExcludingAssessedTax', 'SalesRevenueNet', 'Revenue', 'RevenuesNet', 'SalesRevenueGoodsNet'],
            ['Total revenue', 'Revenue', 'Sales revenue'],
        ),
        (
            'n_income', 'ic',
            ['NetIncomeLoss', 'ProfitLoss', 'NetIncomeLossAvailableToCommonStockholdersBasic', 'NetIncomeLossAvailableToCommonStockholdersDiluted'],
            ['Net income', 'Net income (loss)'],
        ),
        (
            'gross_profit', 'ic',
            ['GrossProfit'],
            ['Gross profit'],
        ),
        (
            'total_assets', 'bs',
            ['Assets', 'AssetsTotal', 'AssetsCurrentAndNoncurrent', 'AssetsIncludingAssetsMeasuredAtFairValue'],
            ['Total assets'],
        ),
        (
            'total_hldr_eqy_exc_min_int', 'bs',
            ['StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest', 'StockholdersEquity', 'StockholdersEquityTotal', 'Equity'],
            ['Total equity', "Stockholders' equity"],
        ),
        (
            'goodwill', 'bs',
            ['Goodwill', 'GoodwillAndIntangibleAssets'],
            ['Goodwill', 'Goodwill and intangible assets'],
        ),
        (
            'n_cashflow_act', 'cf',
            ['NetCashProvidedByUsedInOperatingActivities', 'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations', 'NetCashFlowOperating'],
            ['Net cash provided by operating activities'],
        ),
        (
            'c_pay_acq_const_fiolta', 'cf',
            ['CapitalExpenditures', 'PaymentsToAcquirePropertyPlantAndEquipment', 'PaymentsToAcquireProductiveAssets'],
            ['Capital expenditures'],
        ),
    )

    def _initialize(self):
        """Create the Finnhub SDK client from ``self.token``.

        Raises:
            ValueError: if ``self.token`` is empty or missing.
        """
        if not self.token:
            raise ValueError("Finnhub API key not provided.")
        self.client = finnhub.Client(api_key=self.token)
        try:
            # Only the last four characters of the token are ever logged.
            masked = f"***{self.token[-4:]}" if isinstance(self.token, str) and len(self.token) >= 4 else "***"
            logger.info(f"[Finnhub] client initialized (token={masked})")
        except Exception:
            # Best effort: a logging failure must not break initialization.
            pass

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    async def _run_in_executor(self, fn):
        """Run the blocking callable ``fn`` in the loop's default executor."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, fn)

    def _http_get_json(self, path: str, params: Dict[str, Any], timeout: float, body_preview: int) -> Optional[Any]:
        """Request ``path`` directly over HTTP (fallback for a failed SDK call).

        Args:
            path: Endpoint path appended to ``_API_BASE_URL``.
            params: Query-string parameters.
            timeout: Request timeout in seconds, passed to ``httpx.get``.
            body_preview: Number of response-body characters included in the
                error log when the status is not 200.

        Returns:
            The decoded JSON body on HTTP 200, otherwise ``None`` (non-200
            status or any exception, both of which are logged).
        """
        label = path.rsplit('/', 1)[-1]
        try:
            resp = httpx.get(
                f"{self._API_BASE_URL}{path}",
                params=params,
                headers={'X-Finnhub-Token': self.token},
                timeout=timeout,
            )
            logger.debug(f"[Finnhub] HTTP {label} status={resp.status_code} len={len(resp.text)}")
            if resp.status_code == 200:
                return resp.json()
            logger.error(f"[Finnhub] HTTP {label} failed status={resp.status_code} body={resp.text[:body_preview]}")
        except Exception as e:
            # Previously swallowed silently; log it so fallback failures are visible.
            logger.error(f"[Finnhub] HTTP {label} request failed: {e}")
        return None

    @staticmethod
    def _utc_epoch(day: str, end_of_day: bool = False) -> int:
        """Convert a ``YYYYMMDD`` string to a Unix timestamp, read as a UTC day.

        With ``end_of_day`` the result is 23:59:59 of that day instead of
        00:00:00, so the whole day is covered by an inclusive upper bound.

        Raises:
            ValueError: if ``day`` does not match ``%Y%m%d``.
        """
        moment = pd.Timestamp(datetime.strptime(day, '%Y%m%d')).tz_localize('UTC')
        if end_of_day:
            moment = moment + timedelta(days=1) - timedelta(seconds=1)
        return int(moment.timestamp())

    @staticmethod
    def _normalize_key(s: Optional[str]) -> str:
        """Lower-case ``s`` and drop every non-alphanumeric character.

        Non-string input yields an empty string.
        """
        if not isinstance(s, str):
            return ""
        return ''.join(ch.lower() for ch in s if ch.isalnum())

    @classmethod
    def _pick_value(cls, report_block: List[Dict[str, Any]], concept_candidates: List[str], label_candidates: Optional[List[str]] = None) -> Optional[float]:
        """Return the first candidate found in ``report_block`` as a float.

        Items are matched by normalized ``concept`` first, in candidate order,
        and then by normalized ``label``. Candidates whose value is missing or
        cannot be converted to float are skipped. Returns ``None`` when nothing
        matches or the block cannot be indexed.
        """
        if not report_block:
            return None
        try:
            items = [item for item in report_block if isinstance(item, dict)]
            by_concept = {cls._normalize_key(item.get('concept')): item.get('value') for item in items}
            by_label = {cls._normalize_key(item.get('label')): item.get('value') for item in items}
        except Exception:
            return None

        lookups = (
            (by_concept, concept_candidates),
            (by_label, label_candidates or ()),
        )
        for table, candidates in lookups:
            for key in candidates:
                raw = table.get(cls._normalize_key(key))
                if raw is None:
                    continue
                try:
                    return float(raw)
                except Exception:
                    continue
        return None

    @classmethod
    def _normalize_report(cls, stock_code: str, row: Any) -> Dict[str, Any]:
        """Flatten one annual report row into standard field names.

        ``row`` is one row of the ``financials_reported`` data frame; its
        ``report`` entry is read as a dict with ``bs`` / ``ic`` / ``cf`` lists.
        Margin and return ratios are added only when their inputs are present
        and the denominator is positive.
        """
        report = row.get('report')
        if not isinstance(report, dict):
            # A missing report may surface as NaN, which is truthy and has no
            # .get(); treat anything that is not a dict as an empty report.
            report = {}

        end_date = str(row.get('endDate') or '')

        picked = {
            field: cls._pick_value(report.get(section, []), concepts, labels)
            for field, section, concepts, labels in cls._FIELD_SPECS
        }
        revenue = picked['revenue']
        net_income = picked['n_income']
        gross_profit = picked['gross_profit']
        total_assets = picked['total_assets']
        total_equity = picked['total_hldr_eqy_exc_min_int']
        goodwill = picked['goodwill']
        n_cashflow_act = picked['n_cashflow_act']
        capex = picked['c_pay_acq_const_fiolta']

        # Derived metric: operating cash flow minus capital expenditure.
        free_cash_flow = None
        if n_cashflow_act is not None and capex is not None:
            free_cash_flow = n_cashflow_act - capex

        normalized = {
            # Basic meta fields
            'ts_code': stock_code,
            'end_date': end_date,  # the year is extracted from this downstream
            # Standard field names (see financial_data_dictionary)
            **picked,
            '__free_cash_flow': free_cash_flow,
        }

        # Common ratios, computed only when enough data is available.
        if revenue is not None and revenue > 0:
            if gross_profit is not None:
                normalized['grossprofit_margin'] = gross_profit / revenue
            if net_income is not None:
                normalized['netprofit_margin'] = net_income / revenue
        if net_income is not None:
            if total_assets is not None and total_assets > 0:
                normalized['roa'] = net_income / total_assets
            if total_equity is not None and total_equity > 0:
                normalized['roe'] = net_income / total_equity

        logger.debug(
            f"[Finnhub] row endDate={end_date} revenue={revenue} net_income={net_income} gross_profit={gross_profit} "
            f"assets={total_assets} equity={total_equity} goodwill={goodwill} n_cfo={n_cashflow_act} capex={capex}"
        )
        return normalized

    @staticmethod
    def _build_series(flat_reports: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
        """Pivot flat reports into ``{metric: [{'period', 'value'}, ...]}``.

        The period is ``<year>1231`` where the year is the first four
        characters of ``end_date``. Reports without a four-digit year are
        skipped, ``None`` / non-numeric values are dropped, and only the first
        value seen for a given (metric, period) pair is kept.
        """
        series: Dict[str, List[Dict[str, Any]]] = {}
        seen = set()
        for report in flat_reports:
            year = str(report.get('end_date') or '')[:4]
            if len(year) < 4 or not year.isdigit():
                continue
            period = f"{year}1231"

            for key, value in report.items():
                if key in ('ts_code', 'end_date') or value is None:
                    continue
                try:
                    num = float(value)
                except (TypeError, ValueError):
                    continue
                # Avoid duplicate period entries for the same metric.
                if (key, period) in seen:
                    continue
                seen.add((key, period))
                series.setdefault(key, []).append({'period': period, 'value': num})
        return series

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        """Fetch the company profile for ``stock_code``.

        Returns:
            A dict with ``ts_code``, ``name``, ``area``, ``industry``,
            ``exchange`` and ``ipo_date``, or ``None`` when no profile could be
            obtained or any error occurred (errors are logged, not raised).
        """
        def _fetch():
            try:
                try:
                    profile = self.client.company_profile2(symbol=stock_code)
                    logger.debug(f"[Finnhub] SDK company_profile2 ok symbol={stock_code} name={profile.get('name') if isinstance(profile, dict) else None}")
                except Exception as e:
                    logger.warning(f"[Finnhub] SDK company_profile2 failed for {stock_code}: {e}")
                    # Fall back to direct HTTP if the SDK call fails.
                    profile = self._http_get_json(
                        '/stock/profile2',
                        {'symbol': stock_code},
                        timeout=20.0,
                        body_preview=200,
                    )
                if not profile:
                    return None

                # Normalize data
                return {
                    "ts_code": stock_code,
                    "name": profile.get("name"),
                    "area": profile.get("country"),
                    "industry": profile.get("finnhubIndustry"),
                    "exchange": profile.get("exchange"),
                    "ipo_date": profile.get("ipo"),
                }
            except Exception as e:
                logger.error(f"Finnhub get_stock_basic failed for {stock_code}: {e}")
                return None

        return await self._run_in_executor(_fetch)

    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        """Fetch daily candles for ``stock_code`` between two ``YYYYMMDD`` dates.

        Both bounds are interpreted as UTC calendar days, matching the UTC
        decoding used for ``trade_date``, and ``end_date`` is included in full.

        Returns:
            A list of records with ``trade_date`` (``YYYYMMDD``), ``open``,
            ``high``, ``low``, ``close`` and ``vol``. An empty list is returned
            when the response status is not ``ok``, there is no data, or any
            error occurred (errors are logged, not raised).
        """
        def _fetch():
            try:
                start_ts = self._utc_epoch(start_date)
                # Upper bound is the last second of end_date so that candles
                # stamped later than 00:00:00 on that day are not cut off.
                end_ts = self._utc_epoch(end_date, end_of_day=True)

                logger.debug(f"[Finnhub] stock_candles symbol={stock_code} D {start_date}->{end_date}")
                res = self.client.stock_candles(stock_code, 'D', start_ts, end_ts)
                if res.get('s') != 'ok':
                    logger.warning(f"[Finnhub] stock_candles not ok symbol={stock_code} status={res.get('s')}")
                    return []

                df = pd.DataFrame(res)
                if df.empty:
                    return []

                # Normalize data
                df['trade_date'] = pd.to_datetime(df['t'], unit='s').dt.strftime('%Y%m%d')
                df = df.rename(columns={
                    'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'vol'
                })

                return df[['trade_date', 'open', 'high', 'low', 'close', 'vol']].to_dict('records')

            except Exception as e:
                logger.error(f"Finnhub get_daily_price failed for {stock_code}: {e}")
                return []

        return await self._run_in_executor(_fetch)

    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> List[Dict[str, Any]]:
        """Fetch annual reported financials and return them as metric series.

        Only reports whose year appears among the first four characters of the
        entries in ``report_dates`` are kept (``year`` column, or ``endDate``
        when there is no ``year`` column).

        Returns:
            On success, a dict mapping each metric name to a list of
            ``{'period': 'YYYY1231', 'value': float}`` points. On empty data or
            any error, an empty list (errors are logged, not raised).

            NOTE(review): the success value is a dict although the signature is
            annotated as a list; both shapes are kept as they were because
            callers may rely on them -- confirm against the consumer.
        """
        def _fetch():
            try:
                # 1) Pull annual statements (financials_reported, annual).
                try:
                    res = self.client.financials_reported(symbol=stock_code, freq='annual')
                except Exception as e:
                    logger.warning(f"[Finnhub] SDK financials_reported failed for {stock_code}: {e}")
                    # Fallback: direct HTTP
                    res = self._http_get_json(
                        '/stock/financials-reported',
                        {'symbol': stock_code, 'freq': 'annual'},
                        timeout=30.0,
                        body_preview=300,
                    )
                if not res or not res.get('data'):
                    logger.warning(f"[Finnhub] financials-reported empty for {stock_code}")
                    return []

                df = pd.DataFrame(res['data'])
                if df.empty:
                    logger.warning(f"[Finnhub] financials-reported dataframe empty for {stock_code}")
                    return []

                # 2) Keep only the requested years.
                years_to_fetch = {str(date)[:4] for date in report_dates}
                logger.debug(f"[Finnhub] filter years {sorted(years_to_fetch)} before={len(df)}")
                if 'year' in df.columns:
                    df = df[df['year'].astype(str).isin(years_to_fetch)]
                elif 'endDate' in df.columns:
                    # Fallback: without a year column, infer the year from endDate.
                    df = df[df['endDate'].astype(str).str[:4].isin(years_to_fetch)]

                if df.empty:
                    logger.warning(f"[Finnhub] financials-reported no rows after filter for {stock_code}")
                    return []

                # 3) Flatten every annual record into standard field names.
                flat_reports = [self._normalize_report(stock_code, row) for _, row in df.iterrows()]

                # 4) Convert the flat reports into per-metric series.
                series = self._build_series(flat_reports)
                total_points = sum(len(v) for v in series.values())
                logger.info(f"[Finnhub] built series for {stock_code} keys={len(series)} points={total_points}")
                return series

            except Exception as e:
                logger.error(f"Finnhub get_financial_statements failed for {stock_code}: {e}")
                return []

        return await self._run_in_executor(_fetch)
|