"""Finnhub-backed data provider: company profile, daily candles, annual financials."""

import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Sequence

import finnhub
import httpx
import pandas as pd

from .base import BaseDataProvider

logger = logging.getLogger(__name__)


def _normalize_key(raw: Any) -> str:
    """Return *raw* lower-cased with every non-alphanumeric character removed.

    Non-string input yields an empty string, so it can never match a candidate.
    """
    if not isinstance(raw, str):
        return ""
    return "".join(ch.lower() for ch in raw if ch.isalnum())


def _pick_value(
    report_block: Any,
    concept_candidates: Sequence[str],
    label_candidates: Optional[Sequence[str]] = None,
) -> Optional[float]:
    """Return the first usable numeric value found in a statement block.

    Candidates are tried in order, first against each item's ``concept`` and
    then against its ``label``; both sides are compared after
    ``_normalize_key``. Values that cannot be converted to ``float`` are
    skipped. Returns ``None`` when nothing matches or the block is not a
    non-empty list/tuple.
    """
    if not report_block or not isinstance(report_block, (list, tuple)):
        return None

    items = [item for item in report_block if isinstance(item, dict)]
    # When a key occurs more than once, the last occurrence wins.
    by_concept = {_normalize_key(it.get("concept")): it.get("value") for it in items}
    by_label = {_normalize_key(it.get("label")): it.get("value") for it in items}

    lookups = (
        (by_concept, concept_candidates),
        (by_label, label_candidates or ()),
    )
    for table, candidates in lookups:
        for candidate in candidates:
            raw = table.get(_normalize_key(candidate))
            if raw is None:
                continue
            try:
                return float(raw)
            except (TypeError, ValueError, OverflowError):
                continue
    return None


def _utc_timestamp(date_str: str, end_of_day: bool = False) -> int:
    """Convert a ``YYYYMMDD`` string to a UNIX timestamp, interpreted in UTC.

    With ``end_of_day=True`` the result is 23:59:59 of that day, which makes
    the date usable as an inclusive upper bound. Raises ``ValueError`` when
    *date_str* does not match ``%Y%m%d``.
    """
    moment = datetime.strptime(date_str, "%Y%m%d").replace(tzinfo=timezone.utc)
    if end_of_day:
        moment += timedelta(days=1) - timedelta(seconds=1)
    return int(moment.timestamp())


def _ratio(numerator: Optional[float], denominator: Optional[float]) -> Optional[float]:
    """Return ``numerator / denominator``, or ``None`` unless both exist and the denominator is > 0."""
    if numerator is None or denominator is None or not denominator > 0:
        return None
    return numerator / denominator


def _record_year(record: Dict[str, Any]) -> str:
    """Return the record's ``year`` as a string, falling back to the first 4 chars of ``endDate``."""
    year = record.get("year")
    if year is not None:
        return str(year)
    return str(record.get("endDate") or "")[:4]


def _normalize_report(stock_code: str, record: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten one ``financials-reported`` record into standardized field names.

    Reads the ``bs`` / ``ic`` / ``cf`` lists under ``record['report']`` and
    extracts a fixed set of metrics with ``_pick_value``. Margin and return
    ratios are added only when their inputs are available and the denominator
    is positive.
    """
    report = record.get("report")
    if not isinstance(report, dict):
        # A missing or malformed report must not abort the whole request.
        report = {}
    bs = report.get("bs") or []
    ic = report.get("ic") or []
    cf = report.get("cf") or []
    end_date = str(record.get("endDate") or "")

    revenue = _pick_value(
        ic,
        concept_candidates=[
            'Revenues',
            'RevenueFromContractWithCustomerExcludingAssessedTax',
            'SalesRevenueNet',
            'Revenue',
            'RevenuesNet',
            'SalesRevenueGoodsNet',
        ],
        label_candidates=['Total revenue', 'Revenue', 'Sales revenue'],
    )
    net_income = _pick_value(
        ic,
        concept_candidates=[
            'NetIncomeLoss',
            'ProfitLoss',
            'NetIncomeLossAvailableToCommonStockholdersBasic',
            'NetIncomeLossAvailableToCommonStockholdersDiluted',
        ],
        label_candidates=['Net income', 'Net income (loss)'],
    )
    gross_profit = _pick_value(
        ic,
        concept_candidates=['GrossProfit'],
        label_candidates=['Gross profit'],
    )
    total_assets = _pick_value(
        bs,
        concept_candidates=[
            'Assets',
            'AssetsTotal',
            'AssetsCurrentAndNoncurrent',
            'AssetsIncludingAssetsMeasuredAtFairValue',
        ],
        label_candidates=['Total assets'],
    )
    total_equity = _pick_value(
        bs,
        concept_candidates=[
            'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
            'StockholdersEquity',
            'StockholdersEquityTotal',
            'Equity',
        ],
        label_candidates=['Total equity', "Stockholders' equity"],
    )
    goodwill = _pick_value(
        bs,
        concept_candidates=['Goodwill', 'GoodwillAndIntangibleAssets'],
        label_candidates=['Goodwill', 'Goodwill and intangible assets'],
    )
    n_cashflow_act = _pick_value(
        cf,
        concept_candidates=[
            'NetCashProvidedByUsedInOperatingActivities',
            'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations',
            'NetCashFlowOperating',
        ],
        label_candidates=['Net cash provided by operating activities'],
    )
    capex = _pick_value(
        cf,
        concept_candidates=[
            'CapitalExpenditures',
            'PaymentsToAcquirePropertyPlantAndEquipment',
            'PaymentsToAcquireProductiveAssets',
        ],
        label_candidates=['Capital expenditures'],
    )

    # Derived metric: operating cash flow minus capital expenditure.
    free_cash_flow = None
    if n_cashflow_act is not None and capex is not None:
        free_cash_flow = n_cashflow_act - capex

    normalized: Dict[str, Any] = {
        # Basic metadata
        'ts_code': stock_code,
        'end_date': end_date,  # the year is later taken from the first 4 characters
        # Standardized metric names
        'revenue': revenue,
        'n_income': net_income,
        'gross_profit': gross_profit,
        'total_assets': total_assets,
        'total_hldr_eqy_exc_min_int': total_equity,
        'goodwill': goodwill,
        'n_cashflow_act': n_cashflow_act,
        'c_pay_acq_const_fiolta': capex,
        '__free_cash_flow': free_cash_flow,
    }

    # Common ratios, present only when they can actually be computed.
    ratios = {
        'grossprofit_margin': _ratio(gross_profit, revenue),
        'netprofit_margin': _ratio(net_income, revenue),
        'roa': _ratio(net_income, total_assets),
        'roe': _ratio(net_income, total_equity),
    }
    normalized.update({name: value for name, value in ratios.items() if value is not None})

    logger.debug(
        f"[Finnhub] row endDate={end_date} revenue={revenue} net_income={net_income} gross_profit={gross_profit} "
        f"assets={total_assets} equity={total_equity} goodwill={goodwill} n_cfo={n_cashflow_act} capex={capex}"
    )
    return normalized


def _build_series(flat_reports: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Pivot flat per-year reports into ``{metric: [{'period', 'value'}, ...]}``.

    The period is ``<year>1231`` where the year is the first 4 characters of
    ``end_date``. Only values convertible to ``float`` are kept, and for each
    metric the first value seen for a period wins.
    """
    series: Dict[str, List[Dict[str, Any]]] = {}
    seen_periods: Dict[str, set] = {}
    for report in flat_reports:
        end_date = str(report.get('end_date') or '')
        if len(end_date) < 4:
            continue
        period = f"{end_date[:4]}1231"
        for key, value in report.items():
            if key in ('ts_code', 'end_date') or value is None:
                continue
            try:
                num = float(value)
            except (TypeError, ValueError, OverflowError):
                continue
            points = series.setdefault(key, [])
            periods = seen_periods.setdefault(key, set())
            if period in periods:
                continue
            periods.add(period)
            points.append({'period': period, 'value': num})
    return series


class FinnhubProvider(BaseDataProvider):
    """Data provider backed by the Finnhub SDK, with a direct-HTTP fallback.

    The Finnhub client calls are blocking, so every public coroutine runs its
    work in the event loop's default executor.
    """

    def _initialize(self):
        """Create the Finnhub SDK client from ``self.token``.

        Raises:
            ValueError: if no API token is configured.
        """
        if not self.token:
            raise ValueError("Finnhub API key not provided.")
        self.client = finnhub.Client(api_key=self.token)
        try:
            masked = f"***{self.token[-4:]}" if isinstance(self.token, str) and len(self.token) >= 4 else "***"
            logger.info(f"[Finnhub] client initialized (token={masked})")
        except Exception:
            # Deliberate best-effort: a logging failure must not break initialization.
            pass

    def _http_get_json(
        self,
        endpoint: str,
        params: Dict[str, Any],
        *,
        timeout: float,
        body_preview: int,
    ) -> Optional[Any]:
        """GET ``/stock/<endpoint>`` directly and return the decoded JSON.

        Returns ``None`` on a non-200 status or on any request/decoding error;
        both cases are logged rather than raised. At most *body_preview*
        characters of an error body are logged.
        """
        try:
            resp = httpx.get(
                f'https://finnhub.io/api/v1/stock/{endpoint}',
                params=params,
                headers={'X-Finnhub-Token': self.token},
                timeout=timeout,
            )
            logger.debug(f"[Finnhub] HTTP {endpoint} status={resp.status_code} len={len(resp.text)}")
            if resp.status_code == 200:
                return resp.json()
            logger.error(
                f"[Finnhub] HTTP {endpoint} failed status={resp.status_code} body={resp.text[:body_preview]}"
            )
        except Exception as exc:
            logger.warning(f"[Finnhub] HTTP {endpoint} fallback failed: {exc}")
        return None

    def _fetch_profile(self, stock_code: str) -> Optional[Any]:
        """Fetch the company profile via the SDK; fall back to HTTP only if the SDK call raises."""
        try:
            profile = self.client.company_profile2(symbol=stock_code)
        except Exception as e:
            logger.warning(f"[Finnhub] SDK company_profile2 failed for {stock_code}: {e}")
            return self._http_get_json(
                'profile2', {'symbol': stock_code}, timeout=20.0, body_preview=200
            )
        logger.debug(
            f"[Finnhub] SDK company_profile2 ok symbol={stock_code} "
            f"name={profile.get('name') if isinstance(profile, dict) else None}"
        )
        return profile

    def _fetch_annual_financials(self, stock_code: str) -> Optional[Any]:
        """Fetch annual ``financials_reported`` via the SDK; fall back to HTTP only if the SDK call raises."""
        try:
            return self.client.financials_reported(symbol=stock_code, freq='annual')
        except Exception as e:
            logger.warning(f"[Finnhub] SDK financials_reported failed for {stock_code}: {e}")
            return self._http_get_json(
                'financials-reported',
                {'symbol': stock_code, 'freq': 'annual'},
                timeout=30.0,
                body_preview=300,
            )

    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        """Return normalized basic company information for *stock_code*.

        Returns:
            A dict with ``ts_code``, ``name``, ``area``, ``industry``,
            ``exchange`` and ``ipo_date``, or ``None`` when the profile is
            empty, malformed, or could not be fetched. Errors are logged, not
            raised.
        """
        def _fetch():
            try:
                profile = self._fetch_profile(stock_code)
                if not profile or not isinstance(profile, dict):
                    return None
                # Normalize data
                return {
                    "ts_code": stock_code,
                    "name": profile.get("name"),
                    "area": profile.get("country"),
                    "industry": profile.get("finnhubIndustry"),
                    "exchange": profile.get("exchange"),
                    "ipo_date": profile.get("ipo"),
                }
            except Exception as e:
                logger.error(f"Finnhub get_stock_basic failed for {stock_code}: {e}")
                return None

        return await asyncio.get_running_loop().run_in_executor(None, _fetch)

    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        """Return daily candles for *stock_code* between two ``YYYYMMDD`` dates.

        Both bounds are converted to UTC timestamps so the requested window
        does not depend on the host's local timezone, and the upper bound is
        the end of *end_date* so that day is included.

        Returns:
            A list of dicts with ``trade_date`` (``YYYYMMDD``, derived from the
            candle timestamp in UTC), ``open``, ``high``, ``low``, ``close``
            and ``vol``. An empty list is returned when the response status is
            not ``'ok'``, there is no data, or any error occurs (logged).
        """
        def _fetch():
            try:
                start_ts = _utc_timestamp(start_date)
                end_ts = _utc_timestamp(end_date, end_of_day=True)
                logger.debug(f"[Finnhub] stock_candles symbol={stock_code} D {start_date}->{end_date}")
                res = self.client.stock_candles(stock_code, 'D', start_ts, end_ts)

                status = res.get('s') if isinstance(res, dict) else None
                if status != 'ok':
                    logger.warning(f"[Finnhub] stock_candles not ok symbol={stock_code} status={status}")
                    return []

                df = pd.DataFrame(res)
                if df.empty:
                    return []

                # Normalize data
                df['trade_date'] = pd.to_datetime(df['t'], unit='s').dt.strftime('%Y%m%d')
                df = df.rename(columns={
                    'o': 'open',
                    'h': 'high',
                    'l': 'low',
                    'c': 'close',
                    'v': 'vol',
                })
                return df[['trade_date', 'open', 'high', 'low', 'close', 'vol']].to_dict('records')
            except Exception as e:
                logger.error(f"Finnhub get_daily_price failed for {stock_code}: {e}")
                return []

        return await asyncio.get_running_loop().run_in_executor(None, _fetch)

    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> List[Dict[str, Any]]:
        """Return annual financial metrics for the years named in *report_dates*.

        Only the first four characters of each entry in *report_dates* are
        used (the year).

        Returns:
            On success, a dict mapping each metric name to a list of
            ``{'period': '<year>1231', 'value': float}`` points. NOTE: this is
            a dict despite the ``List`` return annotation, which is kept
            unchanged for interface compatibility. When nothing is available
            or an error occurs (logged), an empty list is returned.
        """
        def _fetch():
            try:
                # 1) Fetch the annual statements (financials_reported, annual).
                res = self._fetch_annual_financials(stock_code)
                raw_records = res.get('data') if isinstance(res, dict) else None
                records = [rec for rec in (raw_records or []) if isinstance(rec, dict)]
                if not records:
                    logger.warning(f"[Finnhub] financials-reported empty for {stock_code}")
                    return []

                # 2) Keep only the requested years; a record without `year`
                #    is matched on the year prefix of its `endDate` instead.
                years_to_fetch = {str(date)[:4] for date in report_dates}
                logger.debug(f"[Finnhub] filter years {sorted(years_to_fetch)} before={len(records)}")
                records = [rec for rec in records if _record_year(rec) in years_to_fetch]
                if not records:
                    logger.warning(f"[Finnhub] financials-reported no rows after filter for {stock_code}")
                    return []

                # 3) Flatten each annual record into standardized field names.
                flat_reports = [_normalize_report(stock_code, rec) for rec in records]

                # 4) Pivot into the per-metric series structure.
                series = _build_series(flat_reports)
                total_points = sum(len(points) for points in series.values())
                logger.info(f"[Finnhub] built series for {stock_code} keys={len(series)} points={total_points}")
                return series
            except Exception as e:
                logger.error(f"Finnhub get_financial_statements failed for {stock_code}: {e}")
                return []

        return await asyncio.get_running_loop().run_in_executor(None, _fetch)