import pandas as pd import os import time from .base import DataFetcher from .ifind_client import IFindClient from storage.file_io import DataStorage class JpFetcher(DataFetcher): def __init__(self, api_key: str): # api_key is the iFinD Refresh Token super().__init__(api_key) self.cli = IFindClient(refresh_token=api_key) self.storage = DataStorage() self._basic_info_cache = {} def _get_ifind_code(self, symbol: str) -> str: """保持逻辑一致性,如果是纯数字则补齐后缀 .T,否则直接传""" if symbol.isdigit(): return f"{symbol}.T" return symbol def _fetch_basic_info(self, symbol: str) -> dict: """获取公司的基本信息:中文名称、会计年结日、上市日期""" code = self._get_ifind_code(symbol) if code in self._basic_info_cache: return self._basic_info_cache[code] params = { "codes": code, "indipara": [ {"indicator": "corp_cn_name", "indiparams": []}, {"indicator": "accounting_date", "indiparams": []}, {"indicator": "ipo_date", "indiparams": []} ] } # print(f"iFinD API Request: endpoint=basic_data_service, params={params}") res = self.cli.post("basic_data_service", params) df = self._parse_ifind_tables(res) if not df.empty: self._save_raw_data(df, symbol, "basic_info_raw") info = { "name": "", "accounting_date": "1231", # 默认 12-31 "ipo_date": "" } if not df.empty: row = df.iloc[0] info["name"] = str(row.get("corp_cn_name", "")) # accounting_date 通常返回类似 "03-31" 或 "1231" acc_date = str(row.get("accounting_date", "1231")).replace("-", "").replace("/", "") # 好像是ifind的API有问题,明明财报是0331,但如果去读20240331,就是空数据 # if acc_date: # info["accounting_date"] = acc_date info["ipo_date"] = str(row.get("ipo_date", "")).replace("-", "").replace("/", "") self._basic_info_cache[code] = info return info def _save_raw_data(self, data: any, symbol: str, name: str): if data is None: return # 如果是字典(API 响应),直接保存 if isinstance(data, dict): df = pd.DataFrame([data]) # 包装成单行 DF 或简单处理 else: df = data self.storage.save_data(df, 'JP', symbol, f"raw_{name}") def _parse_ifind_tables(self, res: dict) -> pd.DataFrame: """通用解析 iFinD 返回结果的 tables 结构为 DataFrame""" if not res: return pd.DataFrame() if res.get("errorcode") != 0: print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})") return pd.DataFrame() tables = res.get("tables", []) if not tables: print("iFinD API Warning: No tables found in response.") return pd.DataFrame() # 提取第一个 table table_info = tables[0] table_data = table_info.get("table", {}) times = table_info.get("time", []) if not table_data: return pd.DataFrame() # Ensure all values are lists to avoid pd.DataFrame ValueError with scalars processed_table_data = {} for k, v in table_data.items(): if not isinstance(v, list): processed_table_data[k] = [v] else: processed_table_data[k] = v df = pd.DataFrame(processed_table_data) if times and len(times) == len(df): df['end_date'] = [str(t).replace('-', '').replace('/', '').split(' ')[0] for t in times] elif times and len(df) == 1: df['end_date'] = str(times[0]).replace('-', '').replace('/', '').split(' ')[0] # If still no end_date, look for it in columns if 'end_date' not in df.columns: for col in ['time', 'date', 'trade_date', 'REPORT_DATE']: if col in df.columns: df['end_date'] = df[col].astype(str).str.replace('-', '').str.replace('/', '').str.split(' ').str[0] break return df def _filter_data(self, df: pd.DataFrame) -> pd.DataFrame: if df.empty or 'end_date' not in df.columns: return df df = df.sort_values(by='end_date', ascending=False) df = df.drop_duplicates(subset=['end_date'], keep='first') if df.empty: return df latest_record = df.iloc[[0]] try: latest_date_str = str(latest_record['end_date'].values[0]) # Handle YoY logic: YYYYMMDD -> (YYYY-1)MMDD last_year_date_str = str(int(latest_date_str) - 10000) comparable_record = df[df['end_date'].astype(str) == last_year_date_str] except: comparable_record = pd.DataFrame() # 对齐 CN 逻辑,日本公司虽然多是 0331 截止 is_annual = df['end_date'].astype(str).str.endswith('0331') | df['end_date'].astype(str).str.endswith('1231') annual_records = df[is_annual] combined = pd.concat([latest_record, comparable_record, annual_records]) combined = combined.drop_duplicates(subset=['end_date']) combined = combined.sort_values(by='end_date', ascending=False) return combined def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame: """通用获取历年会计年结日的财务数据 (CNY 结算)""" code = self._get_ifind_code(symbol) basic_info = self._fetch_basic_info(symbol) acc_date = basic_info.get("accounting_date", "1231") current_year = int(time.strftime("%Y")) # 1. First, determine the most recent valid year by trying backwards from current year last_valid_year = None # Try up to 3 years back to find the latest available report for offset in range(3): test_year = current_year - offset test_date = f"{test_year}{acc_date}" # Use the first indicator to test availability first_indicator = indicator_configs[0] params = { "codes": code, "indipara": [ {"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]} ] } res = self.cli.post("basic_data_service", params) df = self._parse_ifind_tables(res) if not df.empty: # Check for non-null values valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None if pd.notna(valid_val) and valid_val != 0: last_valid_year = test_year break if last_valid_year is None: last_valid_year = current_year all_dfs = [] # 2. Fetch 5 years starting from the last valid year for i in range(5): target_year = last_valid_year - i target_date = f"{target_year}{acc_date}" params = { "codes": code, "indipara": [ {"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]} for item in indicator_configs ] } # print(f"iFinD API Request: endpoint=basic_data_service, params={params}") res = self.cli.post("basic_data_service", params) df = self._parse_ifind_tables(res) if not df.empty: # 强制设置 end_date 以防 API 返回不一致 df['end_date'] = target_date all_dfs.append(df) if not all_dfs: return pd.DataFrame() # Remove empty or Check for all-NA columns DataFrames (Fixing FutureWarning) all_dfs = [d for d in all_dfs if not d.empty and not d.isna().all().all()] if not all_dfs: return pd.DataFrame() return pd.concat(all_dfs, ignore_index=True) def get_income_statement(self, symbol: str) -> pd.DataFrame: indicators = [ {"indicator": "revenue_oas"}, {"indicator": "gross_profit_oas"}, {"indicator": "sga_expenses_oas"}, {"indicator": "selling_marketing_expenses_oas"}, {"indicator": "ga_expenses_oas"}, {"indicator": "rd_expenses_oas"}, {"indicator": "income_tax_expense_oas"}, {"indicator": "net_income_attri_to_common_sh_oas"}, {"indicator": "operating_income_oas"} ] df = self._fetch_financial_data_annual(symbol, indicators) if df.empty: return df self._save_raw_data(df, symbol, "income_statement_raw") rename_map = { 'revenue_oas': 'revenue', 'gross_profit_oas': 'gross_profit', 'sga_expenses_oas': 'sga_exp', 'selling_marketing_expenses_oas': 'selling_marketing_exp', 'ga_expenses_oas': 'ga_exp', 'rd_expenses_oas': 'rd_exp', 'income_tax_expense_oas': 'income_tax', 'net_income_attri_to_common_sh_oas': 'net_income', 'operating_income_oas': 'operating_profit' } df_filtered = df.rename(columns=rename_map) # 数值转换 for col in df_filtered.columns: if col not in ['date', 'end_date']: df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce') return self._filter_data(df_filtered) def get_balance_sheet(self, symbol: str) -> pd.DataFrame: indicators = [ {"indicator": "cash_equi_short_term_inve_oas"}, {"indicator": "accou_and_notes_recei_oas"}, {"indicator": "inventories_oas"}, {"indicator": "ppe_net_oas"}, {"indicator": "long_term_inv_and_receiv_oas"}, {"indicator": "goodwill_and_intasset_oas"}, {"indicator": "short_term_debt_oas"}, {"indicator": "short_term_borrowings_oas"}, {"indicator": "account_and_note_payable_oas"}, {"indicator": "contra_liabilities_current_oas"}, {"indicator": "advance_from_cust_current_oas"}, {"indicator": "defer_revenue_current_oas"}, {"indicator": "long_term_debt_oas"}, {"indicator": "long_term_borrowings_oas"}, {"indicator": "total_assets_oas"}, {"indicator": "equity_attri_to_companyowner_oas"}, {"indicator": "prepaid_expenses_current_oas"} ] df = self._fetch_financial_data_annual(symbol, indicators) if df.empty: return df self._save_raw_data(df, symbol, "balance_sheet_raw") rename_map = { 'cash_equi_short_term_inve_oas': 'cash', 'accou_and_notes_recei_oas': 'receivables', 'inventories_oas': 'inventory', 'ppe_net_oas': 'fixed_assets', 'long_term_inv_and_receiv_oas': 'long_term_investments', 'goodwill_and_intasset_oas': 'goodwill', 'short_term_debt_oas': 'short_term_debt', 'short_term_borrowings_oas': 'short_term_borrowings', 'account_and_note_payable_oas': 'accounts_payable', 'contra_liabilities_current_oas': 'contract_liabilities', 'advance_from_cust_current_oas': 'advances_from_customers', 'defer_revenue_current_oas': 'deferred_revenue', 'long_term_debt_oas': 'long_term_debt', 'long_term_borrowings_oas': 'long_term_borrowings', 'total_assets_oas': 'total_assets', 'equity_attri_to_companyowner_oas': 'total_equity', 'prepaid_expenses_current_oas': 'prepayment' } df_filtered = df.rename(columns=rename_map) # 如果没有负债合计,用资产减权益 if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all(): if 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns: df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity'] for col in df_filtered.columns: if col not in ['date', 'end_date']: df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce') return self._filter_data(df_filtered) def get_cash_flow(self, symbol: str) -> pd.DataFrame: indicators = [ {"indicator": "net_cash_flows_from_oa_oas"}, {"indicator": "purchase_of_ppe_and_ia_oas"}, {"indicator": "dividends_paid_oas"} ] df = self._fetch_financial_data_annual(symbol, indicators) if df.empty: return df self._save_raw_data(df, symbol, "cash_flow_raw") rename_map = { 'net_cash_flows_from_oa_oas': 'ocf', 'purchase_of_ppe_and_ia_oas': 'capex', 'dividends_paid_oas': 'dividends' } df_filtered = df.rename(columns=rename_map) for col in df_filtered.columns: if col not in ['date', 'end_date']: df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce') if 'capex' in df_filtered.columns: df_filtered['capex'] = df_filtered['capex'].abs() return self._filter_data(df_filtered) def get_market_metrics(self, symbol: str) -> dict: """获取公司基本信息(名称、上市日期等静态数据)""" basic_info = self._fetch_basic_info(symbol) metrics = { "name": basic_info.get("name", ""), "list_date": basic_info.get("ipo_date", "") } return metrics def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame: """获取历史日期的收盘价和市值 (通过 cmd_history_quotation)""" code = self._get_ifind_code(symbol) if not dates: return pd.DataFrame() results = [] # get_historical_metrics里面不要拿所有日期数据了,而是一个一个数据拿 for d in dates: d_str = str(d).replace('-', '').replace('/', '') fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str params = { "codes": code, "startdate": fmt_d, "enddate": fmt_d, "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"}, "indipara": [ {"indicator": "pre_close", "indiparams": ["", "0", "CNY"]}, {"indicator": "market_value", "indiparams": ["", "CNY"]} ] } # print(f"iFinD API Request: endpoint=date_sequence, params={params}") res = self.cli.post("date_sequence", params) df_seq = self._parse_ifind_tables(res) metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0} if not df_seq.empty: # 找到最接近该日期且不晚于该日期的记录 match = df_seq[df_seq['end_date'] <= d_str].tail(1) if 'end_date' in df_seq.columns else df_seq.tail(1) if not match.empty: if 'pre_close' in match.columns: metrics['Price'] = float(match['pre_close'].iloc[0] or 0.0) if 'market_value' in match.columns: metrics['MarketCap'] = float(match['market_value'].iloc[0] or 0.0) results.append(metrics) df_hist = pd.DataFrame(results) self._save_raw_data(df_hist, symbol, "historical_metrics_raw") return df_hist def get_dividends(self, symbol: str) -> pd.DataFrame: """获取历年年度累计分红记录 (逐年获取)""" code = self._get_ifind_code(symbol) basic_info = self._fetch_basic_info(symbol) acc_date = basic_info.get("accounting_date", "1231") current_year = int(time.strftime("%Y")) results = [] # 获取最近 5 年的数据 for i in range(5): year_str = str(current_year - i) params = { "codes": code, "indipara": [ {"indicator": "annual_cum_dividend", "indiparams": [year_str, "CNY"]} ] } # print(f"iFinD API Request: endpoint=basic_data_service, params={params}") res = self.cli.post("basic_data_service", params) df = self._parse_ifind_tables(res) if not df.empty and 'annual_cum_dividend' in df.columns: val = df['annual_cum_dividend'].iloc[0] if pd.notna(val) and val != 0: results.append({ 'date_str': f"{year_str}{acc_date}", 'dividends': float(val) }) if not results: return pd.DataFrame() df_div = pd.DataFrame(results) self._save_raw_data(df_div, symbol, "dividends_raw") return df_div def get_repurchases(self, symbol: str) -> pd.DataFrame: """获取历年年度回购记录 (从 repur_num_new 获取)""" code = self._get_ifind_code(symbol) basic_info = self._fetch_basic_info(symbol) acc_date = basic_info.get("accounting_date", "1231") mm = acc_date[:2] dd = acc_date[2:] # 为了对应日期格式 YYYY-MM-DD fmt_mm_dd = f"{mm}-{dd}" current_year = int(time.strftime("%Y")) results = [] # 获取最近 5 年的数据 for i in range(5): target_year = current_year - i start_date = f"{target_year - 1}-{fmt_mm_dd}" end_date = f"{target_year}-{fmt_mm_dd}" params = { "codes": code, "indipara": [ {"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]} ] } # print(f"iFinD API Request: endpoint=basic_data_service, params={params}") res = self.cli.post("basic_data_service", params) df = self._parse_ifind_tables(res) if not df.empty and 'repur_num_new' in df.columns: val = df['repur_num_new'].iloc[0] if pd.notna(val) and val != 0: results.append({ 'date_str': f"{target_year}{acc_date}", 'repurchases': float(val) }) if not results: return pd.DataFrame() df_repur = pd.DataFrame(results) self._save_raw_data(df_repur, symbol, "repurchases_raw") return df_repur def get_employee_count(self, symbol: str) -> pd.DataFrame: """获取历年员工人数""" code = self._get_ifind_code(symbol) basic_info = self._fetch_basic_info(symbol) acc_date = basic_info.get("accounting_date", "1231") mm = acc_date[:2] dd = acc_date[2:] current_year = int(time.strftime("%Y")) results = [] # 获取最近 5 年的数据 for i in range(5): target_year = current_year - i target_date = f"{target_year}-{mm}-{dd}" params = { "codes": code, "indipara": [ {"indicator": "staff_num", "indiparams": [target_date]} ] } res = self.cli.post("basic_data_service", params) df = self._parse_ifind_tables(res) if not df.empty and 'staff_num' in df.columns: val = df['staff_num'].iloc[0] if pd.notna(val) and val != 0: results.append({ 'date_str': f"{target_year}{acc_date}", 'employee_count': float(val) }) if not results: return pd.DataFrame() df_emp = pd.DataFrame(results) self._save_raw_data(df_emp, symbol, "employee_count_raw") return df_emp