From daf5808f054270d2f7f11080479c98fe03d2f9b0 Mon Sep 17 00:00:00 2001 From: xucheng Date: Wed, 7 Jan 2026 21:15:24 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E8=B6=8A=E5=8D=97?= =?UTF-8?q?=E5=B8=82=E5=9C=BA=E6=95=B0=E6=8D=AE=E6=8A=93=E5=8F=96=E3=80=81?= =?UTF-8?q?=E5=88=86=E6=9E=90=E4=B8=8E=E6=8A=A5=E5=91=8A=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E5=B9=B6=E4=BC=98=E5=8C=96=E9=A6=99=E6=B8=AF=E5=B8=82?= =?UTF-8?q?=E5=9C=BA=E6=95=B0=E6=8D=AE=E6=8A=93=E5=8F=96=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E5=8F=8A=E6=9B=B4=E6=96=B0=E9=83=A8=E5=88=86JP=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 15 +- backend/app/database.py | 16 + backend/app/models.py | 3 + backend/app/services/analysis_service.py | 21 +- backend/app/services/llm_engine.py | 31 +- data/HK/00700.HK/raw_balance_sheet_raw.csv | 5 - data/HK/00700.HK/raw_basic_info_raw.csv | 2 - data/HK/00700.HK/raw_cash_flow_raw.csv | 5 - data/HK/00700.HK/raw_dividends_raw.csv | 4 - data/HK/00700.HK/raw_employee_count_raw.csv | 6 - .../00700.HK/raw_historical_metrics_raw.csv | 5 - data/HK/00700.HK/raw_income_statement_raw.csv | 5 - data/HK/00700.HK/raw_repurchases_raw.csv | 6 - data/HK/00700.HK/report.md | 89 ---- data/JP/2503.T/raw_balance_sheet_raw.csv | 4 +- data/JP/2503.T/raw_cash_flow_raw.csv | 4 +- data/JP/2503.T/raw_historical_metrics_raw.csv | 4 +- data/JP/2503.T/raw_income_statement_raw.csv | 4 +- data/JP/2503.T/report.html | 92 ++-- data/JP/2503.T/report.md | 102 ++-- frontend/src/app/analysis/[id]/page.tsx | 69 ++- frontend/src/app/page.tsx | 2 +- main.py => run_fetcher.py | 6 +- src/analysis/vn_analyzer.py | 116 +++++ src/fetchers/factory.py | 9 + src/fetchers/hk_fetcher.py | 458 +++++++++++++---- src/fetchers/jp_fetcher.py | 34 +- src/fetchers/us_fetcher.py | 9 + src/fetchers/vn_fetcher.py | 474 ++++++++++++++++++ src/reporting/jp_report_generator.py | 30 +- .../reporting/vn_report_generator.py | 334 
++++++++---- src/strategies/vn_strategy.py | 76 +++ test_hk_fetcher_logic.py | 105 ++++ 33 files changed, 1647 insertions(+), 498 deletions(-) delete mode 100644 data/HK/00700.HK/raw_balance_sheet_raw.csv delete mode 100644 data/HK/00700.HK/raw_basic_info_raw.csv delete mode 100644 data/HK/00700.HK/raw_cash_flow_raw.csv delete mode 100644 data/HK/00700.HK/raw_dividends_raw.csv delete mode 100644 data/HK/00700.HK/raw_employee_count_raw.csv delete mode 100644 data/HK/00700.HK/raw_historical_metrics_raw.csv delete mode 100644 data/HK/00700.HK/raw_income_statement_raw.csv delete mode 100644 data/HK/00700.HK/raw_repurchases_raw.csv delete mode 100644 data/HK/00700.HK/report.md rename main.py => run_fetcher.py (88%) create mode 100644 src/analysis/vn_analyzer.py create mode 100644 src/fetchers/vn_fetcher.py rename data/HK/00700.HK/report.html => src/reporting/vn_report_generator.py (62%) create mode 100644 src/strategies/vn_strategy.py create mode 100644 test_hk_fetcher_logic.py diff --git a/README.md b/README.md index 89970e7..351c9dc 100644 --- a/README.md +++ b/README.md @@ -38,14 +38,15 @@ ## 如何运行 +### 参数说明 +- ``: 必填参数,指定目标市场。 + 使用以下命令来运行程序: ```bash -python main.py +python run_fetcher.py ``` -### 参数说明 - - ``: 必填参数,指定目标市场。 - `CN`: 中国A股市场 - `HK`: 中国香港市场 @@ -62,20 +63,20 @@ python main.py - 分析贵州茅台 (A股): ```bash - python main.py CN 600519.SH + python run_fetcher.py CN 600519.SH ``` - 分析苹果公司 (美股): ```bash - python main.py US AAPL + python run_fetcher.py US AAPL ``` - If running just `python main.py` without arguments, it executes built-in default test cases (Kweichow Moutai and Apple Inc.). + If running just `python run_fetcher.py` without arguments, it executes built-in default test cases (Kweichow Moutai and Apple Inc.). ## 深度分析自动化 (Automated Deep Analysis) 项目提供了一个名为 `stock_analysis.py` 的脚本,能够全自动完成从数据获取到AI深度分析报告生成的全流程。 ### 功能特点 -1. **全自动流程**:一键调用 `main.py` 获取数据 -> 识别公司信息 -> 初始化报告 -> 调用 LLM 进行分章节深度分析。 +1. 
**全自动流程**:一键调用 `run_fetcher.py` 获取数据 -> 识别公司信息 -> 初始化报告 -> 调用 LLM 进行分章节深度分析。 2. **AI 驱动**:利用大语言模型(如 Gemini/GPT-4)根据预设的专业提示词(`prompts.yaml`),对公司简介、基本面、内部人动向、看涨/看跌逻辑进行深度解读。 3. **结构化报告**:生成的 Markdown 报告保存在 `reports/` 目录下,包含详细的文字分析和指向可视化财务图表的链接。 diff --git a/backend/app/database.py b/backend/app/database.py index 11513f1..49e7a08 100644 --- a/backend/app/database.py +++ b/backend/app/database.py @@ -26,6 +26,22 @@ async def init_db(): if "duplicate column" not in str(e).lower() and "already exists" not in str(e).lower(): print(f"Migration check: {e}") + # Migration: Add token columns to report_sections + columns_to_add = [ + ("prompt_tokens", "INTEGER DEFAULT 0"), + ("completion_tokens", "INTEGER DEFAULT 0"), + ("total_tokens", "INTEGER DEFAULT 0") + ] + + for col_name, col_type in columns_to_add: + try: + await conn.execute(text(f"ALTER TABLE report_sections ADD COLUMN {col_name} {col_type}")) + print(f"Migration: Added {col_name} to report_sections table") + except Exception as e: + # SQLite error for duplicate column usually contains "duplicate column name" + if "duplicate column" not in str(e).lower() and "already exists" not in str(e).lower(): + print(f"Migration check for {col_name}: {e}") + async def get_db(): async with AsyncSessionLocal() as session: yield session diff --git a/backend/app/models.py b/backend/app/models.py index 3dcb2f9..349eb29 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -33,6 +33,9 @@ class ReportSection(Base): report_id: Mapped[int] = mapped_column(ForeignKey("reports.id")) section_name: Mapped[str] = mapped_column(String(50)) # e.g. 
company_profile, fundamental_analysis content: Mapped[str] = mapped_column(Text) # Markdown content + total_tokens: Mapped[int] = mapped_column(Integer, nullable=True, default=0) + prompt_tokens: Mapped[int] = mapped_column(Integer, nullable=True, default=0) + completion_tokens: Mapped[int] = mapped_column(Integer, nullable=True, default=0) created_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) report: Mapped["Report"] = relationship(back_populates="sections") diff --git a/backend/app/services/analysis_service.py b/backend/app/services/analysis_service.py index 8d51a66..81e93f3 100644 --- a/backend/app/services/analysis_service.py +++ b/backend/app/services/analysis_service.py @@ -25,14 +25,21 @@ async def search_stock(query: str, api_key: str, model: str = "gemini-2.0-flash" 4. 例如:"茅台" = "贵州茅台酒股份有限公司" (600519.SH) 请返回一个 JSON 数组,包含所有匹配的公司。每个对象包含以下字段: - - 'market': 'CN' (中国), 'US' (美国), 'HK' (香港), 或 'JP' (日本) 之一 - - 'symbol': 完整的股票代码 (例如 'AAPL', '600519.SH', '00700.HK', '688778.SH', '2503.T') + - 'market': 'CN' (中国), 'US' (美国), 'HK' (香港), 'JP' (日本), 或 'VN' (越南) 之一 + - 'symbol': 完整的股票代码 (例如 'AAPL', '600519.SH', '00700.HK', '688778.SH', '2503.T', 'SAB.HM') - 'company_name': 公司的中文简称(如果有的话,优先使用中文;如果只有英文名,则使用英文名) **匹配规则**: - 如果查询词与某公司简称、全称或股票代码完全匹配,返回该公司 - 如果有多个可能的匹配,返回所有相关公司 - 如果公司在中国但用户没有指定市场,默认为 CN(上海/深圳/北京交易所) + - **越南股票后缀规则(iFinD数据源)**: + - 胡志明交易所 (HOSE) -> 后缀 **.HM** (例如: VNM.HM, SAB.HM, VCB.HM) + - 河内交易所 (HNX) -> 后缀 **.HN** (例如: PVS.HN, SHS.HN) + - UPCoM 市场 -> iFinD 通常也使用 .HN 或 .HM,或者特定的 UPCoM 后缀,但绝不要使用 .VN (这是 Bloomberg 格式) + - 示例:MCH (Masan Consumer) -> MCH.HN 或 MCH.HM (UPCoM 在 iFinD 中可能归类不一,但 MCH.HN 是常见尝试,MCH.HM 也可以尝试。请根据搜索结果确认 iFinD 使用哪一个。**强烈倾向于使用 .HM 或 .HN,严禁使用 .VN**) + - MCH 具体案例:用户指出应为 MCH.HM (或 MCH.HN),绝非 MCH.VN。请只返回 .HM 或 .HN。 + - 如果不确定是 HM 还是 HN,优先返回 .HM。 - 如果完全没找到匹配,返回 {{ "error": "未找到相关公司" }} 示例响应(单个结果): @@ -52,9 +59,9 @@ async def search_stock(query: str, api_key: str, model: str = "gemini-2.0-flash" 
"company_name": "腾讯控股" }}, {{ - "market": "US", - "symbol": "TCEHY", - "company_name": "Tencent Holdings ADR" + "market": "VN", + "symbol": "VNM.HM", + "company_name": "Vinamilk" }} ] @@ -128,9 +135,9 @@ async def run_analysis_task(report_id: int, market: str, symbol: str, api_key: s company_name_for_prompt = report.company_name - # 2. Run Main Data Fetching Script (main.py) + # 2. Run Main Data Fetching Script (run_fetcher.py) root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) - cmd = [sys.executable, "main.py", market, symbol] + cmd = [sys.executable, "run_fetcher.py", market, symbol] print(f"Executing data fetch command: {cmd} in {root_dir}") process = await asyncio.create_subprocess_exec( diff --git a/backend/app/services/llm_engine.py b/backend/app/services/llm_engine.py index 047c5de..16c3ed1 100644 --- a/backend/app/services/llm_engine.py +++ b/backend/app/services/llm_engine.py @@ -60,10 +60,22 @@ async def call_llm(api_key: str, model_name: str, system_prompt: str, user_promp ) response = await asyncio.to_thread(run_sync) - return response.text + usage = response.usage_metadata + prompt_tokens = usage.prompt_token_count if usage else 0 + completion_tokens = usage.candidates_token_count if usage else 0 + + return { + "text": response.text, + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens + } except Exception as e: print(f"API Call Failed: {e}") - return f"\n\nError generating section: {e}\n\n" + return { + "text": f"\n\nError generating section: {e}\n\n", + "prompt_tokens": 0, + "completion_tokens": 0 + } async def process_analysis_steps(report_id: int, company_name: str, symbol: str, market: str, db: AsyncSession, api_key: str): # 1. Load Prompts @@ -71,7 +83,7 @@ async def process_analysis_steps(report_id: int, company_name: str, symbol: str, prompt_dir = os.path.join(root_dir, "Prompt") prompts = await load_prompts(db, prompt_dir) - # 2. Read Data Context (report.md generated by main.py) + # 2. 
Read Data Context (report.md generated by run_fetcher.py) base_dir = os.path.join(root_dir, "data", market) symbol_dir = os.path.join(base_dir, symbol) @@ -82,7 +94,7 @@ async def process_analysis_steps(report_id: int, company_name: str, symbol: str, data_path = os.path.join(symbol_dir, "report.md") if not os.path.exists(data_path): - # If report.md is missing, maybe main.py failed or output structure changed. + # If report.md is missing, maybe run_fetcher.py failed or output structure changed. # We try to proceed or fail. print(f"Warning: {data_path} not found.") data_context = "No financial data available." @@ -127,9 +139,9 @@ async def process_analysis_steps(report_id: int, company_name: str, symbol: str, if key == "bearish_analysis" and csv_context: current_data_context += csv_context - content = await call_llm(api_key, model_name, system_content, user_content, current_data_context, enable_search=True) + result = await call_llm(api_key, model_name, system_content, user_content, current_data_context, enable_search=True) - return (key, content) + return (key, result) # Run all sections concurrently print(f"Starting concurrent analysis with {len(steps)} sections...") @@ -139,11 +151,14 @@ async def process_analysis_steps(report_id: int, company_name: str, symbol: str, for result in results: if result is None: continue - key, content = result + key, result_data = result section = ReportSection( report_id=report_id, section_name=key, - content=content + content=result_data["text"], + prompt_tokens=result_data["prompt_tokens"], + completion_tokens=result_data["completion_tokens"], + total_tokens=result_data["prompt_tokens"] + result_data["completion_tokens"] ) db.add(section) diff --git a/data/HK/00700.HK/raw_balance_sheet_raw.csv b/data/HK/00700.HK/raw_balance_sheet_raw.csv deleted file mode 100644 index 3235c47..0000000 --- a/data/HK/00700.HK/raw_balance_sheet_raw.csv +++ /dev/null @@ -1,5 +0,0 @@ 
-cash_equi_short_term_inve_oas,accou_and_notes_recei_oas,inventories_oas,ppe_net_oas,long_term_inv_and_receiv_oas,goodwill_and_intasset_oas,short_term_debt_oas,short_term_borrowings_oas,account_and_note_payable_oas,advance_from_cust_current_oas,defer_revenue_current_oas,long_term_debt_oas,long_term_borrowings_oas,total_assets_oas,equity_attri_to_companyowner_oas,prepaid_expenses_current_oas,end_date -380444000000.0,51315000000.0,435000000.0,172648000000.0,706700000000.0,215832000000.0,63974000000.0,58577000000.0,143381000000.0,998000000.0,120908000000.0,335632000000.0,322304000000.0,2013310000000.0,1114639000000.0,26893000000.0,20250630 -343159000000.0,48203000000.0,440000000.0,133283000000.0,589410000000.0,196127000000.0,58485000000.0,48526000000.0,127335000000.0,1042000000.0,100097000000.0,291004000000.0,277107000000.0,1780995000000.0,973548000000.0,31265000000.0,20241231 -379155000000.0,46606000000.0,456000000.0,104458000000.0,460591000000.0,177727000000.0,47691000000.0,25561000000.0,115109000000.0,669000000.0,86168000000.0,309388000000.0,292920000000.0,1577246000000.0,808591000000.0,27824000000.0,20231231 -290756000000.0,45467000000.0,2333000000.0,103777000000.0,431451000000.0,161802000000.0,17934000000.0,5981000000.0,102827000000.0,816000000.0,82216000000.0,330761000000.0,312337000000.0,1578131000000.0,721391000000.0,24393000000.0,20221231 diff --git a/data/HK/00700.HK/raw_basic_info_raw.csv b/data/HK/00700.HK/raw_basic_info_raw.csv deleted file mode 100644 index 7667357..0000000 --- a/data/HK/00700.HK/raw_basic_info_raw.csv +++ /dev/null @@ -1,2 +0,0 @@ -corp_cn_name,accounting_date,ipo_date -腾讯控股有限公司,1231,20040616 diff --git a/data/HK/00700.HK/raw_cash_flow_raw.csv b/data/HK/00700.HK/raw_cash_flow_raw.csv deleted file mode 100644 index e12c248..0000000 --- a/data/HK/00700.HK/raw_cash_flow_raw.csv +++ /dev/null @@ -1,5 +0,0 @@ -net_cash_flows_from_oa_oas,purchase_of_ppe_and_ia_oas,dividends_paid_oas,end_date 
-151265000000.0,57457000000.0,37535000000.0,20250630 -258521000000.0,96048000000.0,28859000000.0,20241231 -221962000000.0,47407000000.0,20983000000.0,20231231 -146091000000.0,50850000000.0,12952000000.0,20221231 diff --git a/data/HK/00700.HK/raw_dividends_raw.csv b/data/HK/00700.HK/raw_dividends_raw.csv deleted file mode 100644 index d7f802f..0000000 --- a/data/HK/00700.HK/raw_dividends_raw.csv +++ /dev/null @@ -1,4 +0,0 @@ -date_str,dividends -20241231,38104168998.825 -20231231,29163521377.441 -20221231,20700985117.366 diff --git a/data/HK/00700.HK/raw_employee_count_raw.csv b/data/HK/00700.HK/raw_employee_count_raw.csv deleted file mode 100644 index 4cf86bf..0000000 --- a/data/HK/00700.HK/raw_employee_count_raw.csv +++ /dev/null @@ -1,6 +0,0 @@ -date_str,employee_count -20261231,111221.0 -20251231,111221.0 -20241231,110558.0 -20231231,105417.0 -20221231,108436.0 diff --git a/data/HK/00700.HK/raw_historical_metrics_raw.csv b/data/HK/00700.HK/raw_historical_metrics_raw.csv deleted file mode 100644 index a5a077b..0000000 --- a/data/HK/00700.HK/raw_historical_metrics_raw.csv +++ /dev/null @@ -1,5 +0,0 @@ -date_str,PE,PB,MarketCap,Price -20250630,0.0,0.0,4204320544234.2,467.83035 -20241231,0.0,0.0,3562280981362.7,388.01076 -20231231,0.0,0.0,2523103788380.7,266.066192 -20221231,0.0,0.0,2854854121108.1,298.35218 diff --git a/data/HK/00700.HK/raw_income_statement_raw.csv b/data/HK/00700.HK/raw_income_statement_raw.csv deleted file mode 100644 index 9f04e9a..0000000 --- a/data/HK/00700.HK/raw_income_statement_raw.csv +++ /dev/null @@ -1,5 +0,0 @@ -revenue_oas,gross_profit_oas,sga_expenses_oas,selling_marketing_expenses_oas,ga_expenses_oas,income_tax_expense_oas,net_income_attri_to_common_sh_oas,operating_income_oas,end_date -364526000000.0,205506000000.0,82861000000.0,17276000000.0,65585000000.0,25068000000.0,103449000000.0,117670000000.0,20250630 
-660257000000.0,349246000000.0,149149000000.0,36388000000.0,112761000000.0,45018000000.0,194073000000.0,208099000000.0,20241231 -609015000000.0,293109000000.0,137736000000.0,34211000000.0,103525000000.0,43276000000.0,115216000000.0,160074000000.0,20231231 -554552000000.0,238746000000.0,135925000000.0,29229000000.0,106696000000.0,21516000000.0,188243000000.0,227114000000.0,20221231 diff --git a/data/HK/00700.HK/raw_repurchases_raw.csv b/data/HK/00700.HK/raw_repurchases_raw.csv deleted file mode 100644 index 6ea8286..0000000 --- a/data/HK/00700.HK/raw_repurchases_raw.csv +++ /dev/null @@ -1,6 +0,0 @@ -date_str,repurchases -20261231,1271296551.4 -20251231,80610335058.2 -20241231,112003383926.44 -20231231,49432707948.16 -20221231,33794068650.24 diff --git a/data/HK/00700.HK/report.md b/data/HK/00700.HK/report.md deleted file mode 100644 index ff4beb1..0000000 --- a/data/HK/00700.HK/report.md +++ /dev/null @@ -1,89 +0,0 @@ -# 腾讯控股有限公司 (00700.HK) - Financial Report -*Report generated on: 2026-01-03* - -| 代码 | 简称 | 上市日期 | 年结日 | 市值(亿) | PE | PB | 股息率(%) | -|:---|:---|:---|:---|:---|:---|:---|:---| -| 00700.HK | 腾讯控股有限公司 | 2004-06-16 | 1231 | 42043.21 | 20.32 | 3.77 | 0.89% | - - -## 主要指标 -| 指标 | 2025H1 | 2024A | 2023A | 2022A | -|:---|--:|--:|--:|--:| -| ROE | 9.28% | 19.93% | 14.25% | 26.09% | -| ROA | 5.14% | 10.90% | 7.30% | 11.93% | -| ROCE/ROIC | 6.21% | 12.62% | 10.79% | 16.36% | -| 毛利率 | 56.38% | 52.90% | 48.13% | 43.05% | -| 净利润率 | 28.38% | 29.39% | 18.92% | 33.95% | -| 收入(亿) | 3,645.26 | 6,602.57 | 6,090.15 | 5,545.52 | -| 收入增速 | - | 8.41% | 9.82% | - | -| 净利润(亿) | 1,034.49 | 1,940.73 | 1,152.16 | 1,882.43 | -| 净利润增速 | - | 68.44% | -38.79% | - | -| 经营净现金流(亿) | 1,512.65 | 2,585.21 | 2,219.62 | 1,460.91 | -| 资本开支(亿) | 574.57 | 960.48 | 474.07 | 508.50 | -| 自由现金流(亿) | 938.08 | 1,624.73 | 1,745.55 | 952.41 | -| 分红(亿) | 375.35 | 288.59 | 209.83 | 129.52 | -| 回购(亿) | - | 1,120.03 | 494.33 | 337.94 | -| 总资产(亿) | 20,133.10 | 17,809.95 | 15,772.46 | 15,781.31 | -| 净资产(亿) | 
11,146.39 | 9,735.48 | 8,085.91 | 7,213.91 | -| 商誉(亿) | 2,158.32 | 1,961.27 | 1,777.27 | 1,618.02 | - - -## 费用指标 -| 指标 | 2025H1 | 2024A | 2023A | 2022A | -|:---|--:|--:|--:|--:| -| 销售费用率 | 4.74% | 5.51% | 5.62% | 5.27% | -| 管理费用率 | 17.99% | 17.08% | 17.00% | 19.24% | -| SG&A比例 | 22.73% | 22.59% | 22.62% | 24.51% | -| 研发费用率 | - | - | - | - | -| 其他费用率 | 5.27% | 0.91% | 6.59% | -15.40% | -| 折旧费用占比 | - | - | - | - | -| 所得税率 | 19.51% | 18.83% | 27.30% | 10.26% | - - -## 资产占比 -| 指标 | 2025H1 | 2024A | 2023A | 2022A | -|:---|--:|--:|--:|--:| -| 现金占比 | 18.90% | 19.27% | 24.04% | 18.42% | -| 库存占比 | 0.02% | 0.02% | 0.03% | 0.15% | -| 应收款占比 | 2.55% | 2.71% | 2.95% | 2.88% | -| 预付款占比 | 1.34% | 1.76% | 1.76% | 1.55% | -| 固定资产占比 | 8.58% | 7.48% | 6.62% | 6.58% | -| 长期投资占比 | 35.10% | 33.09% | 29.20% | 27.34% | -| 商誉占比 | 10.72% | 11.01% | 11.27% | 10.25% | -| 其他资产占比 | 22.80% | 24.66% | 24.12% | 32.83% | -| 应付款占比 | 7.12% | 7.15% | 7.30% | 6.52% | -| 预收款占比 | 0.00% | 0.00% | 0.00% | 0.00% | -| 短期借款占比 | 6.09% | 6.01% | 4.64% | 1.52% | -| 长期借款占比 | 32.68% | 31.90% | 38.19% | 40.75% | -| 运营资产占比 | -3.22% | -2.66% | -2.55% | -1.94% | -| 有息负债率 | 38.77% | 37.91% | 42.83% | 42.27% | - - -## 周转能力 -| 指标 | 2025H1 | 2024A | 2023A | 2022A | -|:---|--:|--:|--:|--:| -| 存货周转天数 | 0 | 0 | 0 | 2 | -| 应收款周转天数 | 25 | 26 | 27 | 29 | -| 应付款周转天数 | 164 | 149 | 132 | 118 | -| 固定资产周转率 | 4.22 | 4.95 | 5.83 | 5.34 | -| 总资产周转率 | 0.36 | 0.37 | 0.39 | 0.35 | - - -## 人均效率 -| 指标 | 2025H1 | 2024A | 2023A | 2022A | -|:---|--:|--:|--:|--:| -| 员工人数 | - | 110,558 | 105,417 | 108,436 | -| 人均创收(万) | - | 597.20 | 577.72 | 511.41 | -| 人均创利(万) | - | 175.54 | 109.30 | 173.60 | -| 人均薪酬(万) | - | - | - | - | - - -## 市场表现 -| 指标 | 2025H1 | 2024A | 2023A | 2022A | -|:---|--:|--:|--:|--:| -| 股价 | 467.83 | 388.01 | 266.07 | 298.35 | -| 市值(亿) | 42,043 | 35,623 | 25,231 | 28,549 | -| PE | 20.32 | 18.36 | 21.90 | 15.17 | -| PB | 3.77 | 3.66 | 3.12 | 3.96 | -| 股东户数 | - | - | - | - | - diff --git a/data/JP/2503.T/raw_balance_sheet_raw.csv 
b/data/JP/2503.T/raw_balance_sheet_raw.csv index 1b01da9..facb7be 100644 --- a/data/JP/2503.T/raw_balance_sheet_raw.csv +++ b/data/JP/2503.T/raw_balance_sheet_raw.csv @@ -1,6 +1,6 @@ cash_equi_short_term_inve_oas,accou_and_notes_recei_oas,inventories_oas,ppe_net_oas,long_term_inv_and_receiv_oas,goodwill_and_intasset_oas,short_term_debt_oas,short_term_borrowings_oas,account_and_note_payable_oas,contra_liabilities_current_oas,advance_from_cust_current_oas,defer_revenue_current_oas,long_term_debt_oas,long_term_borrowings_oas,total_assets_oas,equity_attri_to_companyowner_oas,prepaid_expenses_current_oas,end_date -,,,,,,,,,,,,,,,,,20261231 -,,,,,,,,,,,,,,,,,20251231 5899320980.35,22326249202.25,16650772821.2,31263474425.4,4794152048.0,53852622755.049995,3617228537.3,3617228537.3,7518553255.849999,,,,36159437268.15,36159437268.15,155576124604.95,54802733151.25,,20241231 7070829794.070001,21350738934.77,16675747687.67,29873189030.72,5307593791.54,34970889368.92,5072156415.77,5072156415.77,6915651724.870001,,,,27998809255.25,27998809255.25,144576837566.65,57062250906.689995,,20231231 5046190177.88,20369212787.27,15183728587.93,29336618624.86,5409547685.4,25662437929.58,5989276634.969999,5989276634.969999,6303080634.48,,,,21383987001.46,21383987001.46,133028563816.29,51281444590.26,,20221231 +8709165023.76,20609942061.24,13679704695.48,29539550251.08,6497706183.72,25484093179.92,5616320844.24,5616320844.24,5452427104.8,,,,24897794036.4,24897794036.4,136777293387.96,49476819729.48,,20211231 +10725243245.05,22595254368.58,13732598542.25,33002436277.529995,6686243673.8,28770955850.0,15747071314.71,15747071314.71,6078069930.14,,,,24889089960.7,24889089960.7,155512580861.81,53026073056.08,,20201231 diff --git a/data/JP/2503.T/raw_cash_flow_raw.csv b/data/JP/2503.T/raw_cash_flow_raw.csv index 50a38cd..7924b69 100644 --- a/data/JP/2503.T/raw_cash_flow_raw.csv +++ b/data/JP/2503.T/raw_cash_flow_raw.csv @@ -1,6 +1,6 @@ 
net_cash_flows_from_oa_oas,purchase_of_ppe_and_ia_oas,dividends_paid_oas,end_date -,,,20261231 -,,,20251231 11263845394.2,8376036701.2,2704873943.8,20241231 10238024262.94,5734031186.9,2896993175.0,20231231 7093529728.46,5153093891.57,2814032263.74,20221231 +12134499912.36,4777098580.2,2998115590.08,20211231 +10423243057.93,5882300964.62,3498421765.62,20201231 diff --git a/data/JP/2503.T/raw_historical_metrics_raw.csv b/data/JP/2503.T/raw_historical_metrics_raw.csv index e0d9f6f..d426218 100644 --- a/data/JP/2503.T/raw_historical_metrics_raw.csv +++ b/data/JP/2503.T/raw_historical_metrics_raw.csv @@ -1,6 +1,6 @@ date_str,PE,PB,MarketCap,Price -20261231,0.0,0.0,95997084050.16,105.02963244 -20251231,0.0,0.0,95997084050.16,105.02963244 20241231,0.0,0.0,86865526677.3,95.03886945 20231231,0.0,0.0,95138465046.76,104.09022434 20221231,0.0,0.0,96179539188.82,105.22925513 +20211231,0.0,0.0,93409361034.96,102.19842564 +20201231,0.0,0.0,0.0,0.0 diff --git a/data/JP/2503.T/raw_income_statement_raw.csv b/data/JP/2503.T/raw_income_statement_raw.csv index 4369106..778582e 100644 --- a/data/JP/2503.T/raw_income_statement_raw.csv +++ b/data/JP/2503.T/raw_income_statement_raw.csv @@ -1,6 +1,6 @@ revenue_oas,gross_profit_oas,sga_expenses_oas,selling_marketing_expenses_oas,ga_expenses_oas,rd_expenses_oas,income_tax_expense_oas,net_income_attri_to_common_sh_oas,operating_income_oas,end_date -,,,,,,,,,20261231 -,,,,,,,,,20251231 108461428374.25,49442244062.75,34274708414.449997,7858262714.049999,,5382196355.9,2500510225.5,2700142872.7,5813651487.0,20241231 107536033978.57,48541816110.34,34112774799.24,8351653454.849999,,4277221488.55,2348378241.39,5677955475.53,7572185952.06,20231231 104102553826.44,47393090179.79,33504032019.72,8687928238.56,,3886365990.93,2491542010.45,5808644417.809999,6070906489.7699995,20221231 +100791329828.4,45529592285.04,32524275468.12,9023230136.88,,3851779537.44,1725974819.16,3308307454.8,3767232058.08,20211231 
+116952038544.15,50831829234.21,36502312399.16,9503078333.689999,,4078520115.0,1562420984.83,4548656503.45,6507863747.53,20201231 diff --git a/data/JP/2503.T/report.html b/data/JP/2503.T/report.html index cccc834..414df9b 100644 --- a/data/JP/2503.T/report.html +++ b/data/JP/2503.T/report.html @@ -205,7 +205,7 @@

麒麟控股株式会社 (2503.T) - Financial Report

-

Report generated on: 2026-01-03

+

Report generated on: 2026-01-07

@@ -236,67 +236,67 @@ - + - - + + - - - - - - - - - - + + + + + + + + + + - - - + + + - - - + + + - + - - - + + + - - - - - - - - - - + + + + + + + + + + - - - - - + + + + + - - - + + + - - - - + + + +
指标2026A2025A2024A2023A2022A2024A2023A2022A2021A2020A
ROE--4.93%9.95%11.33%
ROA--1.74%3.93%4.37%
ROE4.93%9.95%11.33%6.69%8.58%
ROA1.74%3.93%4.37%2.42%2.92%
ROCE/ROIC-----
毛利率--45.59%45.14%45.53%
净利润率--2.49%5.28%5.58%
收入(亿)--1,084.611,075.361,041.03
收入增速--0.86%3.30%-
净利润(亿)--27.0056.7858.09
净利润增速---52.45%-2.25%-
经营净现金流(亿)--112.64102.3870.94
资本开支(亿)--83.7657.3451.53
自由现金流(亿)--28.8845.0419.40
分红(亿)--27.0528.9728.14
毛利率45.59%45.14%45.53%45.17%43.46%
净利润率2.49%5.28%5.58%3.28%3.89%
收入(亿)1,084.611,075.361,041.031,007.911,169.52
收入增速0.86%3.30%3.29%-13.82%-
净利润(亿)27.0056.7858.0933.0845.49
净利润增速-52.45%-2.25%75.58%-27.27%-
经营净现金流(亿)112.64102.3870.94121.34104.23
资本开支(亿)83.7657.3451.5347.7758.82
自由现金流(亿)28.8845.0419.4073.5745.41
分红(亿)27.0528.9728.1429.9834.98
回购(亿)-----
总资产(亿)--1,555.761,445.771,330.29
净资产(亿)--548.03570.62512.81
商誉(亿)--538.53349.71256.62
总资产(亿)1,555.761,445.771,330.291,367.771,555.13
净资产(亿)548.03570.62512.81494.77530.26
商誉(亿)538.53349.71256.62254.84287.71
销售费用率-----
管理费用率-----
SG&A比例--31.60%31.72%32.18%
研发费用率--4.96%3.98%3.73%
其他费用率--6.53%4.16%4.03%
SG&A比例31.60%31.72%32.18%32.27%31.21%
研发费用率4.96%3.98%3.73%3.82%3.49%
其他费用率6.53%4.16%4.03%5.80%4.88%
折旧费用占比-----
所得税率--48.08%29.26%30.02%
所得税率48.08%29.26%30.02%34.28%25.57%
现金占比--3.79%4.89%3.79%
库存占比--10.70%11.53%11.41%
应收款占比--14.35%14.77%15.31%
现金占比3.79%4.89%3.79%6.37%6.90%
库存占比10.70%11.53%11.41%10.00%8.83%
应收款占比14.35%14.77%15.31%15.07%14.53%
预付款占比-----
固定资产占比--20.10%20.66%22.05%
长期投资占比--3.08%3.67%4.07%
商誉占比--34.61%24.19%19.29%
其他资产占比100.00%100.00%13.36%20.29%24.07%
应付款占比--4.83%4.78%4.74%
预收款占比--0.00%0.00%0.00%
短期借款占比--2.33%3.51%4.50%
长期借款占比--46.48%38.73%32.15%
运营资产占比0.00%0.00%20.22%21.52%21.99%
有息负债率--48.81%42.24%36.65%
固定资产占比20.10%20.66%22.05%21.60%21.22%
长期投资占比3.08%3.67%4.07%4.75%4.30%
商誉占比34.61%24.19%19.29%18.63%18.50%
其他资产占比13.36%20.29%24.07%23.58%25.72%
应付款占比4.83%4.78%4.74%3.99%3.91%
预收款占比0.00%0.00%0.00%0.00%0.00%
短期借款占比2.33%3.51%4.50%4.11%10.13%
长期借款占比46.48%38.73%32.15%36.41%32.01%
运营资产占比20.22%21.52%21.99%21.08%19.45%
有息负债率48.81%42.24%36.65%40.51%42.14%
存货周转天数--10210397
应收款周转天数--757271
应付款周转天数--464240
固定资产周转率--3.473.603.55
总资产周转率--0.700.740.78
存货周转天数102103979075
应收款周转天数7572717470
应付款周转天数4642403633
固定资产周转率3.473.603.553.413.54
总资产周转率0.700.740.780.740.75
员工人数31,93431,93431,93430,53829,515
人均创收(万)--339.64352.14352.71
人均创利(万)--8.4618.5919.68
员工人数31,93430,53829,515--
人均创收(万)339.64352.14352.71--
人均创利(万)8.4618.5919.68--
人均薪酬(万)-----
股价105.03105.0395.04104.09105.23
市值(亿)959.97959.97868.66951.38961.80
PE--32.1716.7616.56
PB--1.591.671.88
股价95.04104.09105.23102.200.00
市值(亿)868.66951.38961.80934.090.00
PE32.1716.7616.5628.230.00
PB1.591.671.881.890.00
股东户数-----
diff --git a/data/JP/2503.T/report.md b/data/JP/2503.T/report.md index 014c074..992aa75 100644 --- a/data/JP/2503.T/report.md +++ b/data/JP/2503.T/report.md @@ -1,5 +1,5 @@ # 麒麟控股株式会社 (2503.T) - Financial Report -*Report generated on: 2026-01-03* +*Report generated on: 2026-01-07* | 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) | |:---|:---|:---|:---|:---|:---| @@ -7,83 +7,83 @@ ## 主要指标 -| 指标 | 2026A | 2025A | 2024A | 2023A | 2022A | +| 指标 | 2024A | 2023A | 2022A | 2021A | 2020A | |:---|--:|--:|--:|--:|--:| -| ROE | - | - | 4.93% | 9.95% | 11.33% | -| ROA | - | - | 1.74% | 3.93% | 4.37% | +| ROE | 4.93% | 9.95% | 11.33% | 6.69% | 8.58% | +| ROA | 1.74% | 3.93% | 4.37% | 2.42% | 2.92% | | ROCE/ROIC | - | - | - | - | - | -| 毛利率 | - | - | 45.59% | 45.14% | 45.53% | -| 净利润率 | - | - | 2.49% | 5.28% | 5.58% | -| 收入(亿) | - | - | 1,084.61 | 1,075.36 | 1,041.03 | -| 收入增速 | - | - | 0.86% | 3.30% | - | -| 净利润(亿) | - | - | 27.00 | 56.78 | 58.09 | -| 净利润增速 | - | - | -52.45% | -2.25% | - | -| 经营净现金流(亿) | - | - | 112.64 | 102.38 | 70.94 | -| 资本开支(亿) | - | - | 83.76 | 57.34 | 51.53 | -| 自由现金流(亿) | - | - | 28.88 | 45.04 | 19.40 | -| 分红(亿) | - | - | 27.05 | 28.97 | 28.14 | +| 毛利率 | 45.59% | 45.14% | 45.53% | 45.17% | 43.46% | +| 净利润率 | 2.49% | 5.28% | 5.58% | 3.28% | 3.89% | +| 收入(亿) | 1,084.61 | 1,075.36 | 1,041.03 | 1,007.91 | 1,169.52 | +| 收入增速 | 0.86% | 3.30% | 3.29% | -13.82% | - | +| 净利润(亿) | 27.00 | 56.78 | 58.09 | 33.08 | 45.49 | +| 净利润增速 | -52.45% | -2.25% | 75.58% | -27.27% | - | +| 经营净现金流(亿) | 112.64 | 102.38 | 70.94 | 121.34 | 104.23 | +| 资本开支(亿) | 83.76 | 57.34 | 51.53 | 47.77 | 58.82 | +| 自由现金流(亿) | 28.88 | 45.04 | 19.40 | 73.57 | 45.41 | +| 分红(亿) | 27.05 | 28.97 | 28.14 | 29.98 | 34.98 | | 回购(亿) | - | - | - | - | - | -| 总资产(亿) | - | - | 1,555.76 | 1,445.77 | 1,330.29 | -| 净资产(亿) | - | - | 548.03 | 570.62 | 512.81 | -| 商誉(亿) | - | - | 538.53 | 349.71 | 256.62 | +| 总资产(亿) | 1,555.76 | 1,445.77 | 1,330.29 | 1,367.77 | 1,555.13 | +| 净资产(亿) | 548.03 | 570.62 | 512.81 | 494.77 | 530.26 
| +| 商誉(亿) | 538.53 | 349.71 | 256.62 | 254.84 | 287.71 | ## 费用指标 -| 指标 | 2026A | 2025A | 2024A | 2023A | 2022A | +| 指标 | 2024A | 2023A | 2022A | 2021A | 2020A | |:---|--:|--:|--:|--:|--:| | 销售费用率 | - | - | - | - | - | | 管理费用率 | - | - | - | - | - | -| SG&A比例 | - | - | 31.60% | 31.72% | 32.18% | -| 研发费用率 | - | - | 4.96% | 3.98% | 3.73% | -| 其他费用率 | - | - | 6.53% | 4.16% | 4.03% | +| SG&A比例 | 31.60% | 31.72% | 32.18% | 32.27% | 31.21% | +| 研发费用率 | 4.96% | 3.98% | 3.73% | 3.82% | 3.49% | +| 其他费用率 | 6.53% | 4.16% | 4.03% | 5.80% | 4.88% | | 折旧费用占比 | - | - | - | - | - | -| 所得税率 | - | - | 48.08% | 29.26% | 30.02% | +| 所得税率 | 48.08% | 29.26% | 30.02% | 34.28% | 25.57% | ## 资产占比 -| 指标 | 2026A | 2025A | 2024A | 2023A | 2022A | +| 指标 | 2024A | 2023A | 2022A | 2021A | 2020A | |:---|--:|--:|--:|--:|--:| -| 现金占比 | - | - | 3.79% | 4.89% | 3.79% | -| 库存占比 | - | - | 10.70% | 11.53% | 11.41% | -| 应收款占比 | - | - | 14.35% | 14.77% | 15.31% | +| 现金占比 | 3.79% | 4.89% | 3.79% | 6.37% | 6.90% | +| 库存占比 | 10.70% | 11.53% | 11.41% | 10.00% | 8.83% | +| 应收款占比 | 14.35% | 14.77% | 15.31% | 15.07% | 14.53% | | 预付款占比 | - | - | - | - | - | -| 固定资产占比 | - | - | 20.10% | 20.66% | 22.05% | -| 长期投资占比 | - | - | 3.08% | 3.67% | 4.07% | -| 商誉占比 | - | - | 34.61% | 24.19% | 19.29% | -| 其他资产占比 | 100.00% | 100.00% | 13.36% | 20.29% | 24.07% | -| 应付款占比 | - | - | 4.83% | 4.78% | 4.74% | -| 预收款占比 | - | - | 0.00% | 0.00% | 0.00% | -| 短期借款占比 | - | - | 2.33% | 3.51% | 4.50% | -| 长期借款占比 | - | - | 46.48% | 38.73% | 32.15% | -| 运营资产占比 | 0.00% | 0.00% | 20.22% | 21.52% | 21.99% | -| 有息负债率 | - | - | 48.81% | 42.24% | 36.65% | +| 固定资产占比 | 20.10% | 20.66% | 22.05% | 21.60% | 21.22% | +| 长期投资占比 | 3.08% | 3.67% | 4.07% | 4.75% | 4.30% | +| 商誉占比 | 34.61% | 24.19% | 19.29% | 18.63% | 18.50% | +| 其他资产占比 | 13.36% | 20.29% | 24.07% | 23.58% | 25.72% | +| 应付款占比 | 4.83% | 4.78% | 4.74% | 3.99% | 3.91% | +| 预收款占比 | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | +| 短期借款占比 | 2.33% | 3.51% | 4.50% | 4.11% | 10.13% | +| 长期借款占比 | 46.48% | 
38.73% | 32.15% | 36.41% | 32.01% | +| 运营资产占比 | 20.22% | 21.52% | 21.99% | 21.08% | 19.45% | +| 有息负债率 | 48.81% | 42.24% | 36.65% | 40.51% | 42.14% | ## 周转能力 -| 指标 | 2026A | 2025A | 2024A | 2023A | 2022A | +| 指标 | 2024A | 2023A | 2022A | 2021A | 2020A | |:---|--:|--:|--:|--:|--:| -| 存货周转天数 | - | - | 102 | 103 | 97 | -| 应收款周转天数 | - | - | 75 | 72 | 71 | -| 应付款周转天数 | - | - | 46 | 42 | 40 | -| 固定资产周转率 | - | - | 3.47 | 3.60 | 3.55 | -| 总资产周转率 | - | - | 0.70 | 0.74 | 0.78 | +| 存货周转天数 | 102 | 103 | 97 | 90 | 75 | +| 应收款周转天数 | 75 | 72 | 71 | 74 | 70 | +| 应付款周转天数 | 46 | 42 | 40 | 36 | 33 | +| 固定资产周转率 | 3.47 | 3.60 | 3.55 | 3.41 | 3.54 | +| 总资产周转率 | 0.70 | 0.74 | 0.78 | 0.74 | 0.75 | ## 人均效率 -| 指标 | 2026A | 2025A | 2024A | 2023A | 2022A | +| 指标 | 2024A | 2023A | 2022A | 2021A | 2020A | |:---|--:|--:|--:|--:|--:| -| 员工人数 | 31,934 | 31,934 | 31,934 | 30,538 | 29,515 | -| 人均创收(万) | - | - | 339.64 | 352.14 | 352.71 | -| 人均创利(万) | - | - | 8.46 | 18.59 | 19.68 | +| 员工人数 | 31,934 | 30,538 | 29,515 | - | - | +| 人均创收(万) | 339.64 | 352.14 | 352.71 | - | - | +| 人均创利(万) | 8.46 | 18.59 | 19.68 | - | - | | 人均薪酬(万) | - | - | - | - | - | ## 市场表现 -| 指标 | 2026A | 2025A | 2024A | 2023A | 2022A | +| 指标 | 2024A | 2023A | 2022A | 2021A | 2020A | |:---|--:|--:|--:|--:|--:| -| 股价 | 105.03 | 105.03 | 95.04 | 104.09 | 105.23 | -| 市值(亿) | 959.97 | 959.97 | 868.66 | 951.38 | 961.80 | -| PE | - | - | 32.17 | 16.76 | 16.56 | -| PB | - | - | 1.59 | 1.67 | 1.88 | +| 股价 | 95.04 | 104.09 | 105.23 | 102.20 | 0.00 | +| 市值(亿) | 868.66 | 951.38 | 961.80 | 934.09 | 0.00 | +| PE | 32.17 | 16.76 | 16.56 | 28.23 | 0.00 | +| PB | 1.59 | 1.67 | 1.88 | 1.89 | 0.00 | | 股东户数 | - | - | - | - | - | diff --git a/frontend/src/app/analysis/[id]/page.tsx b/frontend/src/app/analysis/[id]/page.tsx index 080c745..62453d3 100644 --- a/frontend/src/app/analysis/[id]/page.tsx +++ b/frontend/src/app/analysis/[id]/page.tsx @@ -124,21 +124,21 @@ export default function AnalysisPage({ params }: { params: Promise<{ id: string 财务数据 - 
{report.sections?.map((s: any) => { - const nameMap: Record = { - 'company_profile': '公司简介', - 'fundamental_analysis': '基本面分析', - 'insider_analysis': '内部人士分析', - 'bullish_analysis': '看涨分析', - 'bearish_analysis': '看跌分析' - }; - return ( - - {nameMap[s.section_name] || s.section_name.replace(/_/g, " ")} + + {[ + { id: 'company_profile', label: '公司简介' }, + { id: 'fundamental_analysis', label: '基本面分析' }, + { id: 'insider_analysis', label: '内部人士分析' }, + { id: 'bullish_analysis', label: '看涨分析' }, + { id: 'bearish_analysis', label: '看跌分析' } + ].map((section) => ( +
+
+ + {section.label} - ); - })} - {report.sections?.length === 0 && report.status !== "in_progress" && 无内容} +
+ ))} @@ -152,15 +152,38 @@ export default function AnalysisPage({ params }: { params: Promise<{ id: string - {report.sections?.map((s: any) => ( - - - - - - - - ))} + {[ + { id: 'company_profile', label: '公司简介' }, + { id: 'fundamental_analysis', label: '基本面分析' }, + { id: 'insider_analysis', label: '内部人士分析' }, + { id: 'bullish_analysis', label: '看涨分析' }, + { id: 'bearish_analysis', label: '看跌分析' } + ].map((section) => { + const sectionData = report.sections?.find((s: any) => s.section_name === section.id); + return ( + + + + {sectionData ? ( + + ) : ( +
+ {report.status === "in_progress" || report.status === "pending" ? ( + <> + +

AI 正在撰写{section.label}...

+

请稍候,全面分析通常需要 1 分钟左右

+ + ) : ( +

该部分暂无内容

+ )} +
+ )} +
+
+
+ ); + })}
) diff --git a/frontend/src/app/page.tsx b/frontend/src/app/page.tsx index 066f231..aef5636 100644 --- a/frontend/src/app/page.tsx +++ b/frontend/src/app/page.tsx @@ -7,7 +7,7 @@ export default function Home() {

股票分析

- 输入公司名称或股票代码,开始全面的AI驱动的分析。支持中国、香港、美国、日本的公司分析。 + 输入公司名称或股票代码,开始全面的AI驱动的分析。支持中国、香港、美国、日本、越南的公司分析。

diff --git a/main.py b/run_fetcher.py similarity index 88% rename from main.py rename to run_fetcher.py index cb4e619..029791d 100644 --- a/main.py +++ b/run_fetcher.py @@ -25,6 +25,10 @@ def get_strategy(market, stock_code, tushare_token=None, av_key=None): from strategies.jp_strategy import JP_Strategy ifind_token = os.getenv('IFIND_REFRESH_TOKEN') return JP_Strategy(stock_code, ifind_token) + elif market == 'VN': + from strategies.vn_strategy import VN_Strategy + ifind_token = os.getenv('IFIND_REFRESH_TOKEN') + return VN_Strategy(stock_code, ifind_token) else: raise ValueError(f"Unsupported market: {market}") @@ -39,7 +43,7 @@ def main(): strategy = get_strategy(market, symbol, tushare_token, av_key) strategy.execute() else: - print("Usage: python main.py ") + print("Usage: python run_fetcher.py ") print("Running default test cases:") # Test CN diff --git a/src/analysis/vn_analyzer.py b/src/analysis/vn_analyzer.py new file mode 100644 index 0000000..048539c --- /dev/null +++ b/src/analysis/vn_analyzer.py @@ -0,0 +1,116 @@ +from .cn_analyzer import CN_Analyzer +import pandas as pd + +class VN_Analyzer(CN_Analyzer): + def __init__(self): + super().__init__() + self.market = 'VN' + + self.mapping = { + 'income': { + 'revenue': 'revenue', + 'net_income': 'net_income', + 'gross_profit': 'gross_profit', + 'total_profit': 'total_profit', + 'sga_exp': 'sga_exp' + }, + 'balance': { + 'total_equity': 'total_equity', + 'total_assets': 'total_assets', + 'total_liabilities': 'total_liabilities', + 'current_assets': 'current_assets', + 'current_liabilities': 'current_liabilities', + 'cash': 'cash', + 'receivables': 'receivables', + 'inventory': 'inventory', + 'fixed_assets': 'fixed_assets', + 'goodwill': 'goodwill', + 'short_term_debt': 'short_term_debt', + 'long_term_debt': 'long_term_debt' + }, + 'cashflow': { + 'ocf': 'ocf', + 'capex': 'capex', + 'dividends': 'dividends' + } + } + + def _post_process_columns(self, df, type): + if market_type := self.mapping.get(type): + 
for col in market_type.values(): + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + df = super()._post_process_columns(df, type) + if type == 'balance': + if 'long_term_investments' in df.columns: + df['lt_invest'] = df['long_term_investments'] + + if 'long_term_debt' not in df.columns: df['long_term_debt'] = 0 + if 'long_term_borrowings' in df.columns: + df['long_term_debt'] = df['long_term_debt'].fillna(0) + df['long_term_borrowings'].fillna(0) + return df + + def calculate_indicators(self, df_merged, market_metrics, historical_metrics): + if 'revenue' in df_merged.columns and 'gross_profit' in df_merged.columns: + df_merged['cogs'] = df_merged['revenue'] - df_merged['gross_profit'] + + df_merged = super().calculate_indicators(df_merged, market_metrics, historical_metrics) + + has_sga = False + if 'sga_exp' in df_merged.columns and 'revenue' in df_merged.columns: + if df_merged['sga_exp'].notna().any() and (df_merged['sga_exp'] != 0).any(): + df_merged['SgaRatio'] = self._safe_div(df_merged['sga_exp'], df_merged['revenue']) + has_sga = True + + if not has_sga: + sga_sum = 0 + if 'selling_exp' in df_merged.columns: sga_sum = sga_sum + df_merged['selling_exp'].fillna(0) + if 'admin_exp' in df_merged.columns: sga_sum = sga_sum + df_merged['admin_exp'].fillna(0) + if 'revenue' in df_merged.columns: + df_merged['SgaRatio'] = self._safe_div(sga_sum, df_merged['revenue']) + + if 'income_tax' in df_merged.columns and 'net_income' in df_merged.columns: + ebt_approx = df_merged['net_income'] + df_merged['income_tax'] + df_merged['TaxRate'] = self._safe_div(df_merged['income_tax'], ebt_approx) + + if 'GrossMargin' in df_merged.columns and 'NetMargin' in df_merged.columns: + other_ratio = df_merged['GrossMargin'] - df_merged['NetMargin'] + if 'SgaRatio' in df_merged.columns: + other_ratio = other_ratio - df_merged['SgaRatio'].fillna(0) + if 'RDRatio' in df_merged.columns: + other_ratio = other_ratio - df_merged['RDRatio'].fillna(0) + 
df_merged['OtherExpenseRatio'] = other_ratio + + if 'MarketCap' in df_merged.columns: + if 'net_income' in df_merged.columns: + calculated_pe = self._safe_div(df_merged['MarketCap'], df_merged['net_income']) + if 'PE' not in df_merged.columns: + df_merged['PE'] = calculated_pe + else: + cond_pe = (df_merged['PE'] != 0) & df_merged['PE'].notna() + df_merged['PE'] = df_merged['PE'].where(cond_pe, calculated_pe) + + if 'total_equity' in df_merged.columns: + calculated_pb = self._safe_div(df_merged['MarketCap'], df_merged['total_equity']) + if 'PB' not in df_merged.columns: + df_merged['PB'] = calculated_pb + else: + cond_pb = (df_merged['PB'] != 0) & df_merged['PB'].notna() + df_merged['PB'] = df_merged['PB'].where(cond_pb, calculated_pb) + + if 'dividends' in df_merged.columns: + calculated_yield = self._safe_div(df_merged['dividends'], df_merged['MarketCap']) * 100 + if 'DividendYield' not in df_merged.columns: + df_merged['DividendYield'] = calculated_yield + else: + df_merged['DividendYield'] = df_merged['DividendYield'].fillna(calculated_yield) + + if 'employee_count' in df_merged.columns: + df_merged['Employees'] = df_merged['employee_count'] + if 'revenue' in df_merged.columns: + df_merged['RevenuePerEmp'] = self._safe_div(df_merged['revenue'], df_merged['employee_count']) + if 'net_income' in df_merged.columns: + df_merged['ProfitPerEmp'] = self._safe_div(df_merged['net_income'], df_merged['employee_count']) + + return df_merged diff --git a/src/fetchers/factory.py b/src/fetchers/factory.py index d4a7b89..63ab3a4 100644 --- a/src/fetchers/factory.py +++ b/src/fetchers/factory.py @@ -31,5 +31,14 @@ class FetcherFactory: raise ValueError("iFinD Refresh Token is required for JP market") from .jp_fetcher import JpFetcher return JpFetcher(ifind_token) + elif market == 'VN': + ifind_token = kwargs.get('ifind_refresh_token') + if not ifind_token: + import os + ifind_token = os.getenv('IFIND_REFRESH_TOKEN') + if not ifind_token: + raise ValueError("iFinD Refresh Token 
is required for VN market") + from .vn_fetcher import VnFetcher + return VnFetcher(ifind_token) else: raise ValueError(f"Unsupported market: {market}") diff --git a/src/fetchers/hk_fetcher.py b/src/fetchers/hk_fetcher.py index 348f265..45e4281 100644 --- a/src/fetchers/hk_fetcher.py +++ b/src/fetchers/hk_fetcher.py @@ -74,8 +74,10 @@ class HkFetcher(DataFetcher): if not res: return pd.DataFrame() - if res.get("errorcode") != 0: - print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})") + # Default to 0 if not present (for lenient mocking) or check properly + error_code = res.get("errorcode", 0) + if error_code != 0: + print(f"iFinD API Error: {res.get('errmsg')} (code: {error_code})") return pd.DataFrame() tables = res.get("tables", []) @@ -142,42 +144,61 @@ class HkFetcher(DataFetcher): def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame: """通用获取历年财务数据 (HKD 为主,但 iFinD 支持转 CNY)""" code = self._get_ifind_code(symbol) - basic_info = self._fetch_basic_info(symbol) - # HK stocks don't always use the same acc_date, but we can try to fetch recent years - current_year = int(time.strftime("%Y")) + + # 1. 
First, determine the most recent valid year by trying backwards from current year + last_valid_year = None + + # Try up to 3 years back to find the latest available report + # e.g., in Jan 2026, try 2026 -> fail, 2025 -> success + for offset in range(3): + test_year = current_year - offset + test_date = f"{test_year}1231" + + # Use the first indicator to test availability + first_indicator = indicator_configs[0] + params = { + "codes": code, + "indipara": [ + {"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]} + ] + } + res = self.cli.post("basic_data_service", params) + df = self._parse_ifind_tables(res) + + if not df.empty: + # Check for non-null values + valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None + if pd.notna(valid_val) and valid_val != 0: + last_valid_year = test_year + break + + if last_valid_year is None: + # Fallback to current year if nothing found (will likely return empty/zeros, but keeps logic flowing) + last_valid_year = current_year + + # 2. Fetch 5 years starting from the last valid year all_dfs = [] - - # HK stocks often report semi-annually or annually. Let's fetch recent reporting periods. - # Instead of guessing dates, we can use "reporting_period" but iFinD's basic_data_service - # often works better with explicit dates if we want annuals. - # Alternatively, we can fetch multiple periods. 
- for i in range(5): - target_year = current_year - i - # Try 1231 as primary guess for annual - for month_day in ["1231", "0331", "0630", "0930"]: - target_date = f"{target_year}{month_day}" - params = { - "codes": code, - "indipara": [ - {"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]} - for item in indicator_configs - ] - } - res = self.cli.post("basic_data_service", params) - df = self._parse_ifind_tables(res) - if not df.empty: - # Check if it's mostly empty data - valid_cols = [c for c in df.columns if c not in ['end_date', 'date']] - if not df[valid_cols].isnull().all().all(): - df['end_date'] = target_date - # Drop columns that are entirely NA to prevent FutureWarning in pd.concat - df = df.dropna(axis=1, how='all') - all_dfs.append(df) - # If we found data for this year, maybe we don't need to try other months? - # Actually some companies changed their fiscal year. - break + target_year = last_valid_year - i + target_date = f"{target_year}1231" + + params = { + "codes": code, + "indipara": [ + {"indicator": item["indicator"], "indiparams": [target_date, item.get("type", "1"), "CNY"]} + for item in indicator_configs + ] + } + res = self.cli.post("basic_data_service", params) + df = self._parse_ifind_tables(res) + + if not df.empty: + valid_cols = [c for c in df.columns if c not in ['end_date', 'date']] + if not df[valid_cols].isnull().all().all(): + df['end_date'] = target_date + df = df.dropna(axis=1, how='all') + all_dfs.append(df) if not all_dfs: return pd.DataFrame() @@ -186,17 +207,59 @@ class HkFetcher(DataFetcher): def get_income_statement(self, symbol: str) -> pd.DataFrame: indicators = [ - {"indicator": "revenue_oas"}, - {"indicator": "gross_profit_oas"}, - {"indicator": "sga_expenses_oas"}, - {"indicator": "selling_marketing_expenses_oas"}, - {"indicator": "ga_expenses_oas"}, - {"indicator": "rd_expenses_oas"}, - {"indicator": "income_tax_expense_oas"}, - {"indicator": 
"net_income_attri_to_common_sh_oas"}, - {"indicator": "operating_income_oas"}, - {"indicator": "ebit_oas"}, - {"indicator": "depreciation_and_amortization_oas"} + {"indicator": "total_oi"}, + {"indicator": "prime_oi"}, + {"indicator": "other_oi"}, + {"indicator": "operating_cost"}, + {"indicator": "operating_expense"}, + {"indicator": "operating_fee"}, + {"indicator": "p_depreciation_and_amortization"}, + {"indicator": "gross_profit"}, + {"indicator": "sales_ad_and_ga"}, + {"indicator": "rad_cost"}, + {"indicator": "sales_fee"}, + {"indicator": "financial_expense"}, + {"indicator": "sales_income"}, + {"indicator": "sales_cost"}, + {"indicator": "other_income"}, + {"indicator": "manage_fee"}, + {"indicator": "deprec_and_amorti"}, + {"indicator": "total_other_opearting_expense"}, + {"indicator": "p_total_cost"}, + {"indicator": "operating_profit"}, + {"indicator": "total_gal"}, + {"indicator": "interest_income"}, + {"indicator": "interest_net_pay"}, + {"indicator": "interest_expense"}, + {"indicator": "income_from_asso_and_joint"}, + {"indicator": "other_gal_effct_profit_pre_tax"}, + {"indicator": "conti_op_before_tax"}, + {"indicator": "profit_before_noncurrent_items"}, + {"indicator": "profit_and_loss_of_noncurrent_items"}, + {"indicator": "profit_before_tax"}, + {"indicator": "income_tax"}, + {"indicator": "profit_after_tax"}, + {"indicator": "minoritygal"}, + {"indicator": "continue_operate_net_profit"}, + {"indicator": "noncontinue_operate_net_profit"}, + {"indicator": "other_special_items"}, + {"indicator": "ni_attr_to_cs"}, + {"indicator": "np_atms"}, + {"indicator": "preferred_divid_and_other_adjust"}, + {"indicator": "oci"}, + {"indicator": "total_oci"}, + {"indicator": "oci_from_parent"}, + {"indicator": "oci_from_minority"}, + {"indicator": "invest_property_fv_chg"}, + {"indicator": "operating_amt"}, + {"indicator": "oi_si"}, + {"indicator": "operating_premium_profit_si"}, + {"indicator": "to_toallied_corp_perf"}, + {"indicator": 
"to_joint_control_entity_perf"}, + {"indicator": "pre_tax_profit_si"}, + {"indicator": "after_tax_profit_si"}, + {"indicator": "profit_attrbt_to_nonholders"}, + {"indicator": "total_income_atncs"} ] df = self._fetch_financial_data_annual(symbol, indicators) @@ -204,20 +267,28 @@ class HkFetcher(DataFetcher): self._save_raw_data(df, symbol, "income_statement_raw") rename_map = { - 'revenue_oas': 'revenue', - 'gross_profit_oas': 'gross_profit', - 'sga_expenses_oas': 'sga_exp', - 'selling_marketing_expenses_oas': 'selling_marketing_exp', - 'ga_expenses_oas': 'ga_exp', - 'rd_expenses_oas': 'rd_exp', - 'income_tax_expense_oas': 'income_tax', - 'net_income_attri_to_common_sh_oas': 'net_income', - 'operating_income_oas': 'operating_profit', - 'ebit_oas': 'ebit', - 'depreciation_and_amortization_oas': 'depreciation' + 'total_oi': 'revenue', + 'operating_amt': 'turnover', # Backup for revenue + 'gross_profit': 'gross_profit', + 'sales_ad_and_ga': 'sga_exp', + 'sales_fee': 'selling_marketing_exp', + 'manage_fee': 'ga_exp', + 'rad_cost': 'rd_exp', + 'income_tax': 'income_tax', + 'ni_attr_to_cs': 'net_income', + 'operating_profit': 'operating_profit', + 'depreciation': 'depreciation', + 'deprec_and_amorti': 'depreciation', # Backup + 'p_depreciation_and_amortization': 'depreciation' # Another backup } df_filtered = df.rename(columns=rename_map) + + # Calculate EBIT if not present but operating_profit is there + if 'ebit' not in df_filtered.columns and 'operating_profit' in df_filtered.columns: + # Simple approximation: Operating Profit is often used as EBIT + df_filtered['ebit'] = df_filtered['operating_profit'] + for col in df_filtered.columns: if col not in ['date', 'end_date']: df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce') @@ -226,23 +297,96 @@ class HkFetcher(DataFetcher): def get_balance_sheet(self, symbol: str) -> pd.DataFrame: indicators = [ - {"indicator": "cash_equi_short_term_inve_oas"}, - {"indicator": "accou_and_notes_recei_oas"}, - 
{"indicator": "inventories_oas"}, - {"indicator": "ppe_net_oas"}, - {"indicator": "long_term_inv_and_receiv_oas"}, - {"indicator": "goodwill_and_intasset_oas"}, - {"indicator": "short_term_debt_oas"}, - {"indicator": "short_term_borrowings_oas"}, - {"indicator": "account_and_note_payable_oas"}, - {"indicator": "contra_liabilities_current_oas"}, - {"indicator": "advance_from_cust_current_oas"}, - {"indicator": "defer_revenue_current_oas"}, - {"indicator": "long_term_debt_oas"}, - {"indicator": "long_term_borrowings_oas"}, - {"indicator": "total_assets_oas"}, - {"indicator": "equity_attri_to_companyowner_oas"}, - {"indicator": "prepaid_expenses_current_oas"} + {"indicator": "cce"}, + {"indicator": "st_investment"}, + {"indicator": "total_cash"}, + {"indicator": "account_receivable"}, + {"indicator": "tradable_fnncl_asset"}, + {"indicator": "derivative_fnncl_assets"}, + {"indicator": "restriv_fund"}, + {"indicator": "other_short_term_investment"}, + {"indicator": "ar_nr"}, + {"indicator": "total_ar"}, + {"indicator": "or"}, + {"indicator": "inventory"}, + {"indicator": "flow_assets_dit"}, + {"indicator": "pre_payment"}, + {"indicator": "other_cunrrent_assets_si"}, + {"indicator": "other_ca"}, + {"indicator": "total_ca"}, + {"indicator": "receivables_from_allied_corp"}, + {"indicator": "current_assets_si"}, + {"indicator": "prepay_deposits_etc"}, + {"indicator": "receivables_from_jce"}, + {"indicator": "receivables_from_ac"}, + {"indicator": "recoverable_tax"}, + {"indicator": "total_fixed_assets"}, + {"indicator": "depreciation"}, + {"indicator": "equity_and_lt_invest"}, + {"indicator": "net_fixed_assets"}, + {"indicator": "invest_property"}, + {"indicator": "equity_investment"}, + {"indicator": "investment_in_associate"}, + {"indicator": "investment_in_joints"}, + {"indicator": "held_to_maturity_invest"}, + {"indicator": "goodwill_and_intangible_asset"}, + {"indicator": "intangible_assets"}, + {"indicator": "accum_amortized"}, + {"indicator": 
"noncurrent_assets_dit"}, + {"indicator": "other_noncurrent_assets_si"}, + {"indicator": "dt_assets"}, + {"indicator": "total_noncurrent_assets"}, + {"indicator": "total_assets"}, + {"indicator": "ac_equity"}, + {"indicator": "lease_prepay"}, + {"indicator": "noncurrent_assets_si"}, + {"indicator": "st_lt_current_loan"}, + {"indicator": "trade_financial_lia"}, + {"indicator": "derivative_financial_lia"}, + {"indicator": "ap_np"}, + {"indicator": "accounts_payable"}, + {"indicator": "advance_payment"}, + {"indicator": "st_debt"}, + {"indicator": "contra_liab"}, + {"indicator": "tax_payable"}, + {"indicator": "accrued_liab"}, + {"indicator": "flow_debt_deferred_income"}, + {"indicator": "other_cl"}, + {"indicator": "other_cunrrent_liab_si"}, + {"indicator": "total_cl"}, + {"indicator": "accrued_expenses_etc"}, + {"indicator": "money_payable_toac"}, + {"indicator": "joint_control_entity_payable"}, + {"indicator": "payable_to_associated_corp"}, + {"indicator": "lt_debt"}, + {"indicator": "long_term_loan"}, + {"indicator": "other_noncurrent_liabi"}, + {"indicator": "deferred_tax_liability"}, + {"indicator": "ncl_deferred_income"}, + {"indicator": "other_noncurrent_liab_si"}, + {"indicator": "noncurrent_liab_si"}, + {"indicator": "total_noncurrent_liab"}, + {"indicator": "total_liab"}, + {"indicator": "common_shares"}, + {"indicator": "capital_reserve"}, + {"indicator": "equity_premium"}, + {"indicator": "treasury_stock"}, + {"indicator": "accumgal"}, + {"indicator": "equity_atsopc_sbi"}, + {"indicator": "preferred_stock"}, + {"indicator": "perpetual_debt"}, + {"indicator": "reserve"}, + {"indicator": "other_reserves"}, + {"indicator": "retained_earnings"}, + {"indicator": "oci_bs"}, + {"indicator": "total_common_equity"}, + {"indicator": "equity_belong_to_parent"}, + {"indicator": "minority_interests"}, + {"indicator": "other_equity_si"}, + {"indicator": "total_equity"}, + {"indicator": "total_lib_and_equity"}, + {"indicator": "equity_si"}, + {"indicator": 
"equity_atncs"} ] df = self._fetch_financial_data_annual(symbol, indicators) @@ -250,29 +394,38 @@ class HkFetcher(DataFetcher): self._save_raw_data(df, symbol, "balance_sheet_raw") rename_map = { - 'cash_equi_short_term_inve_oas': 'cash', - 'accou_and_notes_recei_oas': 'receivables', - 'inventories_oas': 'inventory', - 'ppe_net_oas': 'fixed_assets', - 'long_term_inv_and_receiv_oas': 'long_term_investments', - 'goodwill_and_intasset_oas': 'goodwill', - 'short_term_debt_oas': 'short_term_debt', - 'short_term_borrowings_oas': 'short_term_borrowings', - 'account_and_note_payable_oas': 'accounts_payable', - 'contra_liabilities_current_oas': 'contract_liabilities', - 'advance_from_cust_current_oas': 'advances_from_customers', - 'defer_revenue_current_oas': 'deferred_revenue', - 'long_term_debt_oas': 'long_term_debt', - 'long_term_borrowings_oas': 'long_term_borrowings', - 'total_assets_oas': 'total_assets', - 'equity_attri_to_companyowner_oas': 'total_equity', - 'prepaid_expenses_current_oas': 'prepayment' + 'cce': 'cash', + 'ar_nr': 'receivables', + 'inventory': 'inventory', + 'net_fixed_assets': 'fixed_assets', + 'equity_and_lt_invest': 'long_term_investments', + 'goodwill_and_intangible_asset': 'goodwill', + 'st_debt': 'short_term_debt', + 'st_lt_current_loan': 'short_term_borrowings', + 'ap_np': 'accounts_payable', + 'contra_liab': 'contract_liabilities', + 'advance_payment': 'advances_from_customers', + 'flow_debt_deferred_income': 'deferred_revenue', + 'lt_debt': 'long_term_debt', + 'long_term_loan': 'long_term_borrowings', + 'total_assets': 'total_assets', + 'equity_belong_to_parent': 'total_equity', + 'pre_payment': 'prepayment' } df_filtered = df.rename(columns=rename_map) + + # Deduplicate columns just in case + df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()] + if 'total_liabilities' not in df_filtered.columns or df_filtered['total_liabilities'].isnull().all(): - if 'total_assets' in df_filtered.columns and 'total_equity' in 
df_filtered.columns: + if 'total_liab' in df_filtered.columns: + df_filtered['total_liabilities'] = df_filtered['total_liab'] + elif 'total_assets' in df_filtered.columns and 'total_equity' in df_filtered.columns: df_filtered['total_liabilities'] = df_filtered['total_assets'] - df_filtered['total_equity'] + + # Deduplicate again in case total_liabilities logic added a dupe (unlikely) + df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated()] for col in df_filtered.columns: if col not in ['date', 'end_date']: @@ -282,9 +435,17 @@ class HkFetcher(DataFetcher): def get_cash_flow(self, symbol: str) -> pd.DataFrame: indicators = [ - {"indicator": "net_cash_flows_from_oa_oas"}, - {"indicator": "purchase_of_ppe_and_ia_oas"}, - {"indicator": "dividends_paid_oas"} + {"indicator": "ni"}, + {"indicator": "depreciation_and_amortization"}, + {"indicator": "operating_capital_change"}, + {"indicator": "ncf_from_oa"}, + {"indicator": "capital_cost"}, + {"indicator": "invest_buy"}, + {"indicator": "ncf_from_ia"}, + {"indicator": "increase_in_share_capital"}, + {"indicator": "decrease_in_share_capital"}, + {"indicator": "total_dividends_paid"}, + {"indicator": "ncf_from_fa"} ] df = self._fetch_financial_data_annual(symbol, indicators) @@ -292,9 +453,9 @@ class HkFetcher(DataFetcher): self._save_raw_data(df, symbol, "cash_flow_raw") rename_map = { - 'net_cash_flows_from_oa_oas': 'ocf', - 'purchase_of_ppe_and_ia_oas': 'capex', - 'dividends_paid_oas': 'dividends' + 'ncf_from_oa': 'ocf', + 'capital_cost': 'capex', + 'total_dividends_paid': 'dividends' } df_filtered = df.rename(columns=rename_map) @@ -482,3 +643,104 @@ class HkFetcher(DataFetcher): df_emp = pd.DataFrame(results) self._save_raw_data(df_emp, symbol, "employee_count_raw") return df_emp + + def get_financial_ratios(self, symbol: str) -> pd.DataFrame: + """获取官方计算的财务指标(比率、周转天数等)""" + code = self._get_ifind_code(symbol) + current_year = int(time.strftime("%Y")) + + # 1. 
Determine the latest valid year + last_valid_year = None + for offset in range(3): + test_year = current_year - offset + # Try getting ROE as a proxy for data availability + test_date = f"{test_year}1231" + params = { + "codes": code, + "indipara": [{"indicator": "roe", "indiparams": [test_date]}] + } + res = self.cli.post("basic_data_service", params) + df = self._parse_ifind_tables(res) + + if not df.empty: + val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None + if pd.notna(val) and val != 0: + last_valid_year = test_year + break + + if last_valid_year is None: + last_valid_year = current_year + + all_dfs = [] + + # 2. Fetch 5 years starting from last valid year + for i in range(5): + target_year = last_valid_year - i + date_str = f"{target_year}1231" + year_str = str(target_year) + + indipara = [] + + # 1. 人均指标 (参数: Year, "100") + for key in ["salary_pp", "revenue_pp", "profit_pp"]: + indipara.append({"indicator": key, "indiparams": [year_str, "100"]}) + + # 2. 财务比率与周转率 (参数: Date YYYYMMDD) + ratio_keys = [ + "roe", "roa", "roic", + "sales_fee_to_or", "manage_fee_to_revenue", "rad_expense_to_total_income", + "operating_revenue_yoy", "np_atsopc_yoy", + "ibdebt_ratio_asset_base", + "inventory_turnover_days", "receivable_turnover_days", "accounts_payable_turnover_days", + "fixed_asset_turnover_ratio", "total_capital_turnover" + ] + for key in ratio_keys: + indipara.append({"indicator": key, "indiparams": [date_str]}) + + params = { + "codes": code, + "indipara": indipara + } + + res = self.cli.post("basic_data_service", params) + df = self._parse_ifind_tables(res) + + if not df.empty: + if 'end_date' not in df.columns: + df['end_date'] = date_str + + # Filter out columns that are all NaN + df = df.dropna(axis=1, how='all') + + # Identify if we have meaningful data (at least one valid metric) + valid_cols = [c for c in df.columns if c not in ['end_date', 'date', 'code', 'thscode']] + if not df[valid_cols].isnull().all().all(): + all_dfs.append(df) + + 
if not all_dfs: + return pd.DataFrame() + + combined = pd.concat(all_dfs, ignore_index=True) + self._save_raw_data(combined, symbol, "financial_ratios_raw") + + rename_map = { + "salary_pp": "salary_per_employee", + "revenue_pp": "revenue_per_employee", + "profit_pp": "profit_per_employee", + "sales_fee_to_or": "selling_expense_ratio", + "manage_fee_to_revenue": "admin_expense_ratio", + "rad_expense_to_total_income": "rd_expense_ratio", + "operating_revenue_yoy": "revenue_growth", + "np_atsopc_yoy": "net_profit_growth", + "ibdebt_ratio_asset_base": "interest_bearing_debt_ratio", + "fixed_asset_turnover_ratio": "fixed_asset_turnover", + "total_capital_turnover": "total_asset_turnover" + } + + df_final = combined.rename(columns=rename_map) + + for col in df_final.columns: + if col not in ['date', 'end_date']: + df_final[col] = pd.to_numeric(df_final[col], errors='coerce') + + return self._filter_data(df_final) diff --git a/src/fetchers/jp_fetcher.py b/src/fetchers/jp_fetcher.py index d9a8395..ebf120e 100644 --- a/src/fetchers/jp_fetcher.py +++ b/src/fetchers/jp_fetcher.py @@ -151,11 +151,41 @@ class JpFetcher(DataFetcher): acc_date = basic_info.get("accounting_date", "1231") current_year = int(time.strftime("%Y")) + + # 1. 
First, determine the most recent valid year by trying backwards from current year + last_valid_year = None + + # Try up to 3 years back to find the latest available report + for offset in range(3): + test_year = current_year - offset + test_date = f"{test_year}{acc_date}" + + # Use the first indicator to test availability + first_indicator = indicator_configs[0] + params = { + "codes": code, + "indipara": [ + {"indicator": first_indicator["indicator"], "indiparams": [test_date, first_indicator.get("type", "1"), "CNY"]} + ] + } + res = self.cli.post("basic_data_service", params) + df = self._parse_ifind_tables(res) + + if not df.empty: + # Check for non-null values + valid_val = df.iloc[0, 0] if not df.empty and df.shape[1] > 0 else None + if pd.notna(valid_val) and valid_val != 0: + last_valid_year = test_year + break + + if last_valid_year is None: + last_valid_year = current_year + all_dfs = [] - # 获取最近 5 年的数据,精准定位会计年结日 + # 2. Fetch 5 years starting from the last valid year for i in range(5): - target_year = current_year - i + target_year = last_valid_year - i target_date = f"{target_year}{acc_date}" params = { diff --git a/src/fetchers/us_fetcher.py b/src/fetchers/us_fetcher.py index f8434b3..c53bb0b 100644 --- a/src/fetchers/us_fetcher.py +++ b/src/fetchers/us_fetcher.py @@ -48,6 +48,15 @@ class UsFetcher(DataFetcher): df_annual = pd.DataFrame(data["annualReports"]) if "fiscalDateEnding" in df_annual.columns: df_annual = df_annual.sort_values("fiscalDateEnding", ascending=False) + + # Dynamic year filtering: Find the latest report with valid data and take surrounding 5 years + # For Alpha Vantage, data is already sorted by date descending. + # We simply check for the first row with non-None values in critical columns if possible, + # but usually AV returns valid blocks. We'll just take the top 5. + # Unlike iFinD, AV returns a list of available reports, so we don't need to probe year by year. 
class VnFetcher(DataFetcher):
    """Fetch Vietnam-market company data through the iFinD HTTP API.

    Every public ``get_*`` method issues one or more requests via
    ``IFindClient.post``, stores the unprocessed response table through
    ``DataStorage.save_data`` (market ``'VN'``) and returns a normalised
    ``pandas.DataFrame`` (or a ``dict`` for ``get_market_metrics``).

    Dates handled by this class are compact ``YYYYMMDD`` strings; the fiscal
    year end is a compact ``MMDD`` string (``"1231"`` when unknown).
    """

    # Market code used as the storage namespace.
    MARKET = 'VN'
    # Currency argument sent with every monetary indicator request.
    CURRENCY = "CNY"
    # Fiscal year end (MMDD) assumed when iFinD does not report a usable one.
    DEFAULT_ACCOUNTING_DATE = "1231"
    # Number of fiscal years collected by the per-year request loops.
    HISTORY_YEARS = 5
    # Number of most recent years probed to locate the latest published report.
    PROBE_YEARS = 3
    # Columns that must stay textual when a statement is coerced to numbers.
    _NON_NUMERIC_COLUMNS = ('date', 'end_date')

    # iFinD indicator name -> normalised column name. The key order is the
    # request order; the first key is also the indicator used for probing.
    _INCOME_FIELDS = {
        'revenue_oas': 'revenue',
        'gross_profit_oas': 'gross_profit',
        'sga_expenses_oas': 'sga_exp',
        'selling_marketing_expenses_oas': 'selling_marketing_exp',
        'ga_expenses_oas': 'ga_exp',
        'rd_expenses_oas': 'rd_exp',
        'income_tax_expense_oas': 'income_tax',
        'net_income_attri_to_common_sh_oas': 'net_income',
        'operating_income_oas': 'operating_profit',
    }
    _BALANCE_FIELDS = {
        'cash_equi_short_term_inve_oas': 'cash',
        'accou_and_notes_recei_oas': 'receivables',
        'inventories_oas': 'inventory',
        'ppe_net_oas': 'fixed_assets',
        'long_term_inv_and_receiv_oas': 'long_term_investments',
        'goodwill_and_intasset_oas': 'goodwill',
        'short_term_debt_oas': 'short_term_debt',
        'short_term_borrowings_oas': 'short_term_borrowings',
        'account_and_note_payable_oas': 'accounts_payable',
        'contra_liabilities_current_oas': 'contract_liabilities',
        'advance_from_cust_current_oas': 'advances_from_customers',
        'defer_revenue_current_oas': 'deferred_revenue',
        'long_term_debt_oas': 'long_term_debt',
        'long_term_borrowings_oas': 'long_term_borrowings',
        'total_assets_oas': 'total_assets',
        'equity_attri_to_companyowner_oas': 'total_equity',
        'prepaid_expenses_current_oas': 'prepayment',
    }
    _CASH_FLOW_FIELDS = {
        'net_cash_flows_from_oa_oas': 'ocf',
        'purchase_of_ppe_and_ia_oas': 'capex',
        'dividends_paid_oas': 'dividends',
    }

    def __init__(self, api_key: str):
        """Create the iFinD client and storage backend.

        Args:
            api_key: The iFinD refresh token.
        """
        super().__init__(api_key)
        self.cli = IFindClient(refresh_token=api_key)
        self.storage = DataStorage()
        # iFinD code -> basic-info dict, so each company is queried only once.
        self._basic_info_cache = {}

    # ------------------------------------------------------------------
    # Small value helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_missing(value) -> bool:
        """Return True when *value* is ``None`` or a pandas/NumPy missing value."""
        if value is None:
            return True
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Containers yield an array from pd.isna; they are not "missing".
            return False

    @classmethod
    def _clean_text(cls, value) -> str:
        """Return *value* as text, mapping missing values to an empty string."""
        return "" if cls._is_missing(value) else str(value)

    @staticmethod
    def _strip_date_separators(value) -> str:
        """Return *value* as text with ``-`` and ``/`` removed."""
        return str(value).replace('-', '').replace('/', '')

    @classmethod
    def _compact_date(cls, value) -> str:
        """Return *value* as a compact date string, dropping any time part."""
        return cls._strip_date_separators(value).split(' ')[0]

    @classmethod
    def _normalize_accounting_date(cls, raw) -> str:
        """Return *raw* as a 4-digit ``MMDD`` string, or ``""`` if unusable."""
        text = cls._strip_date_separators(cls._clean_text(raw))
        if not text.isdigit():
            return ""
        if len(text) == 8:
            # NOTE(review): assumes an 8-digit value is YYYYMMDD - confirm
            # against the iFinD `accounting_date` indicator documentation.
            return text[4:]
        if len(text) in (3, 4):
            # A numeric cell loses its leading zero (e.g. 331 for 03-31).
            return text.zfill(4)
        return ""

    @classmethod
    def _to_float(cls, value) -> float:
        """Return *value* as a float, mapping missing or falsy values to 0.0."""
        if cls._is_missing(value) or not value:
            return 0.0
        return float(value)

    @staticmethod
    def _first_nonzero(df: pd.DataFrame, column: str):
        """Return the first row's *column* as a float, or None if absent, NaN or 0."""
        if df.empty or column not in df.columns:
            return None
        value = df[column].iloc[0]
        if pd.notna(value) and value != 0:
            return float(value)
        return None

    # ------------------------------------------------------------------
    # Request / parsing helpers
    # ------------------------------------------------------------------
    def _get_ifind_code(self, symbol: str) -> str:
        """Return the iFinD security code for *symbol*.

        The symbol is passed through unchanged: the caller is expected to
        supply the code in the form iFinD accepts (e.g. ``VNM`` or ``VNM.VN``).
        """
        return symbol

    def _accounting_date(self, symbol: str) -> str:
        """Return the company's fiscal year end as ``MMDD``."""
        info = self._fetch_basic_info(symbol)
        return info.get("accounting_date", self.DEFAULT_ACCOUNTING_DATE)

    def _fetch_basic_info(self, symbol: str) -> dict:
        """Return the company's name, fiscal year end and listing date.

        The result is cached per iFinD code. When the request yields no data
        the defaults are returned (and cached): empty name and listing date,
        and the default fiscal year end.

        Returns:
            A dict with keys ``name``, ``accounting_date`` (``MMDD``) and
            ``ipo_date`` (separators removed).
        """
        code = self._get_ifind_code(symbol)
        cached = self._basic_info_cache.get(code)
        if cached is not None:
            return cached

        params = {
            "codes": code,
            "indipara": [
                {"indicator": "corp_cn_name", "indiparams": []},
                {"indicator": "accounting_date", "indiparams": []},
                {"indicator": "ipo_date", "indiparams": []}
            ]
        }
        res = self.cli.post("basic_data_service", params)
        df = self._parse_ifind_tables(res)

        info = {
            "name": "",
            "accounting_date": self.DEFAULT_ACCOUNTING_DATE,
            "ipo_date": ""
        }

        if not df.empty:
            self._save_raw_data(df, symbol, "basic_info_raw")
            row = df.iloc[0]
            # Missing cells must not leak into the result as "None"/"nan":
            # the accounting date is later spliced into report dates.
            info["name"] = self._clean_text(row.get("corp_cn_name"))
            acc_date = self._normalize_accounting_date(row.get("accounting_date"))
            if acc_date:
                info["accounting_date"] = acc_date
            info["ipo_date"] = self._strip_date_separators(
                self._clean_text(row.get("ipo_date"))
            )

        self._basic_info_cache[code] = info
        return info

    def _save_raw_data(self, data: object, symbol: str, name: str):
        """Persist *data* under ``raw_<name>`` for *symbol*.

        Args:
            data: A DataFrame, a dict (stored as a one-row frame) or ``None``
                (nothing is stored).
            symbol: Security symbol used as the storage key.
            name: Dataset name; ``raw_`` is prepended.
        """
        if data is None:
            return
        df = pd.DataFrame([data]) if isinstance(data, dict) else data
        self.storage.save_data(df, self.MARKET, symbol, f"raw_{name}")

    def _parse_ifind_tables(self, res: dict) -> pd.DataFrame:
        """Convert the first entry of an iFinD ``tables`` response to a DataFrame.

        An ``end_date`` column (compact date string) is added from the
        response's ``time`` list when its length matches the data, otherwise
        from the first available of the ``time``/``date``/``trade_date``/
        ``REPORT_DATE`` columns.

        Returns:
            The parsed frame, or an empty frame when the response is empty,
            reports a non-zero ``errorcode`` or carries no table data.
        """
        if not res:
            return pd.DataFrame()

        if res.get("errorcode") != 0:
            print(f"iFinD API Error: {res.get('errmsg')} (code: {res.get('errorcode')})")
            return pd.DataFrame()

        tables = res.get("tables", [])
        if not tables:
            return pd.DataFrame()

        table_info = tables[0]
        table_data = table_info.get("table", {})
        times = table_info.get("time", [])
        if not table_data:
            return pd.DataFrame()

        # Scalars are wrapped so every column is list-like for the constructor.
        columns = {
            key: value if isinstance(value, list) else [value]
            for key, value in table_data.items()
        }
        df = pd.DataFrame(columns)

        if times and len(times) == len(df):
            df['end_date'] = [self._compact_date(t) for t in times]
        elif times and len(df) == 1:
            df['end_date'] = self._compact_date(times[0])

        if 'end_date' not in df.columns:
            for col in ('time', 'date', 'trade_date', 'REPORT_DATE'):
                if col in df.columns:
                    df['end_date'] = (
                        df[col].astype(str)
                        .str.replace('-', '', regex=False)
                        .str.replace('/', '', regex=False)
                        .str.split(' ').str[0]
                    )
                    break

        return df

    def _filter_data(self, df: pd.DataFrame, annual_suffix: str = "1231") -> pd.DataFrame:
        """Keep the latest period, its year-ago comparable and all annual periods.

        Args:
            df: Statement rows with a compact ``end_date`` column.
            annual_suffix: ``MMDD`` suffix identifying fiscal-year-end rows.

        Returns:
            The selected rows, de-duplicated on ``end_date`` and sorted newest
            first. *df* is returned unchanged when it is empty or has no
            ``end_date`` column.
        """
        if df.empty or 'end_date' not in df.columns:
            return df

        df = df.sort_values(by='end_date', ascending=False)
        df = df.drop_duplicates(subset=['end_date'], keep='first')
        if df.empty:
            return df

        end_dates = df['end_date'].astype(str)
        latest_record = df.iloc[[0]]
        try:
            # YYYYMMDD - 10000 is the same month/day one year earlier.
            year_ago = str(int(end_dates.iloc[0]) - 10000)
        except (TypeError, ValueError):
            comparable_record = pd.DataFrame()
        else:
            comparable_record = df[end_dates == year_ago]

        annual_records = df[end_dates.str.endswith(annual_suffix)]

        combined = pd.concat([latest_record, comparable_record, annual_records])
        combined = combined.drop_duplicates(subset=['end_date'])
        return combined.sort_values(by='end_date', ascending=False)

    def _probe_latest_report_year(self, code: str, acc_date: str,
                                  indicator: dict, current_year: int) -> int:
        """Return the most recent year whose annual report has a non-zero value.

        Probes *indicator* for the last ``PROBE_YEARS`` fiscal year ends and
        falls back to *current_year* when none of them returns data.
        """
        for offset in range(self.PROBE_YEARS):
            year = current_year - offset
            params = {
                "codes": code,
                "indipara": [
                    {"indicator": indicator["indicator"],
                     "indiparams": [f"{year}{acc_date}", indicator.get("type", "1"), self.CURRENCY]}
                ]
            }
            df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
            if df.empty or df.shape[1] == 0:
                continue
            value = df.iloc[0, 0]
            if pd.notna(value) and value != 0:
                return year
        return current_year

    def _fetch_financial_data_annual(self, symbol: str, indicator_configs: list) -> pd.DataFrame:
        """Fetch the given indicators for the last ``HISTORY_YEARS`` fiscal years.

        Args:
            symbol: Security symbol.
            indicator_configs: Dicts with an ``indicator`` name and an optional
                ``type`` (defaults to ``"1"``). The first entry is used to
                detect the latest published fiscal year.

        Returns:
            One row per fiscal year that returned at least one value, with
            ``end_date`` set to that fiscal year end; empty when nothing was
            returned or *indicator_configs* is empty.
        """
        if not indicator_configs:
            return pd.DataFrame()

        code = self._get_ifind_code(symbol)
        acc_date = self._accounting_date(symbol)
        current_year = int(time.strftime("%Y"))
        last_valid_year = self._probe_latest_report_year(
            code, acc_date, indicator_configs[0], current_year
        )

        frames = []
        for i in range(self.HISTORY_YEARS):
            target_date = f"{last_valid_year - i}{acc_date}"
            params = {
                "codes": code,
                "indipara": [
                    {"indicator": item["indicator"],
                     "indiparams": [target_date, item.get("type", "1"), self.CURRENCY]}
                    for item in indicator_configs
                ]
            }
            df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
            if df.empty:
                continue
            df['end_date'] = target_date
            # Judge emptiness on the value columns only: end_date is always
            # populated, so including it would keep every all-NaN year.
            if df.drop(columns=['end_date']).isna().all().all():
                continue
            frames.append(df)

        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    def _standardize(self, df: pd.DataFrame, rename_map: dict) -> pd.DataFrame:
        """Rename indicator columns and coerce every value column to numeric."""
        out = df.rename(columns=rename_map)
        for col in out.columns:
            if col not in self._NON_NUMERIC_COLUMNS:
                out[col] = pd.to_numeric(out[col], errors='coerce')
        return out

    def _fetch_statement(self, symbol: str, field_map: dict, raw_name: str) -> pd.DataFrame:
        """Fetch, persist and standardise one annual statement (not yet filtered)."""
        indicators = [{"indicator": name} for name in field_map]
        raw = self._fetch_financial_data_annual(symbol, indicators)
        if raw.empty:
            return raw
        self._save_raw_data(raw, symbol, raw_name)
        return self._standardize(raw, field_map)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_income_statement(self, symbol: str) -> pd.DataFrame:
        """Return annual income-statement rows with normalised column names."""
        df = self._fetch_statement(symbol, self._INCOME_FIELDS, "income_statement_raw")
        if df.empty:
            return df
        return self._filter_data(df, self._accounting_date(symbol))

    def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
        """Return annual balance-sheet rows with normalised column names.

        ``total_liabilities`` is not requested from iFinD; it is derived as
        ``total_assets - total_equity`` when both are available.
        """
        df = self._fetch_statement(symbol, self._BALANCE_FIELDS, "balance_sheet_raw")
        if df.empty:
            return df

        # Derived after numeric coercion so the subtraction never runs on
        # object/string columns.
        has_liabilities = (
            'total_liabilities' in df.columns
            and not df['total_liabilities'].isnull().all()
        )
        if not has_liabilities and {'total_assets', 'total_equity'} <= set(df.columns):
            # NOTE(review): total_equity is the owners' attributable equity, so
            # this figure presumably includes minority interests - confirm.
            df['total_liabilities'] = df['total_assets'] - df['total_equity']

        return self._filter_data(df, self._accounting_date(symbol))

    def get_cash_flow(self, symbol: str) -> pd.DataFrame:
        """Return annual cash-flow rows; ``capex`` is reported as a positive amount."""
        df = self._fetch_statement(symbol, self._CASH_FLOW_FIELDS, "cash_flow_raw")
        if df.empty:
            return df
        if 'capex' in df.columns:
            df['capex'] = df['capex'].abs()
        return self._filter_data(df, self._accounting_date(symbol))

    def get_market_metrics(self, symbol: str) -> dict:
        """Return the company ``name`` and ``list_date`` from the basic info."""
        basic_info = self._fetch_basic_info(symbol)
        return {
            "name": basic_info.get("name", ""),
            "list_date": basic_info.get("ipo_date", "")
        }

    def get_historical_metrics(self, symbol: str, dates: list) -> pd.DataFrame:
        """Return price and market value for each requested date.

        Args:
            symbol: Security symbol.
            dates: Dates as ``YYYYMMDD`` / ``YYYY-MM-DD`` / ``YYYY/MM/DD``.

        Returns:
            One row per requested date with ``date_str``, ``PE``, ``PB``,
            ``MarketCap`` and ``Price``. ``PE`` and ``PB`` are not requested
            here and stay ``0.0``; unavailable values are ``0.0`` as well.
            Empty when *dates* is empty.
        """
        if dates is None or len(dates) == 0:
            return pd.DataFrame()

        code = self._get_ifind_code(symbol)
        results = []
        for d in dates:
            d_str = self._strip_date_separators(d)
            fmt_d = f"{d_str[:4]}-{d_str[4:6]}-{d_str[6:]}" if len(d_str) == 8 else d_str

            params = {
                "codes": code,
                "startdate": fmt_d,
                "enddate": fmt_d,
                "functionpara": {"Interval": "D", "Days": "Alldays", "Fill": "Previous"},
                "indipara": [
                    {"indicator": "pre_close", "indiparams": ["", "0", self.CURRENCY]},
                    {"indicator": "market_value", "indiparams": ["", self.CURRENCY]}
                ]
            }
            df_seq = self._parse_ifind_tables(self.cli.post("date_sequence", params))

            metrics = {'date_str': d_str, 'PE': 0.0, 'PB': 0.0, 'MarketCap': 0.0, 'Price': 0.0}
            if not df_seq.empty:
                if 'end_date' in df_seq.columns:
                    match = df_seq[df_seq['end_date'] <= d_str].tail(1)
                else:
                    match = df_seq.tail(1)
                if not match.empty:
                    if 'pre_close' in match.columns:
                        metrics['Price'] = self._to_float(match['pre_close'].iloc[0])
                    if 'market_value' in match.columns:
                        metrics['MarketCap'] = self._to_float(match['market_value'].iloc[0])
            results.append(metrics)

        df_hist = pd.DataFrame(results)
        self._save_raw_data(df_hist, symbol, "historical_metrics_raw")
        return df_hist

    def get_dividends(self, symbol: str) -> pd.DataFrame:
        """Return the cumulative annual dividend for each of the last years.

        Returns:
            Rows of ``date_str`` (fiscal year end) and ``dividends`` for the
            years with a non-zero value; empty when there are none.
        """
        code = self._get_ifind_code(symbol)
        acc_date = self._accounting_date(symbol)
        current_year = int(time.strftime("%Y"))

        results = []
        for i in range(self.HISTORY_YEARS):
            year_str = str(current_year - i)
            params = {
                "codes": code,
                "indipara": [
                    {"indicator": "annual_cum_dividend", "indiparams": [year_str, self.CURRENCY]}
                ]
            }
            df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
            value = self._first_nonzero(df, 'annual_cum_dividend')
            if value is not None:
                results.append({'date_str': f"{year_str}{acc_date}", 'dividends': value})

        if not results:
            return pd.DataFrame()

        df_div = pd.DataFrame(results)
        self._save_raw_data(df_div, symbol, "dividends_raw")
        return df_div

    def get_repurchases(self, symbol: str) -> pd.DataFrame:
        """Return the repurchase figure for each of the last fiscal years.

        Each year is queried over the window from the previous fiscal year end
        to that year's fiscal year end.

        Returns:
            Rows of ``date_str`` (fiscal year end) and ``repurchases`` for the
            years with a non-zero value; empty when there are none.
        """
        code = self._get_ifind_code(symbol)
        acc_date = self._accounting_date(symbol)
        fmt_mm_dd = f"{acc_date[:2]}-{acc_date[2:]}"
        current_year = int(time.strftime("%Y"))

        results = []
        for i in range(self.HISTORY_YEARS):
            target_year = current_year - i
            start_date = f"{target_year - 1}-{fmt_mm_dd}"
            end_date = f"{target_year}-{fmt_mm_dd}"
            params = {
                "codes": code,
                "indipara": [
                    {"indicator": "repur_num_new", "indiparams": [start_date, end_date, "1"]}
                ]
            }
            df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
            value = self._first_nonzero(df, 'repur_num_new')
            if value is not None:
                results.append({'date_str': f"{target_year}{acc_date}", 'repurchases': value})

        if not results:
            return pd.DataFrame()

        df_repur = pd.DataFrame(results)
        self._save_raw_data(df_repur, symbol, "repurchases_raw")
        return df_repur

    def get_employee_count(self, symbol: str) -> pd.DataFrame:
        """Return the staff number at each of the last fiscal year ends.

        Returns:
            Rows of ``date_str`` (fiscal year end) and ``employee_count`` for
            the years with a non-zero value; empty when there are none.
        """
        code = self._get_ifind_code(symbol)
        acc_date = self._accounting_date(symbol)
        mm, dd = acc_date[:2], acc_date[2:]
        current_year = int(time.strftime("%Y"))

        results = []
        for i in range(self.HISTORY_YEARS):
            target_year = current_year - i
            params = {
                "codes": code,
                "indipara": [
                    {"indicator": "staff_num", "indiparams": [f"{target_year}-{mm}-{dd}"]}
                ]
            }
            df = self._parse_ifind_tables(self.cli.post("basic_data_service", params))
            value = self._first_nonzero(df, 'staff_num')
            if value is not None:
                results.append({'date_str': f"{target_year}{acc_date}", 'employee_count': value})

        if not results:
            return pd.DataFrame()

        df_emp = pd.DataFrame(results)
        self._save_raw_data(df_emp, symbol, "employee_count_raw")
        return df_emp
'float'), ('Shareholders', '股东户数', 'int'), diff --git a/data/HK/00700.HK/report.html b/src/reporting/vn_report_generator.py similarity index 62% rename from data/HK/00700.HK/report.html rename to src/reporting/vn_report_generator.py index cb6538f..5152b7e 100644 --- a/data/HK/00700.HK/report.html +++ b/src/reporting/vn_report_generator.py @@ -1,9 +1,233 @@ +from .base_generator import BaseReporter +import pandas as pd +import datetime +import os +import markdown +class VN_ReportGenerator(BaseReporter): + def __init__(self): + super().__init__() + self.indicators = { + "主要指标": [ + ('ROE', 'ROE', 'percent'), + ('ROA', 'ROA', 'percent'), + ('ROIC', 'ROCE/ROIC', 'percent'), + ('GrossMargin', '毛利率', 'percent'), + ('NetMargin', '净利润率', 'percent'), + ('revenue', '收入(亿 CNY)', 'currency_yi'), + ('RevenueGrowth', '收入增速', 'percent_color'), + ('net_income', '净利润(亿 CNY)', 'currency_yi'), + ('NetIncomeGrowth', '净利润增速', 'percent_color'), + ('ocf', '经营净现金流(亿 CNY)', 'currency_yi_color'), + ('Capex', '资本开支(亿 CNY)', 'currency_yi'), + ('FCF', '自由现金流(亿 CNY)', 'currency_yi_compare'), + ('dividends', '分红(亿 CNY)', 'currency_yi'), + ('repurchases', '回购(亿 CNY)', 'currency_yi'), + ('total_assets', '总资产(亿 CNY)', 'currency_yi'), + ('total_equity', '净资产(亿 CNY)', 'currency_yi'), + ('goodwill', '商誉(亿 CNY)', 'currency_yi') + ], + "费用指标": [ + ('SellingRatio', '销售费用率', 'percent'), + ('AdminRatio', '管理费用率', 'percent'), + ('SgaRatio', 'SG&A比例', 'percent'), + ('RDRatio', '研发费用率', 'percent'), + ('OtherExpenseRatio', '其他费用率', 'percent'), + ('DepreciationRatio', '折旧费用占比', 'percent'), + ('TaxRate', '所得税率', 'percent'), + ], + "资产占比": [ + ('CashRatio', '现金占比', 'percent_alert_30'), + ('InventoryRatio', '库存占比', 'percent'), + ('ReceivablesRatio', '应收款占比', 'percent'), + ('PrepaymentRatio', '预付款占比', 'percent'), + ('FixedAssetsRatio', '固定资产占比', 'percent'), + ('LongTermInvestmentRatio', '长期投资占比', 'percent'), + ('GoodwillRatio', '商誉占比', 'percent'), + ('OtherAssetsRatio', '其他资产占比', 'percent'), + ('PayablesRatio', 
'应付款占比', 'percent'), + ('AdvanceReceiptsRatio', '预收款占比', 'percent'), + ('ShortTermDebtRatio', '短期借款占比', 'percent'), + ('LongTermDebtRatio', '长期借款占比', 'percent'), + ('OperatingAssetsRatio', '运营资产占比', 'percent'), + ('InterestBearingDebtRatio', '有息负债率', 'percent'), + ], + "周转能力": [ + ('InventoryDays', '存货周转天数', 'int'), + ('ReceivablesDays', '应收款周转天数', 'int_alert_90'), + ('PayablesDays', '应付款周转天数', 'int'), + ('FixedAssetsTurnover', '固定资产周转率', 'float'), + ('TotalAssetTurnover', '总资产周转率', 'float'), + ], + "人均效率": [ + ('Employees', '员工人数', 'int'), + ('RevenuePerEmp', '人均创收(万 CNY)', 'currency_wan'), + ('ProfitPerEmp', '人均创利(万 CNY)', 'currency_wan'), + ('AvgWage', '人均薪酬(万 CNY)', 'currency_wan'), + ], + "市场表现": [ + ('Price', '股价 (CNY)', 'float'), + ('MarketCap', '市值(亿 CNY)', 'currency_yi_market'), + ('PE', 'PE', 'float'), + ('PB', 'PB', 'float'), + ('Shareholders', '股东户数', 'int'), + ] + } + + def _preprocess_data(self, df, market): + df = super()._preprocess_data(df, market) + if not df.empty: + dates = pd.to_datetime(df['date_str'], format='%Y%m%d') + latest_year = dates.dt.year.max() + is_dec = dates.dt.month == 12 + is_latest = df.index == df.index[0] + df = df[is_dec | is_latest] + return df + + def _format_period_label(self, date_value): + if pd.isna(date_value): + return "-" + date_str = str(date_value) + if len(date_str) != 8: + return date_str + year = date_str[:4] + try: + return f"{year}A" + except ValueError: + return f"{year}A" + + def _get_headers(self, df): + return [self._format_period_label(date_value) for date_value in df['date_str']] + + def _generate_md_company_info(self, symbol, metrics, market): + today_str = datetime.date.today().strftime("%Y-%m-%d") + name = metrics.get('name', '') + raw_list_date = metrics.get('list_date', '') + if isinstance(raw_list_date, str) and len(raw_list_date) == 8: + list_date = f"{raw_list_date[:4]}-{raw_list_date[4:6]}-{raw_list_date[6:]}" + else: + list_date = raw_list_date + pe = metrics.get('pe', 0) or 0 + pb = 
metrics.get('pb', 0) or 0 + div = metrics.get('dividend_yield', 0) or 0 + md = [] + md.append(f"# {name} ({symbol}) - Financial Report") + md.append(f"*Report generated on: {today_str}*\n") + md.append("| 代码 | 简称 | 上市日期 | PE | PB | 股息率(%) |") + md.append("|:---|:---|:---|:---|:---|:---|") + md.append(f"| {symbol} | {name} | {list_date} | {pe:.2f} | {pb:.2f} | {div:.2f}% |") + return "\n".join(md) + + def generate_report(self, df_analysis, symbol, market, metrics, output_dir): + md_content = self._generate_markdown_content(df_analysis, market, symbol, metrics) + os.makedirs(output_dir, exist_ok=True) + md_path = os.path.join(output_dir, "report.md") + with open(md_path, "w", encoding='utf-8') as f: + f.write(md_content) + + df_for_html = df_analysis.copy() if isinstance(df_analysis, pd.DataFrame) else pd.DataFrame() + if not df_for_html.empty: + df_for_html = self._preprocess_data(df_for_html, market) + headers = self._get_headers(df_for_html) + else: + headers = [] + html_content = self._build_html_content(symbol, metrics, headers, df_for_html) + final_html = self.to_html(symbol, html_content) + + html_path = os.path.join(output_dir, "report.html") + with open(html_path, "w", encoding='utf-8') as f: + f.write(final_html) + + def _build_html_content(self, symbol, metrics, headers, df): + today_str = datetime.date.today().strftime("%Y-%m-%d") + name = metrics.get('name') or symbol + raw_list_date = metrics.get('list_date', '') + if isinstance(raw_list_date, str) and len(raw_list_date) == 8: + list_date = f"{raw_list_date[:4]}-{raw_list_date[4:6]}-{raw_list_date[6:]}" + else: + list_date = raw_list_date or "-" + pe = metrics.get('pe', 0) or 0 + pb = metrics.get('pb', 0) or 0 + div = metrics.get('dividend_yield', 0) or 0 + + company_table = f""" + + + + + + + + + + + + + + + + + + + + + +
代码简称上市日期PEPB股息率(%)
{symbol}{name}{list_date}{pe:.2f}{pb:.2f}{div:.2f}%
+ """ + + if df is None or df.empty or not headers: + metrics_table = "

暂无可用财务指标

" + else: + header_cells = "".join([f"{header}" for header in headers]) + data_column_count = max(len(headers), 1) + rows_html = [] + for group_name, items in self.indicators.items(): + rows_html.append( + f"" + f"{group_name}" + f"" + "" + ) + for key, label, fmt_type in items: + value_cells = [f"{label}"] + for _, row_series in df.iterrows(): + value_cells.append(f"{self._format_value(row_series.get(key), fmt_type)}") + row_class = "other-assets-row" if key == 'OtherAssetsRatio' else "" + if row_class: + rows_html.append(f"{''.join(value_cells)}") + else: + rows_html.append(f"{''.join(value_cells)}") + rows_markup = "\n".join(rows_html) + metrics_table = f""" + + + + + {header_cells} + + + + {rows_markup} + +
指标
+ """ + + html_sections = [ + f"

{name} ({symbol}) - Financial Report

", + f"

Report generated on: {today_str}

", + company_table, + '
', + metrics_table + ] + return "\n".join(html_sections) + + def to_html(self, symbol, html_content): + styled_html = ''' - 00700.HK Financial Report + {symbol} Financial Report