Compare commits
No commits in common. "main" and "v0.1.0" have entirely different histories.
@@ -1,51 +0,0 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2024"
rust-version = "1.63"
name = "thread_local"
version = "1.1.9"
authors = ["Amanieu d'Antras <amanieu@gmail.com>"]
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Per-object thread-local storage"
documentation = "https://docs.rs/thread_local/"
readme = "README.md"
keywords = [
    "thread_local",
    "concurrent",
    "thread",
]
license = "MIT OR Apache-2.0"
repository = "https://github.com/Amanieu/thread_local-rs"

[features]
nightly = []

[lib]
name = "thread_local"
path = "src/lib.rs"

[[bench]]
name = "thread_local"
path = "benches/thread_local.rs"
harness = false

[dependencies.cfg-if]
version = "1.0.0"

[dev-dependencies.criterion]
version = "0.5.1"
@@ -1,39 +0,0 @@
# VCS
.git
.gitignore

# Editor/IDE
.vscode
.idea
.DS_Store

# Node/Next.js
frontend/node_modules
frontend/.next
**/node_modules

# Rust build artifacts
target
**/target

# Python/build caches
__pycache__
*.pyc

# Large reference/resources not needed in images
# ref/ is usually ignored, but we need service_kit_mirror for build context
# We use exclusion pattern (!) to allow specific subdirectories
ref/*
!ref/service_kit_mirror
archive/
docs/

# Logs/temp
*.log
tmp/
temp/
.cache

# Docker compose override (optional)
docker-compose.override.yml
30 .gitignore (vendored)
@@ -1,26 +1,3 @@
# Python
__pycache__/
*.pyc
.venv/
.pytest_cache/

# Node
node_modules/
frontend/node_modules/
services/**/node_modules/

# Env & local
.env
.env.*
.DS_Store

# Build artifacts
dist/
build/
ref/
# Binaries
portwardenc-amd64

# ===== Common files =====
# Files generated by the operating system
.DS_Store
@@ -44,11 +21,6 @@ Thumbs.db
*.log
logs/

# Allow committing documentation logs (overrides the wildcard ignore above)
!docs/logs/
!docs/logs/*.md
!docs/*.md

# Temporary files
*.tmp
*.temp
@@ -437,4 +409,4 @@ fixtures/generated/
node_modules/.cache/jest/

# External reference material
Reference/
Reference/
@@ -1,6 +0,0 @@
# Ignore Rust source changes to prevent Tilt from rebuilding/restarting containers.
# We rely on cargo-watch inside the container for hot reload (via volume mounts).
**/*.rs
**/Cargo.toml
**/Cargo.lock
5712 Cargo.lock (generated)
File diff suppressed because it is too large
34 Cargo.toml
@@ -1,34 +0,0 @@
[workspace]
resolver = "2"
members = [
    "services/alphavantage-provider-service",
    "services/api-gateway",
    "services/common-contracts",
    "services/data-persistence-service",
    "services/finnhub-provider-service",
    "services/mock-provider-service",
    "services/report-generator-service",
    "services/tushare-provider-service",
    "services/workflow-orchestrator-service",
    "services/yfinance-provider-service",
    "crates/workflow-context",
    "tests/end-to-end",
]

[workspace.package]
edition = "2024"
version = "0.1.0"
authors = ["Lv, Qi <lvsoft@gmail.com>"]
license = "MIT"
repository = "https://github.com/lvsoft/Fundamental_Analysis"
homepage = "https://github.com/lvsoft/Fundamental_Analysis"
readme = "README.md"

[workspace.dependencies]
rmcp = "0.9.1"
rmcp-macros = "0.9.1"

[patch.crates-io]
service_kit = { path = "ref/service_kit_mirror/service_kit/service_kit" }
service-kit-macros = { path = "ref/service_kit_mirror/service_kit/service_kit/service-kit-macros" }
@@ -1,23 +0,0 @@
# Load the production environment configuration
docker_compose('docker-compose.prod.yml')

# Define the list of services
# These services involve code compilation (release mode) or a frontend build, both of which are slow
# We set them to manual trigger mode so that accidental file changes during development do not trigger a long rebuild
services = [
    'data-persistence-service',
    'api-gateway',
    'mock-provider-service',
    'alphavantage-provider-service',
    'tushare-provider-service',
    'finnhub-provider-service',
    'yfinance-provider-service',
    'report-generator-service',
    'workflow-orchestrator-service',
    'frontend'
]

# Iterate over the services and set their trigger mode to manual
for name in services:
    dc_resource(name, trigger_mode=TRIGGER_MODE_MANUAL)
@@ -1,23 +0,0 @@
# syntax=docker/dockerfile:1.6

FROM python:3.11-slim AS base

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

WORKDIR /workspace

# Copy only the dependency file first to improve cache hit rate
COPY backend/requirements.txt ./backend/requirements.txt
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r backend/requirements.txt

# Source code is provided at runtime via mounted volumes; create the directory here only so the path exists inside the container
RUN mkdir -p /workspace/backend

WORKDIR /workspace/backend

# The default entrypoint is provided by docker-compose
@@ -1,7 +0,0 @@
"""
Application dependencies and providers
"""
from app.services.config_manager import ConfigManager

def get_config_manager() -> ConfigManager:
    return ConfigManager()
@@ -1,194 +0,0 @@
import yaml
import os
import json
from typing import Any, Dict, List, Optional
from numbers import Number
from app.data_providers.base import BaseDataProvider
from app.data_providers.tushare import TushareProvider
# from app.data_providers.ifind import TonghsProvider
from app.data_providers.yfinance import YfinanceProvider
from app.data_providers.finnhub import FinnhubProvider

import logging

logger = logging.getLogger(__name__)

class DataManager:
    _instance = None

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            cls._instance = super(DataManager, cls).__new__(cls)
        return cls._instance

    def __init__(self, config_path: str = None):
        if hasattr(self, '_initialized') and self._initialized:
            return

        if config_path is None:
            # Assume the config file is in the 'config' directory at the root of the repo
            # Find the project root by looking for the config directory
            current_dir = os.path.dirname(__file__)
            while current_dir != os.path.dirname(current_dir):  # Not at filesystem root
                if os.path.exists(os.path.join(current_dir, "config", "data_sources.yaml")):
                    REPO_ROOT = current_dir
                    break
                current_dir = os.path.dirname(current_dir)
            else:
                # Fallback to the original calculation
                REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))

            config_path = os.path.join(REPO_ROOT, "config", "data_sources.yaml")

        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)

        self.providers = {}

        # Build provider base config ONLY from config/config.json (do not read env vars)
        base_cfg: Dict[str, Any] = {"data_sources": {}}

        try:
            # Use the same REPO_ROOT calculation as data_sources.yaml
            current_dir = os.path.dirname(__file__)
            while current_dir != os.path.dirname(current_dir):  # Not at filesystem root
                if os.path.exists(os.path.join(current_dir, "config", "data_sources.yaml")):
                    REPO_ROOT = current_dir
                    break
                current_dir = os.path.dirname(current_dir)
            else:
                # Fallback to the original calculation
                REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))

            cfg_json_path = os.path.join(REPO_ROOT, "config", "config.json")
            if os.path.exists(cfg_json_path):
                with open(cfg_json_path, "r", encoding="utf-8") as jf:
                    cfg_json = json.load(jf)
                ds_from_json = (cfg_json.get("data_sources") or {})
                for name, node in ds_from_json.items():
                    if node.get("api_key"):
                        base_cfg["data_sources"][name] = {"api_key": node.get("api_key")}
                        logger.info(f"Loaded API key for provider '{name}' from config.json")
            else:
                logger.debug("config/config.json not found; skipping JSON token load.")
        except Exception as e:
            logger.warning(f"Failed to read tokens from config/config.json: {e}")
            import traceback
            traceback.print_exc()

        try:
            self._init_providers(base_cfg)
        except Exception as e:
            logger.error(f"Failed to initialize data providers: {e}")

        self._initialized = True

    def _init_providers(self, base_cfg: Dict[str, Any]) -> None:
        """
        Initializes providers with the given base configuration.
        This method should be called after the base config is loaded.
        """
        provider_map = {
            "tushare": TushareProvider,
            # "ifind": TonghsProvider,
            "yfinance": YfinanceProvider,
            "finnhub": FinnhubProvider,
        }

        for name, provider_class in provider_map.items():
            token = base_cfg.get("data_sources", {}).get(name, {}).get("api_key")
            source_config = self.config['data_sources'].get(name, {})

            # Initialize the provider if a token is found or not required
            if token or not source_config.get('api_key_env'):
                try:
                    self.providers[name] = provider_class(token=token)
                except Exception as e:
                    logger.error(f"Failed to initialize provider '{name}': {e}")
            else:
                logger.warning(f"Provider '{name}' requires API key but none provided in config.json. Skipping.")

    def _detect_market(self, stock_code: str) -> str:
        if stock_code.endswith(('.SH', '.SZ')):
            return 'CN'
        elif stock_code.endswith('.HK'):
            return 'HK'
        elif stock_code.endswith('.T'):  # Assuming .T for Tokyo
            return 'JP'
        else:  # Default to US
            return 'US'

    async def get_data(self, method_name: str, stock_code: str, **kwargs):
        market = self._detect_market(stock_code)
        priority_list = self.config.get('markets', {}).get(market, {}).get('priority', [])

        for provider_name in priority_list:
            provider = self.providers.get(provider_name)
            if not provider:
                logger.warning(f"Provider '{provider_name}' not initialized.")
                continue

            try:
                method = getattr(provider, method_name)
                data = await method(stock_code=stock_code, **kwargs)
                is_success = False
                if data is None:
                    is_success = False
                elif isinstance(data, list):
                    is_success = len(data) > 0
                elif isinstance(data, dict):
                    is_success = len(data) > 0
                else:
                    is_success = True

                if is_success:
                    logger.info(f"Data successfully fetched from '{provider_name}' for '{stock_code}'.")
                    return data
            except Exception as e:
                logger.warning(f"Provider '{provider_name}' failed for '{stock_code}': {e}. Trying next provider.")

        logger.error(f"All data providers failed for '{stock_code}' on method '{method_name}'.")
        return None

    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> Dict[str, List[Dict[str, Any]]]:
        data = await self.get_data('get_financial_statements', stock_code, report_dates=report_dates)
        if data is None:
            return {}

        # Normalize to series format
        if isinstance(data, dict):
            # Already in series format (e.g., tushare)
            return data
        elif isinstance(data, list):
            # Convert from flat format to series format
            series: Dict[str, List[Dict[str, Any]]] = {}
            for report in data:
                year = str(report.get('year', report.get('end_date', '')[:4]))
                if not year:
                    continue
                for key, value in report.items():
                    if key in ['ts_code', 'stock_code', 'year', 'end_date', 'period', 'ann_date', 'f_ann_date', 'report_type']:
                        continue
                    # Accept numpy/pandas numeric types as well as builtin numbers
                    if value is not None and isinstance(value, Number):
                        if key not in series:
                            series[key] = []
                        if not any(d['year'] == year for d in series[key]):
                            # Store as builtin float to avoid JSON serialization issues
                            try:
                                numeric_value = float(value)
                            except Exception:
                                # Fallback: skip if cannot coerce to float
                                continue
                            series[key].append({"year": year, "value": numeric_value})
            return series
        else:
            return {}

    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        return await self.get_data('get_daily_price', stock_code, start_date=start_date, end_date=end_date)

    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        return await self.get_data('get_stock_basic', stock_code)

data_manager = DataManager()
@@ -1,88 +0,0 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional

class BaseDataProvider(ABC):
    """
    Abstract base class for all financial data providers.
    """

    def __init__(self, token: Optional[str] = None):
        """
        Initializes the data provider, optionally with an API token.

        :param token: API token for the data provider, if required.
        """
        self.token = token
        self._initialize()

    def _initialize(self):
        """
        Perform any necessary initialization, such as API client setup.
        This method is called by the constructor.
        """
        pass

    @abstractmethod
    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        """
        Fetches basic company information for a given stock code.

        :param stock_code: The stock identifier.
        :return: A dictionary with basic company info, or None if not found.
        """
        pass

    @abstractmethod
    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        """
        Fetches daily stock prices for a given period.

        :param stock_code: The stock identifier.
        :param start_date: The start date of the period (e.g., 'YYYYMMDD').
        :param end_date: The end date of the period (e.g., 'YYYYMMDD').
        :return: A list of dictionaries, each representing a day's price data.
        """
        pass

    @abstractmethod
    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> Dict[str, List[Dict[str, Any]]]:
        """
        Fetches financial statements for a list of report dates and returns them
        in a series format.

        The series format is a dictionary where keys are metric names (e.g., 'revenue')
        and values are a list of data points over time.
        e.g., {"revenue": [{"year": "2023", "value": 1000}, ...]}

        Providers should also calculate derived metrics if they are not directly available.

        :param stock_code: The stock identifier.
        :param report_dates: A list of report dates to fetch data for (e.g., ['20231231', '20221231']).
        :return: A dictionary in series format.
        """
        pass

    async def get_financial_statement(self, stock_code: str, report_date: str) -> Optional[Dict[str, Any]]:
        """
        Fetches a single financial statement for a specific report date.
        This is a convenience method that can be implemented by calling get_financial_statements.

        Note: The return value of this function is a single report (dictionary),
        not a series object. This is for compatibility with parts of the code
        that need a single flat report.

        :param stock_code: The stock identifier.
        :param report_date: The report date for the statement (e.g., '20231231').
        :return: A dictionary with financial statement data, or None if not found.
        """
        series_data = await self.get_financial_statements(stock_code, [report_date])
        if not series_data:
            return None

        report: Dict[str, Any] = {"ts_code": stock_code, "end_date": report_date}
        for metric, points in series_data.items():
            for point in points:
                if point.get("year") == report_date[:4]:
                    report[metric] = point.get("value")
                    break
        return report
@@ -1,310 +0,0 @@
from .base import BaseDataProvider
from typing import Any, Dict, List, Optional
import finnhub
import pandas as pd
from datetime import datetime, timedelta
import asyncio
import logging
import httpx

logger = logging.getLogger(__name__)

class FinnhubProvider(BaseDataProvider):

    def _initialize(self):
        if not self.token:
            raise ValueError("Finnhub API key not provided.")
        self.client = finnhub.Client(api_key=self.token)
        try:
            masked = f"***{self.token[-4:]}" if isinstance(self.token, str) and len(self.token) >= 4 else "***"
            logger.info(f"[Finnhub] client initialized (token={masked})")
        except Exception:
            # Do not let a logging failure break initialization
            pass

    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        def _fetch():
            try:
                profile = None
                try:
                    profile = self.client.company_profile2(symbol=stock_code)
                    logger.debug(f"[Finnhub] SDK company_profile2 ok symbol={stock_code} name={profile.get('name') if isinstance(profile, dict) else None}")
                except Exception as e:
                    logger.warning(f"[Finnhub] SDK company_profile2 failed for {stock_code}: {e}")
                    # Fallback to direct HTTP if SDK call fails
                    try:
                        resp = httpx.get(
                            'https://finnhub.io/api/v1/stock/profile2',
                            params={'symbol': stock_code},
                            headers={'X-Finnhub-Token': self.token},
                            timeout=20.0,
                        )
                        logger.debug(f"[Finnhub] HTTP profile2 status={resp.status_code} len={len(resp.text)}")
                        if resp.status_code == 200:
                            profile = resp.json()
                        else:
                            logger.error(f"[Finnhub] HTTP profile2 failed status={resp.status_code} body={resp.text[:200]}")
                    except Exception:
                        profile = None
                if not profile:
                    return None

                # Normalize data
                return {
                    "ts_code": stock_code,
                    "name": profile.get("name"),
                    "area": profile.get("country"),
                    "industry": profile.get("finnhubIndustry"),
                    "exchange": profile.get("exchange"),
                    "ipo_date": profile.get("ipo"),
                }
            except Exception as e:
                logger.error(f"Finnhub get_stock_basic failed for {stock_code}: {e}")
                return None

        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, _fetch)

    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        def _fetch():
            try:
                start_ts = int(datetime.strptime(start_date, '%Y%m%d').timestamp())
                end_ts = int(datetime.strptime(end_date, '%Y%m%d').timestamp())

                logger.debug(f"[Finnhub] stock_candles symbol={stock_code} D {start_date}->{end_date}")
                res = self.client.stock_candles(stock_code, 'D', start_ts, end_ts)
                if res.get('s') != 'ok':
                    try:
                        logger.warning(f"[Finnhub] stock_candles not ok symbol={stock_code} status={res.get('s')}")
                    except Exception:
                        pass
                    return []

                df = pd.DataFrame(res)
                if df.empty:
                    return []

                # Normalize data
                df['trade_date'] = pd.to_datetime(df['t'], unit='s').dt.strftime('%Y%m%d')
                df.rename(columns={
                    'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'vol'
                }, inplace=True)

                return df[['trade_date', 'open', 'high', 'low', 'close', 'vol']].to_dict('records')

            except Exception as e:
                logger.error(f"Finnhub get_daily_price failed for {stock_code}: {e}")
                return []

        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, _fetch)

    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> List[Dict[str, Any]]:
        def _fetch():
            try:
                # 1) Fetch the annual reports (financials_reported, annual)
                res = None
                try:
                    res = self.client.financials_reported(symbol=stock_code, freq='annual')
                except Exception as e:
                    logger.warning(f"[Finnhub] SDK financials_reported failed for {stock_code}: {e}")
                    # Fallback: direct HTTP
                    try:
                        r = httpx.get(
                            'https://finnhub.io/api/v1/stock/financials-reported',
                            params={'symbol': stock_code, 'freq': 'annual'},
                            headers={'X-Finnhub-Token': self.token},
                            timeout=30.0,
                        )
                        logger.debug(f"[Finnhub] HTTP financials-reported status={r.status_code} len={len(r.text)}")
                        if r.status_code == 200:
                            res = r.json()
                        else:
                            logger.error(f"[Finnhub] HTTP financials-reported failed status={r.status_code} body={r.text[:300]}")
                    except Exception:
                        res = None
                if not res or not res.get('data'):
                    logger.warning(f"[Finnhub] financials-reported empty for {stock_code}")
                    return []

                df = pd.DataFrame(res['data'])
                if df.empty:
                    logger.warning(f"[Finnhub] financials-reported dataframe empty for {stock_code}")
                    return []

                # 2) Keep only the requested years
                years_to_fetch = {str(date)[:4] for date in report_dates}
                logger.debug(f"[Finnhub] filter years {sorted(list(years_to_fetch))} before={len(df)}")
                if 'year' in df.columns:
                    df = df[df['year'].astype(str).isin(years_to_fetch)]
                # Fallback: if the 'year' column is missing, infer the year from endDate
                if 'year' not in df.columns and 'endDate' in df.columns:
                    df = df[df['endDate'].astype(str).str[:4].isin(years_to_fetch)]

                if df.empty:
                    logger.warning(f"[Finnhub] financials-reported no rows after filter for {stock_code}")
                    return []

                def _normalize_key(s: Optional[str]) -> str:
                    if not isinstance(s, str):
                        return ""
                    return ''.join(ch.lower() for ch in s if ch.isalnum())

                def pick(report_block: List[Dict[str, Any]], concept_candidates: List[str], label_candidates: List[str] = []) -> Optional[float]:
                    if not report_block:
                        return None
                    try:
                        by_concept = { _normalize_key(item.get('concept')): item.get('value') for item in report_block if isinstance(item, dict) }
                        by_label = { _normalize_key(item.get('label')): item.get('value') for item in report_block if isinstance(item, dict) }
                    except Exception:
                        return None
                    for key in concept_candidates:
                        v = by_concept.get(_normalize_key(key))
                        if v is not None:
                            try:
                                return float(v)
                            except Exception:
                                continue
                    for key in label_candidates:
                        v = by_label.get(_normalize_key(key))
                        if v is not None:
                            try:
                                return float(v)
                            except Exception:
                                continue
                    return None

                # 3) Iterate over the annual records, flatten them, and normalize field names
                flat_reports: List[Dict[str, Any]] = []
                for _, row in df.iterrows():
                    bs = (row.get('report') or {}).get('bs', [])
                    ic = (row.get('report') or {}).get('ic', [])
                    cf = (row.get('report') or {}).get('cf', [])

                    end_date = str(row.get('endDate') or '')

                    revenue = pick(
                        ic,
                        concept_candidates=['Revenues', 'RevenueFromContractWithCustomerExcludingAssessedTax', 'SalesRevenueNet', 'Revenue', 'RevenuesNet', 'SalesRevenueGoodsNet'],
                        label_candidates=['Total revenue', 'Revenue', 'Sales revenue']
                    )
                    net_income = pick(
                        ic,
                        concept_candidates=['NetIncomeLoss', 'ProfitLoss', 'NetIncomeLossAvailableToCommonStockholdersBasic', 'NetIncomeLossAvailableToCommonStockholdersDiluted'],
                        label_candidates=['Net income', 'Net income (loss)']
                    )
                    gross_profit = pick(
                        ic,
                        concept_candidates=['GrossProfit'],
                        label_candidates=['Gross profit']
                    )

                    total_assets = pick(
                        bs,
                        concept_candidates=['Assets', 'AssetsTotal', 'AssetsCurrentAndNoncurrent', 'AssetsIncludingAssetsMeasuredAtFairValue'],
                        label_candidates=['Total assets']
                    )
                    total_equity = pick(
                        bs,
                        concept_candidates=['StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest', 'StockholdersEquity', 'StockholdersEquityTotal', 'Equity'],
                        label_candidates=['Total equity', "Stockholders' equity"]
                    )
                    goodwill = pick(
                        bs,
                        concept_candidates=['Goodwill', 'GoodwillAndIntangibleAssets'],
                        label_candidates=['Goodwill', 'Goodwill and intangible assets']
                    )

                    n_cashflow_act = pick(
                        cf,
                        concept_candidates=['NetCashProvidedByUsedInOperatingActivities', 'NetCashProvidedByUsedInOperatingActivitiesContinuingOperations', 'NetCashFlowOperating'],
                        label_candidates=['Net cash provided by operating activities']
                    )
                    capex = pick(
                        cf,
                        concept_candidates=['CapitalExpenditures', 'PaymentsToAcquirePropertyPlantAndEquipment', 'PaymentsToAcquireProductiveAssets'],
                        label_candidates=['Capital expenditures']
                    )

                    # Compute derived metrics
                    free_cash_flow = None
                    if isinstance(n_cashflow_act, (int, float)) and isinstance(capex, (int, float)):
                        free_cash_flow = n_cashflow_act - capex

                    normalized = {
                        # Basic metadata fields
                        'ts_code': stock_code,
                        'end_date': end_date,  # DataManager extracts the year from this

                        # Standard names (see financial_data_dictionary)
                        'revenue': revenue,
                        'n_income': net_income,
                        'gross_profit': gross_profit,

                        'total_assets': total_assets,
                        'total_hldr_eqy_exc_min_int': total_equity,
                        'goodwill': goodwill,

                        'n_cashflow_act': n_cashflow_act,
                        'c_pay_acq_const_fiolta': capex,
                        '__free_cash_flow': free_cash_flow,
                    }

                    # A few common ratios (computed when enough data is available), named to match the documentation
                    if isinstance(revenue, (int, float)) and revenue > 0 and isinstance(gross_profit, (int, float)):
                        normalized['grossprofit_margin'] = gross_profit / revenue
                    if isinstance(revenue, (int, float)) and revenue > 0 and isinstance(net_income, (int, float)):
                        normalized['netprofit_margin'] = net_income / revenue
                    if isinstance(total_assets, (int, float)) and total_assets > 0 and isinstance(net_income, (int, float)):
                        normalized['roa'] = net_income / total_assets
                    if isinstance(total_equity, (int, float)) and total_equity > 0 and isinstance(net_income, (int, float)):
                        normalized['roe'] = net_income / total_equity

                    flat_reports.append(normalized)
                    try:
                        logger.debug(
                            f"[Finnhub] row endDate={end_date} revenue={revenue} net_income={net_income} gross_profit={gross_profit} "
                            f"assets={total_assets} equity={total_equity} goodwill={goodwill} n_cfo={n_cashflow_act} capex={capex}"
                        )
                    except Exception:
                        pass

                # Convert flat reports to series dict directly to match DataManager expected format
                series: Dict[str, List[Dict[str, Any]]] = {}
                for report in flat_reports:
                    end_date = str(report.get('end_date') or '')
                    year = end_date[:4] if len(end_date) >= 4 else None
                    if not year:
                        continue
                    period = f"{year}1231"

                    for key, value in report.items():
                        if key in ['ts_code', 'end_date']:
                            continue
                        # Only collect numeric values
                        try:
                            if value is None:
                                continue
                            num = float(value)
                        except Exception:
                            continue
                        if key not in series:
                            series[key] = []
                        # Avoid duplicate period entries
                        exists = any(dp.get('period') == period for dp in series[key])
                        if not exists:
                            series[key].append({'period': period, 'value': num})

                try:
                    total_points = sum(len(v) for v in series.values())
                    logger.info(f"[Finnhub] built series for {stock_code} keys={len(series)} points={total_points}")
                except Exception:
                    pass
                return series

            except Exception as e:
                logger.error(f"Finnhub get_financial_statements failed for {stock_code}: {e}")
                return []

        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, _fetch)
@@ -1,131 +0,0 @@
from .base import BaseDataProvider
from typing import Any, Dict, List, Optional
import pandas as pd
from datetime import datetime

# Assume the iFinDPy library is already installed in the environment
# Important: the user must install iFinDPy manually, following the official documentation
try:
    from iFinDPy import THS_iFinDLogin, THS_BD, THS_HQ
except ImportError:
    print("Error: the iFinDPy module was not found. Please make sure it is installed according to the official Tonghuashun instructions.")
    # Define stub functions so the program does not crash when the library is missing
    def THS_iFinDLogin(*args, **kwargs): return -1
    def THS_BD(*args, **kwargs): return pd.DataFrame()
    def THS_HQ(*args, **kwargs): return pd.DataFrame()


class TonghsProvider(BaseDataProvider):
    _is_logged_in = False

    def __init__(self, token: Optional[str] = None):
        # Log in using the Refresh Token obtained from the iFinD user center
        if not TonghsProvider._is_logged_in:
            if not token:
                raise ValueError("Tonghuashun iFinDPy Refresh Token was not provided in the configuration.")

            # Call the login function, passing the token directly
            # Note: the exact keyword argument name may need to be confirmed against the iFinDPy documentation; here it is assumed to be 'token' or simply the first positional argument
            login_result = THS_iFinDLogin(token)

            if login_result == 0:
                print("Tonghuashun iFinDPy login succeeded.")
                TonghsProvider._is_logged_in = True
            else:
                print(f"Tonghuashun iFinDPy login failed, error code: {login_result}")
                raise ConnectionError("Unable to log in to the Tonghuashun iFinDPy service; please check that your Refresh Token is correct.")

    async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
        try:
            # TODO: the user should confirm the indicators used to fetch basic company information
            indicators = "ths_stock_short_name_stock;ths_listed_market_stock;ths_industry_stock;ths_ipo_date_stock"
            data = THS_BD(stock_code, indicators, "")

            if data.empty:
                return None

            # --- Data normalization ---
            # iFinDPy usually returns a DataFrame; convert it into a dictionary
            info = data.iloc[0].to_dict()

            return {
                "ts_code": stock_code,
                "name": info.get("ths_stock_short_name_stock"),
                "area": info.get("ths_listed_market_stock"),
                "industry": info.get("ths_industry_stock"),
                "list_date": info.get("ths_ipo_date_stock"),
            }
        except Exception as e:
            print(f"Tonghuashun iFinDPy get_stock_basic failed for stock code {stock_code}: {e}")
            return None

    async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
        try:
            # TODO: the user should confirm the indicators used to fetch daily quotes
            indicators = "open;high;low;close;volume"
            # iFinDPy dates are usually formatted as YYYY-MM-DD
            date_range = f"{start_date};{end_date}"

            data = THS_HQ(stock_code, indicators, date_range)

            if data.empty:
                return []

            # --- Data normalization ---
            data = data.reset_index()
            data.rename(columns={
                "time": "trade_date",
                "open": "open",
                "high": "high",
                "low": "low",
                "close": "close",
                "volume": "vol"
            }, inplace=True)

            return data.to_dict('records')
        except Exception as e:
            print(f"Tonghuashun iFinDPy get_daily_price failed for stock code {stock_code}: {e}")
            return []

    async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> List[Dict[str, Any]]:
        try:
            # TODO: the user should confirm the indicators for fetching financial statements
            # This may require multiple THS_BD calls whose results are then merged

            # Example: fetch data for multiple report dates in one call
            # Convert report_dates into the format iFinDPy accepts, e.g. "2022-12-31;2021-12-31"
            dates_param = ";".join(report_dates)

            # Required indicators
            income_indicators = "ths_np_stock"  # net income
            bs_indicators = "ths_total_assets_stock;ths_total_liab_stock"  # total assets; total liabilities
            revenue_indicators = "ths_revenue_stock"  # operating revenue

            # Fetch data
            income_data = THS_BD(stock_code, income_indicators, f"reportDate={dates_param}")
            bs_data = THS_BD(stock_code, bs_indicators, f"reportDate={dates_param}")
            revenue_data = THS_BD(stock_code, revenue_indicators, f"reportDate={dates_param}")

            # Merge data
            financials_df = pd.concat([income_data, bs_data, revenue_data], axis=1)
            financials_df = financials_df.loc[:, ~financials_df.columns.duplicated()]
            financials_df = financials_df.reset_index().rename(columns={"index": "end_date"})

            # --- Data normalization ---
            financials_df.rename(columns={
                "ths_revenue_stock": "revenue",
                "ths_np_stock": "net_income",
                "ths_total_assets_stock": "total_assets",
                "ths_total_liab_stock": "total_liabilities",
            }, inplace=True)

            financials_df["ts_code"] = stock_code

            return financials_df.to_dict('records')
        except Exception as e:
            print(f"Tonghuashun iFinDPy get_financial_statements failed for stock code {stock_code}: {e}")
            return []

    async def get_financial_statement(self, stock_code: str, report_date: str) -> Optional[Dict[str, Any]]:
        results = await self.get_financial_statements(stock_code, [report_date])
        return results[0] if results else None
@ -1,705 +0,0 @@
|
||||
from .base import BaseDataProvider
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
import logging
|
||||
import asyncio
|
||||
import tushare as ts
|
||||
import math
|
||||
import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TushareProvider(BaseDataProvider):
|
||||
|
||||
def _initialize(self):
|
||||
if not self.token:
|
||||
raise ValueError("Tushare API token not provided.")
|
||||
# 使用官方 SDK 客户端
|
||||
self._pro = ts.pro_api(self.token)
|
||||
# 交易日历缓存:key=(exchange, start, end) -> List[Dict]
|
||||
self._trade_cal_cache: Dict[str, List[Dict[str, Any]]] = {}
|
||||
|
||||
async def _resolve_trade_dates(self, dates: List[str], exchange: str = "SSE") -> Dict[str, str]:
|
||||
"""
|
||||
将任意日期映射为“该日若非交易日,则取不晚于该日的最近一个交易日”。
|
||||
返回映射:requested_date -> resolved_trade_date。
|
||||
"""
|
||||
if not dates:
|
||||
return {}
|
||||
start_date = min(dates)
|
||||
end_date = max(dates)
|
||||
cache_key = f"{exchange}:{start_date}:{end_date}"
|
||||
|
||||
if cache_key in self._trade_cal_cache:
|
||||
cal_rows = self._trade_cal_cache[cache_key]
|
||||
else:
|
||||
cal_rows = await self._query(
|
||||
api_name="trade_cal",
|
||||
params={
|
||||
"exchange": exchange,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
},
|
||||
fields=["cal_date", "is_open", "pretrade_date"],
|
||||
)
|
||||
self._trade_cal_cache[cache_key] = cal_rows
|
||||
|
||||
by_date: Dict[str, Dict[str, Any]] = {str(r.get("cal_date")): r for r in cal_rows}
|
||||
# 同时准备已开放的交易日期序列,便于兜底搜索
|
||||
open_dates = sorted([d for d, r in by_date.items() if int(r.get("is_open", 0)) == 1])
|
||||
|
||||
def _prev_open(d: str) -> Optional[str]:
|
||||
# 找到 <= d 的最大开市日
|
||||
lo, hi = 0, len(open_dates) - 1
|
||||
ans = None
|
||||
while lo <= hi:
|
||||
mid = (lo + hi) // 2
|
||||
if open_dates[mid] <= d:
|
||||
ans = open_dates[mid]
|
||||
lo = mid + 1
|
||||
else:
|
||||
hi = mid - 1
|
||||
return ans
|
||||
|
||||
resolved: Dict[str, str] = {}
|
||||
for d in dates:
|
||||
row = by_date.get(d)
|
||||
if row is None:
|
||||
# 不在本段日历(极少数情况),做一次兜底:使用区间内最近开市日
|
||||
prev_d = _prev_open(d)
|
||||
if prev_d:
|
||||
resolved[d] = prev_d
|
||||
else:
|
||||
# 最后兜底,仍找不到则原样返回
|
||||
resolved[d] = d
|
||||
continue
|
||||
is_open = int(row.get("is_open", 0))
|
||||
if is_open == 1:
|
||||
resolved[d] = d
|
||||
else:
|
||||
prev = str(row.get("pretrade_date") or "")
|
||||
if prev:
|
||||
resolved[d] = prev
|
||||
else:
|
||||
prev_d = _prev_open(d)
|
||||
resolved[d] = prev_d or d
|
||||
return resolved
|
||||
|
||||
async def _query(
|
||||
self,
|
||||
api_name: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
fields: Optional[List[str]] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
使用官方 tushare SDK 统一查询,返回字典列表。
|
||||
为避免阻塞事件循环,内部通过 asyncio.to_thread 在线程中执行同步调用。
|
||||
"""
|
||||
params = params or {}
|
||||
|
||||
def _call() -> List[Dict[str, Any]]:
|
||||
# 将字段列表转换为逗号分隔的字符串(SDK 推荐方式)
|
||||
fields_arg: Optional[str] = ",".join(fields) if isinstance(fields, list) else None
|
||||
|
||||
# 优先使用属性方式(pro.fina_indicator 等);若不存在则回退到通用 query
|
||||
func: Optional[Callable] = getattr(self._pro, api_name, None)
|
||||
try:
|
||||
if callable(func):
|
||||
df = func(**params, fields=fields_arg) if fields_arg else func(**params)
|
||||
else:
|
||||
# 通用回退:pro.query(name, params=..., fields=...)
|
||||
if fields_arg:
|
||||
df = self._pro.query(api_name, params=params, fields=fields_arg)
|
||||
else:
|
||||
df = self._pro.query(api_name, params=params)
|
||||
except Exception as exc:
|
||||
# 将 SDK 抛出的异常包装为统一日志
|
||||
raise RuntimeError(f"tushare.{api_name} failed: {exc}")
|
||||
|
||||
if df is None or df.empty:
|
||||
return []
|
||||
# DataFrame -> List[Dict]
|
||||
return df.to_dict(orient="records")
|
||||
|
||||
try:
|
||||
rows: List[Dict[str, Any]] = await asyncio.to_thread(_call)
|
||||
# 清洗 NaN/Inf,避免 JSON 序列化错误
|
||||
DATE_KEYS = {
|
||||
"cal_date", "pretrade_date", "trade_date", "trade_dt", "date",
|
||||
"end_date", "ann_date", "f_ann_date", "period"
|
||||
}
|
||||
|
||||
def _sanitize_value(key: str, v: Any) -> Any:
|
||||
if v is None:
|
||||
return None
|
||||
# 保持日期/期末字段为字符串(避免 20231231 -> 20231231.0 导致匹配失败)
|
||||
if key in DATE_KEYS:
|
||||
try:
|
||||
s = str(v)
|
||||
# 去除意外的小数点形式
|
||||
if s.endswith(".0"):
|
||||
s = s[:-2]
|
||||
return s
|
||||
except Exception:
|
||||
return str(v)
|
||||
try:
|
||||
# 处理 numpy.nan / numpy.inf / Decimal / numpy 数值等,统一为 Python float
|
||||
fv = float(v)
|
||||
return fv if math.isfinite(fv) else None
|
||||
except Exception:
|
||||
# 利用自反性判断 NaN(NaN != NaN)
|
||||
try:
|
||||
if v != v:
|
||||
return None
|
||||
except Exception:
|
||||
pass
|
||||
return v
|
||||
|
||||
for row in rows:
|
||||
for k, v in list(row.items()):
|
||||
row[k] = _sanitize_value(k, v)
|
||||
# logger.info(f"Tushare '{api_name}' returned {len(rows)} rows.")
|
||||
return rows
|
||||
except Exception as e:
|
||||
logger.error(f"Exception calling tushare '{api_name}': {e}")
|
||||
raise
|
||||
|
||||
async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
rows = await self._query(
|
||||
api_name="stock_basic",
|
||||
params={"ts_code": stock_code},
|
||||
)
|
||||
return rows[0] if rows else None
|
||||
except Exception as e:
|
||||
logger.error(f"Tushare get_stock_basic failed for {stock_code}: {e}")
|
||||
return None
|
||||
|
||||
async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
|
||||
try:
|
||||
rows = await self._query(
|
||||
api_name="daily",
|
||||
params={
|
||||
"ts_code": stock_code,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
},
|
||||
)
|
||||
return rows or []
|
||||
except Exception as e:
|
||||
logger.error(f"Tushare get_daily_price failed for {stock_code}: {e}")
|
||||
return []
|
||||
|
||||
async def get_daily_basic_points(self, stock_code: str, trade_dates: List[str]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
获取指定交易日列表的 daily_basic 数据(例如 total_mv、pe、pb)。
|
||||
"""
|
||||
try:
|
||||
if not trade_dates:
|
||||
return []
|
||||
# 将请求日期映射到不晚于该日的最近交易日
|
||||
mapping = await self._resolve_trade_dates(trade_dates, exchange="SSE")
|
||||
resolved_dates = list(set(mapping.values()))
|
||||
start_date = min(resolved_dates)
|
||||
end_date = max(resolved_dates)
|
||||
# 一次性取区间内数据,再按解析后的交易日过滤
|
||||
all_rows = await self._query(
|
||||
api_name="daily_basic",
|
||||
params={
|
||||
"ts_code": stock_code,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
},
|
||||
)
|
||||
wanted = set(resolved_dates)
|
||||
rows = [r for r in all_rows if str(r.get("trade_date")) in wanted]
|
||||
logger.info(f"Tushare daily_basic returned {len(rows)} rows for {stock_code} on {len(trade_dates)} requested dates (resolved to {len(wanted)} trading dates)")
|
||||
return rows
|
||||
except Exception as e:
|
||||
logger.error(f"Tushare get_daily_basic_points failed for {stock_code}: {e}")
|
||||
return []
|
||||
|
||||
async def get_daily_points(self, stock_code: str, trade_dates: List[str]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
获取指定交易日列表的日行情(例如 close)。
|
||||
"""
|
||||
try:
|
||||
if not trade_dates:
|
||||
return []
|
||||
mapping = await self._resolve_trade_dates(trade_dates, exchange="SSE")
|
||||
resolved_dates = list(set(mapping.values()))
|
||||
start_date = min(resolved_dates)
|
||||
end_date = max(resolved_dates)
|
||||
all_rows = await self._query(
|
||||
api_name="daily",
|
||||
params={
|
||||
"ts_code": stock_code,
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
},
|
||||
)
|
||||
wanted = set(resolved_dates)
|
||||
rows = [r for r in all_rows if str(r.get("trade_date")) in wanted]
|
||||
logger.info(f"Tushare daily returned {len(rows)} rows for {stock_code} on {len(trade_dates)} requested dates (resolved to {len(wanted)} trading dates)")
|
||||
return rows
|
||||
except Exception as e:
|
||||
logger.error(f"Tushare get_daily_points failed for {stock_code}: {e}")
|
||||
return []
|
||||
|
||||
def _calculate_derived_metrics(self, series: Dict[str, List[Dict]], periods: List[str]) -> Dict[str, List[Dict]]:
|
||||
"""
|
||||
在 Tushare provider 内部计算派生指标。
|
||||
"""
|
||||
# --- Helper Functions ---
|
||||
def _get_value(key: str, period: str) -> Optional[float]:
|
||||
if key not in series:
|
||||
return None
|
||||
point = next((p for p in series[key] if p.get("period") == period), None)
|
||||
if point is None or point.get("value") is None:
|
||||
return None
|
||||
try:
|
||||
return float(point["value"])
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
def _get_avg_value(key: str, period: str) -> Optional[float]:
|
||||
current_val = _get_value(key, period)
|
||||
try:
|
||||
# 总是和上一年度的年报值(如果存在)进行平均
|
||||
current_year = int(period[:4])
|
||||
prev_year_end_period = str(current_year - 1) + "1231"
|
||||
prev_val = _get_value(key, prev_year_end_period)
|
||||
except (ValueError, TypeError):
|
||||
prev_val = None
|
||||
if current_val is None: return None
|
||||
if prev_val is None: return current_val
|
||||
return (current_val + prev_val) / 2
|
||||
|
||||
def _get_cogs(period: str) -> Optional[float]:
|
||||
revenue = _get_value('revenue', period)
|
||||
gp_margin_raw = _get_value('grossprofit_margin', period)
|
||||
if revenue is None or gp_margin_raw is None: return None
|
||||
gp_margin = gp_margin_raw / 100.0 if abs(gp_margin_raw) > 1 else gp_margin_raw
|
||||
return revenue * (1 - gp_margin)
|
||||
|
||||
def add_series(key: str, data: List[Dict]):
|
||||
if data:
|
||||
series[key] = data
|
||||
|
||||
# --- Calculations ---
|
||||
fcf_data = []
|
||||
for period in periods:
|
||||
op_cashflow = _get_value('n_cashflow_act', period)
|
||||
capex = _get_value('c_pay_acq_const_fiolta', period)
|
||||
if op_cashflow is not None and capex is not None:
|
||||
fcf_data.append({"period": period, "value": op_cashflow - capex})
|
||||
add_series('__free_cash_flow', fcf_data)
|
||||
|
||||
fee_calcs = [
|
||||
('__sell_rate', 'sell_exp', 'revenue'),
|
||||
('__admin_rate', 'admin_exp', 'revenue'),
|
||||
('__rd_rate', 'rd_exp', 'revenue'),
|
||||
('__depr_ratio', 'depr_fa_coga_dpba', 'revenue'),
|
||||
]
|
||||
for key, num_key, den_key in fee_calcs:
|
||||
data = []
|
||||
for period in periods:
|
||||
numerator = _get_value(num_key, period)
|
||||
denominator = _get_value(den_key, period)
|
||||
if numerator is not None and denominator is not None and denominator != 0:
|
||||
data.append({"period": period, "value": (numerator / denominator) * 100})
|
||||
add_series(key, data)
|
||||
|
||||
tax_rate_data = []
|
||||
for period in periods:
|
||||
tax_to_ebt = _get_value('tax_to_ebt', period)
|
||||
if tax_to_ebt is not None:
|
||||
rate = tax_to_ebt * 100 if abs(tax_to_ebt) <= 1 else tax_to_ebt
|
||||
tax_rate_data.append({"period": period, "value": rate})
|
||||
add_series('__tax_rate', tax_rate_data)
|
||||
|
||||
other_fee_data = []
|
||||
for period in periods:
|
||||
gp_raw = _get_value('grossprofit_margin', period)
|
||||
np_raw = _get_value('netprofit_margin', period)
|
||||
rev = _get_value('revenue', period)
|
||||
sell_exp = _get_value('sell_exp', period)
|
||||
admin_exp = _get_value('admin_exp', period)
|
||||
rd_exp = _get_value('rd_exp', period)
|
||||
if all(v is not None for v in [gp_raw, np_raw, rev, sell_exp, admin_exp, rd_exp]) and rev != 0:
|
||||
gp = gp_raw / 100 if abs(gp_raw) > 1 else gp_raw
|
||||
np = np_raw / 100 if abs(np_raw) > 1 else np_raw
|
||||
sell_rate = sell_exp / rev
|
||||
admin_rate = admin_exp / rev
|
||||
rd_rate = rd_exp / rev
|
||||
other_rate = (gp - np - sell_rate - admin_rate - rd_rate) * 100
|
||||
other_fee_data.append({"period": period, "value": other_rate})
|
||||
add_series('__other_fee_rate', other_fee_data)
|
||||
|
||||
asset_ratio_keys = [
|
||||
('__money_cap_ratio', 'money_cap'), ('__inventories_ratio', 'inventories'),
|
||||
('__ar_ratio', 'accounts_receiv_bill'), ('__prepay_ratio', 'prepayment'),
|
||||
('__fix_assets_ratio', 'fix_assets'), ('__lt_invest_ratio', 'lt_eqt_invest'),
|
||||
('__goodwill_ratio', 'goodwill'), ('__ap_ratio', 'accounts_pay'),
|
||||
('__st_borr_ratio', 'st_borr'), ('__lt_borr_ratio', 'lt_borr'),
|
||||
]
|
||||
for key, num_key in asset_ratio_keys:
|
||||
data = []
|
||||
for period in periods:
|
||||
numerator = _get_value(num_key, period)
|
||||
denominator = _get_value('total_assets', period)
|
||||
if numerator is not None and denominator is not None and denominator != 0:
|
||||
data.append({"period": period, "value": (numerator / denominator) * 100})
|
||||
add_series(key, data)
|
||||
|
||||
adv_data = []
|
||||
for period in periods:
|
||||
adv = _get_value('adv_receipts', period) or 0
|
||||
contract = _get_value('contract_liab', period) or 0
|
||||
total_assets = _get_value('total_assets', period)
|
||||
if total_assets is not None and total_assets != 0:
|
||||
adv_data.append({"period": period, "value": ((adv + contract) / total_assets) * 100})
|
||||
add_series('__adv_ratio', adv_data)
|
||||
|
||||
other_assets_data = []
|
||||
known_assets_keys = ['money_cap', 'inventories', 'accounts_receiv_bill', 'prepayment', 'fix_assets', 'lt_eqt_invest', 'goodwill']
|
||||
for period in periods:
|
||||
total_assets = _get_value('total_assets', period)
|
||||
if total_assets is not None and total_assets != 0:
|
||||
sum_known = sum(_get_value(k, period) or 0 for k in known_assets_keys)
|
||||
other_assets_data.append({"period": period, "value": ((total_assets - sum_known) / total_assets) * 100})
|
||||
add_series('__other_assets_ratio', other_assets_data)
|
||||
|
||||
op_assets_data = []
|
||||
for period in periods:
|
||||
total_assets = _get_value('total_assets', period)
|
||||
if total_assets is not None and total_assets != 0:
|
||||
inv = _get_value('inventories', period) or 0
|
||||
ar = _get_value('accounts_receiv_bill', period) or 0
|
||||
pre = _get_value('prepayment', period) or 0
|
||||
ap = _get_value('accounts_pay', period) or 0
|
||||
adv = _get_value('adv_receipts', period) or 0
|
||||
contract_liab = _get_value('contract_liab', period) or 0
|
||||
operating_assets = inv + ar + pre - ap - adv - contract_liab
|
||||
op_assets_data.append({"period": period, "value": (operating_assets / total_assets) * 100})
|
||||
add_series('__operating_assets_ratio', op_assets_data)
|
||||
|
||||
debt_ratio_data = []
|
||||
for period in periods:
|
||||
total_assets = _get_value('total_assets', period)
|
||||
if total_assets is not None and total_assets != 0:
|
||||
st_borr = _get_value('st_borr', period) or 0
|
||||
lt_borr = _get_value('lt_borr', period) or 0
|
||||
debt_ratio_data.append({"period": period, "value": ((st_borr + lt_borr) / total_assets) * 100})
|
||||
add_series('__interest_bearing_debt_ratio', debt_ratio_data)
|
||||
|
||||
payturn_data = []
|
||||
for period in periods:
|
||||
avg_ap = _get_avg_value('accounts_pay', period)
|
||||
cogs = _get_cogs(period)
|
||||
if avg_ap is not None and cogs is not None and cogs != 0:
|
||||
payturn_data.append({"period": period, "value": (365 * avg_ap) / cogs})
|
||||
add_series('payturn_days', payturn_data)
|
||||
|
||||
per_capita_calcs = [
|
||||
('__rev_per_emp', 'revenue', 10000),
|
||||
('__profit_per_emp', 'n_income', 10000),
|
||||
('__salary_per_emp', 'c_paid_to_for_empl', 10000),
|
||||
]
|
||||
for key, num_key, divisor in per_capita_calcs:
|
||||
data = []
|
||||
for period in periods:
|
||||
numerator = _get_value(num_key, period)
|
||||
employees = _get_value('employees', period)
|
||||
if numerator is not None and employees is not None and employees != 0:
|
||||
data.append({"period": period, "value": (numerator / employees) / divisor})
|
||||
add_series(key, data)
|
||||
|
||||
return series
|
||||
|
||||
async def get_financial_statements(self, stock_code: str, report_dates: Optional[List[str]] = None) -> Dict[str, List[Dict[str, Any]]]:
|
||||
# 1) 一次性拉取所需四表(尽量齐全字段),再按指定 report_dates 过滤
|
||||
# 字段列表基于官方示例,避免超量请求可按需精简
|
||||
bs_fields = [
|
||||
"ts_code","ann_date","f_ann_date","end_date","report_type","comp_type","end_type",
|
||||
"money_cap","inventories","prepayment","accounts_receiv","accounts_receiv_bill","goodwill",
|
||||
"lt_eqt_invest","fix_assets","total_assets","accounts_pay","adv_receipts","contract_liab",
|
||||
"st_borr","lt_borr","total_cur_assets","total_cur_liab","total_ncl","total_liab","total_hldr_eqy_exc_min_int",
|
||||
]
|
||||
ic_fields = [
|
||||
"ts_code","ann_date","f_ann_date","end_date","report_type","comp_type","end_type",
|
||||
"total_revenue","revenue","sell_exp","admin_exp","rd_exp","operate_profit","total_profit",
|
||||
"income_tax","n_income","n_income_attr_p","ebit","ebitda","netprofit_margin","grossprofit_margin",
|
||||
]
|
||||
cf_fields = [
|
||||
"ts_code","ann_date","f_ann_date","end_date","comp_type","report_type","end_type",
|
||||
"n_cashflow_act","c_pay_acq_const_fiolta","c_paid_to_for_empl","depr_fa_coga_dpba",
|
||||
]
|
||||
fi_fields = [
|
||||
"ts_code","end_date","ann_date","grossprofit_margin","netprofit_margin","tax_to_ebt","roe","roa","roic",
|
||||
"invturn_days","arturn_days","fa_turn","tr_yoy","dt_netprofit_yoy","assets_turn",
|
||||
]
|
||||
|
||||
try:
|
||||
bs_rows, ic_rows, cf_rows, fi_rows, rep_rows, div_rows, holder_rows, company_rows = await asyncio.gather(
|
||||
self._query("balancesheet", params={"ts_code": stock_code, "report_type": 1}, fields=bs_fields),
|
||||
self._query("income", params={"ts_code": stock_code, "report_type": 1}, fields=ic_fields),
|
||||
self._query("cashflow", params={"ts_code": stock_code, "report_type": 1}, fields=cf_fields),
|
||||
self._query("fina_indicator", params={"ts_code": stock_code}, fields=fi_fields),
|
||||
# 回购公告
|
||||
self._query(
|
||||
"repurchase",
|
||||
params={"ts_code": stock_code},
|
||||
fields=[
|
||||
"ts_code","ann_date","end_date","proc","exp_date","vol","amount","high_limit","low_limit",
|
||||
],
|
||||
),
|
||||
# 分红公告(仅取必要字段)
|
||||
self._query(
|
||||
"dividend",
|
||||
params={"ts_code": stock_code},
|
||||
fields=[
|
||||
"ts_code","end_date","cash_div_tax","pay_date","base_share",
|
||||
],
|
||||
),
|
||||
# 股东户数(按报告期)
|
||||
self._query(
|
||||
"stk_holdernumber",
|
||||
params={"ts_code": stock_code},
|
||||
fields=[
|
||||
"ts_code","ann_date","end_date","holder_num",
|
||||
],
|
||||
),
|
||||
# 公司基本信息(包含员工数)
|
||||
self._query(
|
||||
"stock_company",
|
||||
params={"ts_code": stock_code},
|
||||
fields=[
|
||||
"ts_code","employees",
|
||||
],
|
||||
),
|
||||
)
|
||||
try:
|
||||
logger.info(f"[Dividend] fetched {len(div_rows)} rows for {stock_code}")
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.error(f"Tushare bulk fetch failed for {stock_code}: {e}")
|
||||
bs_rows, ic_rows, cf_rows, fi_rows, rep_rows, div_rows, holder_rows, company_rows = [], [], [], [], [], [], [], []
|
||||
|
||||
# 2) 以 end_date 聚合合并四表
|
||||
by_date: Dict[str, Dict[str, Any]] = {}
|
||||
def _merge_rows(rows: List[Dict[str, Any]]):
|
||||
for r in rows or []:
|
||||
end_date = str(r.get("end_date") or r.get("period") or "")
|
||||
if not end_date:
|
||||
continue
|
||||
if end_date not in by_date:
|
||||
by_date[end_date] = {"ts_code": stock_code, "end_date": end_date}
|
||||
by_date[end_date].update(r)
|
||||
|
||||
_merge_rows(bs_rows)
|
||||
_merge_rows(ic_rows)
|
||||
_merge_rows(cf_rows)
|
||||
_merge_rows(fi_rows)
|
||||
|
||||
# 3) 筛选报告期:今年的最新报告期 + 往年所有年报
|
||||
current_year = str(datetime.date.today().year)
|
||||
all_available_dates = sorted(by_date.keys(), reverse=True)
|
||||
|
||||
latest_current_year_report = None
|
||||
for d in all_available_dates:
|
||||
if d.startswith(current_year):
|
||||
latest_current_year_report = d
|
||||
break
|
||||
|
||||
previous_years_annual_reports = [
|
||||
d for d in all_available_dates if d.endswith("1231") and not d.startswith(current_year)
|
||||
]
|
||||
|
||||
wanted_dates = []
|
||||
if latest_current_year_report:
|
||||
wanted_dates.append(latest_current_year_report)
|
||||
wanted_dates.extend(previous_years_annual_reports)
|
||||
|
||||
all_statements = [by_date[d] for d in wanted_dates if d in by_date]
|
||||
|
||||
logger.info(f"Successfully prepared {len(all_statements)} merged statement(s) for {stock_code} from {len(by_date)} available reports.")
|
||||
|
||||
# Transform to series format
|
||||
series: Dict[str, List[Dict]] = {}
|
||||
if all_statements:
|
||||
for report in all_statements:
|
||||
period = report.get("end_date", "")
|
||||
if not period: continue
|
||||
for key, value in report.items():
|
||||
if key in ['ts_code', 'end_date', 'ann_date', 'f_ann_date', 'report_type', 'comp_type', 'end_type', 'update_flag', 'period']:
|
||||
continue
|
||||
# 仅保留可转为有限 float 的数值,避免 JSON 序列化错误
|
||||
try:
|
||||
fv = float(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if value is not None and math.isfinite(fv):
|
||||
if key not in series:
|
||||
series[key] = []
|
||||
if not any(d['period'] == period for d in series[key]):
|
||||
series[key].append({"period": period, "value": fv})
|
||||
|
||||
# 汇总回购信息为年度序列:按报告期 end_date 年份分组;
|
||||
# 其中 repurchase_amount 取该年内“最后一个 ann_date”的 amount 值。
|
||||
if 'rep_rows' in locals() and rep_rows:
|
||||
rep_by_year: Dict[str, Dict[str, Any]] = {}
|
||||
for r in rep_rows:
|
||||
endd = str(r.get("end_date") or r.get("ann_date") or "")
|
||||
if not endd:
|
||||
continue
|
||||
y = endd[:4]
|
||||
bucket = rep_by_year.setdefault(y, {
|
||||
"amount_sum": 0.0,
|
||||
"vol": 0.0,
|
||||
"high_limit": None,
|
||||
"low_limit": None,
|
||||
"last_ann_date": None,
|
||||
"amount_last": None,
|
||||
})
|
||||
amt = r.get("amount")
|
||||
vol = r.get("vol")
|
||||
hi = r.get("high_limit")
|
||||
lo = r.get("low_limit")
|
||||
ann = str(r.get("ann_date") or "")
|
||||
if isinstance(amt, (int, float)) and amt is not None:
|
||||
bucket["amount_sum"] += float(amt)
|
||||
if ann and ann[:4] == y:
|
||||
last = bucket["last_ann_date"]
|
||||
if last is None or ann > last:
|
||||
bucket["last_ann_date"] = ann
|
||||
bucket["amount_last"] = float(amt)
|
||||
if isinstance(vol, (int, float)) and vol is not None:
|
||||
bucket["vol"] += float(vol)
|
||||
if isinstance(hi, (int, float)) and hi is not None:
|
||||
bucket["high_limit"] = float(hi)
|
||||
if isinstance(lo, (int, float)) and lo is not None:
|
||||
bucket["low_limit"] = float(lo)
|
||||
|
||||
|
||||
if rep_by_year:
|
||||
amt_series = []
|
||||
vol_series = []
|
||||
hi_series = []
|
||||
lo_series = []
|
||||
for y, v in rep_by_year.items():
|
||||
# Current-year data is attached to the latest current-year reporting period, otherwise to the annual period (YYYY1231)
|
||||
if y == current_year and latest_current_year_report:
|
||||
period_key = latest_current_year_report
|
||||
else:
|
||||
period_key = f"{y}1231"
|
||||
|
||||
if v.get("amount_last") is not None:
|
||||
amt_series.append({"period": period_key, "value": v["amount_last"]})
|
||||
if v.get("vol"):
|
||||
vol_series.append({"period": period_key, "value": v["vol"]})
|
||||
if v.get("high_limit") is not None:
|
||||
hi_series.append({"period": period_key, "value": v["high_limit"]})
|
||||
if v.get("low_limit") is not None:
|
||||
lo_series.append({"period": period_key, "value": v["low_limit"]})
|
||||
if amt_series:
|
||||
series["repurchase_amount"] = amt_series
|
||||
if vol_series:
|
||||
series["repurchase_vol"] = vol_series
|
||||
if hi_series:
|
||||
series["repurchase_high_limit"] = hi_series
|
||||
if lo_series:
|
||||
series["repurchase_low_limit"] = lo_series
|
||||
|
||||
# Aggregate dividend rows into an annual series, grouped by the year of the actual payout date (pay_date).
# Each record's amount = dividend per share (cash_div_tax) * base share capital (base_share), where
# base_share is in units of 10,000 shares; the amount is returned in units of 100 million (亿) CNY,
# hence the extra division by 10000.
if 'div_rows' in locals() and div_rows:
    div_by_year: Dict[str, float] = {}
    for r in div_rows:
        pay = str(r.get("pay_date") or "")
        # Only count real payout dates that contain a numeric year
        if not pay or len(pay) < 4 or not any(ch.isdigit() for ch in pay):
            continue
        y = pay[:4]
        cash_div = r.get("cash_div_tax")
        base_share = r.get("base_share")
        if isinstance(cash_div, (int, float)) and isinstance(base_share, (int, float)):
            # Total cash dividend (10k CNY) = dividend per share (CNY) * base share capital (10k shares);
            # dividing by 10000 converts it to 100 million (亿) CNY
            amount_billion = (float(cash_div) * float(base_share)) / 10000.0
            div_by_year[y] = div_by_year.get(y, 0.0) + amount_billion
    if div_by_year:
        div_series = []
        for y, v in sorted(div_by_year.items()):
            # Current-year data is attached to the latest current-year reporting period, otherwise to the annual period
            if y == current_year and latest_current_year_report:
                period_key = latest_current_year_report
            else:
                period_key = f"{y}1231"
            div_series.append({"period": period_key, "value": v})
        series["dividend_amount"] = div_series
        # try:
        #     logger.info(f"[Dividend] Series dividend_amount(period) for {stock_code}: {div_series}")
        # except Exception:
        #     pass
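# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# Unit check for the conversion above, with made-up numbers: a dividend of 0.5 CNY/share on a
# base of 1,200,000 万股 (12 billion shares) is 0.5 * 1,200,000 = 600,000 万元 (6 billion CNY),
# and 600,000 / 10000 = 60 亿.
_example_cash_div, _example_base_share = 0.5, 1_200_000.0
assert (_example_cash_div * _example_base_share) / 10000.0 == 60.0
# --------------------------------------------------------------------------------------------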
|
||||
|
||||
# Aggregate shareholder-count data: group by reporting period end_date and take the latest holder_num
|
||||
if 'holder_rows' in locals() and holder_rows:
|
||||
# Group by end_date and keep the holder_num from the most recent ann_date
|
||||
holder_by_period: Dict[str, Dict[str, Any]] = {}
|
||||
for r in holder_rows:
|
||||
end_date = str(r.get("end_date") or "")
|
||||
if not end_date:
|
||||
continue
|
||||
ann_date = str(r.get("ann_date") or "")
|
||||
holder_num = r.get("holder_num")
|
||||
|
||||
if end_date not in holder_by_period:
|
||||
holder_by_period[end_date] = {
|
||||
"holder_num": holder_num,
|
||||
"latest_ann_date": ann_date
|
||||
}
|
||||
else:
|
||||
# Compare ann_date values and keep the most recent one
|
||||
current_latest = holder_by_period[end_date]["latest_ann_date"]
|
||||
if ann_date and (not current_latest or ann_date > current_latest):
|
||||
holder_by_period[end_date] = {
|
||||
"holder_num": holder_num,
|
||||
"latest_ann_date": ann_date
|
||||
}
|
||||
|
||||
# Use the same reporting-period filter as the financial statements;
# shareholder counts should line up with the statement reporting dates.
|
||||
holder_series = []
|
||||
for end_date in wanted_dates:
|
||||
if end_date in holder_by_period:
|
||||
data = holder_by_period[end_date]
|
||||
holder_num = data["holder_num"]
|
||||
if isinstance(holder_num, (int, float)) and holder_num is not None:
|
||||
holder_series.append({"period": end_date, "value": float(holder_num)})
|
||||
|
||||
if holder_series:
|
||||
series["holder_num"] = holder_series
|
||||
|
||||
# Aggregate employee-count data: the head count is attached to the end of last year (December 31 of the previous year)
if 'company_rows' in locals() and company_rows:
    # Employee count is effectively static data, so take the first valid value
    latest_employees = None
    for r in company_rows:
        employees = r.get("employees")
        if isinstance(employees, (int, float)):
            latest_employees = float(employees)
            break  # first valid value wins
    if latest_employees is not None:
        # Attach the head count to December 31 of the previous year
        previous_year = str(datetime.date.today().year - 1)
        period_key = f"{previous_year}1231"
        series["employees"] = [{"period": period_key, "value": latest_employees}]

# Calculate derived metrics
periods = sorted(list(set(d['period'] for s in series.values() for d in s)))
series = self._calculate_derived_metrics(series, periods)

return series
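# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# `_calculate_derived_metrics` is defined elsewhere in this class and is not shown in this
# diff. Purely as an assumed illustration of what a "derived" series could be, a year-over-year
# growth figure can be computed from two adjacent periods of the merged series:
_example_revenue = [{"period": "20221231", "value": 100.0}, {"period": "20231231", "value": 125.0}]
_example_yoy = (_example_revenue[1]["value"] / _example_revenue[0]["value"]) - 1.0
assert abs(_example_yoy - 0.25) < 1e-9
# --------------------------------------------------------------------------------------------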
|
||||
@ -1,114 +0,0 @@
|
||||
from .base import BaseDataProvider
|
||||
from typing import Any, Dict, List, Optional
|
||||
import yfinance as yf
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class YfinanceProvider(BaseDataProvider):
|
||||
|
||||
def _map_stock_code(self, stock_code: str) -> str:
    # yfinance uses different ticker suffixes for the CN market:
    # Shanghai is ".SS" on Yahoo Finance, while Shenzhen keeps ".SZ".
    if stock_code.endswith('.SH'):
        return stock_code.replace('.SH', '.SS')
    # Shenzhen codes already carry the suffix Yahoo expects; stripping it (as an earlier
    # revision did) would resolve to a different ticker or none at all, so pass it through.
    return stock_code
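# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# Expected input/output pairs of _map_stock_code, assuming Yahoo Finance's ".SS"/".SZ"
# suffix convention (the ticker values are examples, not fetched data):
_example_mapping = {
    "600519.SH": "600519.SS",  # Shanghai maps to Yahoo's ".SS"
    "000001.SZ": "000001.SZ",  # Shenzhen keeps ".SZ"
    "AAPL": "AAPL",            # non-CN codes pass through unchanged
}
assert _example_mapping["600519.SH"].endswith(".SS")
# --------------------------------------------------------------------------------------------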
|
||||
|
||||
async def get_stock_basic(self, stock_code: str) -> Optional[Dict[str, Any]]:
|
||||
def _fetch():
|
||||
try:
|
||||
ticker = yf.Ticker(self._map_stock_code(stock_code))
|
||||
info = ticker.info
|
||||
|
||||
# Normalize data to match expected format
|
||||
return {
|
||||
"ts_code": stock_code,
|
||||
"name": info.get("longName"),
|
||||
"area": info.get("country"),
|
||||
"industry": info.get("industry"),
|
||||
"market": info.get("market"),
|
||||
"exchange": info.get("exchange"),
|
||||
"list_date": datetime.fromtimestamp(info.get("firstTradeDateEpoch", 0)).strftime('%Y%m%d') if info.get("firstTradeDateEpoch") else None,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"yfinance get_stock_basic failed for {stock_code}: {e}")
|
||||
return None
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, _fetch)
|
||||
|
||||
async def get_daily_price(self, stock_code: str, start_date: str, end_date: str) -> List[Dict[str, Any]]:
|
||||
def _fetch():
|
||||
try:
|
||||
# yfinance date format is YYYY-MM-DD
|
||||
start_fmt = datetime.strptime(start_date, '%Y%m%d').strftime('%Y-%m-%d')
|
||||
end_fmt = datetime.strptime(end_date, '%Y%m%d').strftime('%Y-%m-%d')
|
||||
|
||||
ticker = yf.Ticker(self._map_stock_code(stock_code))
|
||||
df = ticker.history(start=start_fmt, end=end_fmt)
|
||||
|
||||
df.reset_index(inplace=True)
|
||||
# Normalize column names
|
||||
df.rename(columns={
|
||||
"Date": "trade_date",
|
||||
"Open": "open", "High": "high", "Low": "low", "Close": "close",
|
||||
"Volume": "vol"
|
||||
}, inplace=True)
|
||||
df['trade_date'] = df['trade_date'].dt.strftime('%Y%m%d')
|
||||
return df.to_dict('records')
|
||||
except Exception as e:
|
||||
logger.error(f"yfinance get_daily_price failed for {stock_code}: {e}")
|
||||
return []
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, _fetch)
|
||||
|
||||
async def get_financial_statements(self, stock_code: str, report_dates: List[str]) -> List[Dict[str, Any]]:
|
||||
def _fetch():
|
||||
try:
|
||||
ticker = yf.Ticker(self._map_stock_code(stock_code))
|
||||
|
||||
# yfinance provides financials quarterly or annually. We'll fetch annually and try to match the dates.
|
||||
# Note: This is an approximation as yfinance does not allow fetching by specific end-of-year dates.
|
||||
df_financials = ticker.financials.transpose()
|
||||
df_balance = ticker.balance_sheet.transpose()
|
||||
df_cashflow = ticker.cash_flow.transpose()
|
||||
|
||||
if df_financials.empty and df_balance.empty and df_cashflow.empty:
|
||||
return []
|
||||
|
||||
# Combine the data
|
||||
df_combined = pd.concat([df_financials, df_balance, df_cashflow], axis=1)
|
||||
df_combined.index.name = 'end_date'
|
||||
df_combined.reset_index(inplace=True)
|
||||
df_combined['end_date_str'] = df_combined['end_date'].dt.strftime('%Y%m%d')
|
||||
|
||||
# Filter by requested dates (allowing for some flexibility if exact match not found)
|
||||
# This simplistic filtering might need to be more robust.
|
||||
# For now, we assume the yearly data maps to the year in report_dates.
|
||||
years_to_fetch = {date[:4] for date in report_dates}
|
||||
df_combined = df_combined[df_combined['end_date'].dt.year.astype(str).isin(years_to_fetch)]
|
||||
|
||||
# Data Normalization (yfinance columns are different from Tushare)
|
||||
# This is a sample, a more comprehensive mapping would be required.
|
||||
df_combined.rename(columns={
|
||||
"Total Revenue": "revenue",
|
||||
"Net Income": "net_income",
|
||||
"Total Assets": "total_assets",
|
||||
"Total Liab": "total_liabilities",
|
||||
}, inplace=True, errors='ignore')
|
||||
|
||||
return df_combined.to_dict('records')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"yfinance get_financial_statements failed for {stock_code}: {e}")
|
||||
return []
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(None, _fetch)
|
||||
@ -1,58 +0,0 @@
|
||||
"""
|
||||
FastAPI application entrypoint
|
||||
"""
|
||||
import logging
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.config import settings
|
||||
from app.routers.config import router as config_router
|
||||
from app.routers.financial import router as financial_router
|
||||
from app.routers.orgs import router as orgs_router
|
||||
|
||||
# Configure logging to ensure our app logs show up in development
|
||||
import sys
|
||||
|
||||
# Force our logging configuration to override uvicorn's
|
||||
class ForcefulHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
# Force output to stdout regardless of uvicorn's configuration
|
||||
print(f"[APP] {record.getMessage()}", file=sys.stdout, flush=True)
|
||||
|
||||
# Set up our forceful handler for data providers
|
||||
forceful_handler = ForcefulHandler()
|
||||
forceful_handler.setLevel(logging.DEBUG)
|
||||
|
||||
# Configure data providers logger with forceful output
|
||||
data_providers_logger = logging.getLogger('app.data_providers')
|
||||
data_providers_logger.setLevel(logging.DEBUG)
|
||||
data_providers_logger.addHandler(forceful_handler)
|
||||
|
||||
# Also set up for the main app logger
|
||||
app_logger = logging.getLogger('app')
|
||||
app_logger.setLevel(logging.INFO)
|
||||
app_logger.addHandler(forceful_handler)
|
||||
|
||||
# Ensure our handlers are not suppressed
|
||||
data_providers_logger.propagate = False
|
||||
app_logger.propagate = False
|
||||
|
||||
app = FastAPI(title=settings.APP_NAME, version=settings.APP_VERSION)
|
||||
|
||||
# CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Routers
|
||||
app.include_router(config_router, prefix=f"{settings.API_V1_STR}/config", tags=["config"])
|
||||
app.include_router(financial_router, prefix=f"{settings.API_V1_STR}/financials", tags=["financials"])
|
||||
app.include_router(orgs_router, prefix=f"{settings.API_V1_STR}/orgs", tags=["orgs"])
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
return {"status": "ok", "name": settings.APP_NAME, "version": settings.APP_VERSION}
|
||||
File diff suppressed because it is too large
@ -1,143 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
from typing import Dict
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
||||
|
||||
# Lazy loader for DataManager
|
||||
_dm = None
|
||||
def get_dm():
|
||||
global _dm
|
||||
if _dm is not None:
|
||||
return _dm
|
||||
try:
|
||||
from app.data_manager import data_manager as real_dm
|
||||
_dm = real_dm
|
||||
return _dm
|
||||
except Exception:
|
||||
# Return a stub if the real one fails to import
|
||||
class _StubDM:
|
||||
async def get_stock_basic(self, stock_code: str): return None
|
||||
async def get_financial_statements(self, stock_code: str, report_dates): return []
|
||||
_dm = _StubDM()
|
||||
return _dm
|
||||
|
||||
from app.services.analysis_client import AnalysisClient, load_analysis_config
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Constants for config paths
|
||||
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
BASE_CONFIG_PATH = os.path.join(REPO_ROOT, "config", "config.json")
|
||||
|
||||
def _load_json(path: str) -> Dict:
|
||||
if not os.path.exists(path):
|
||||
return {}
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
async def run_full_analysis(org_id: str):
|
||||
"""
|
||||
Asynchronous task to run a full analysis for a given stock.
|
||||
This function is market-agnostic and relies on DataManager.
|
||||
"""
|
||||
logger.info(f"Starting full analysis task for {org_id}")
|
||||
|
||||
# 1. Load configurations
|
||||
base_cfg = _load_json(BASE_CONFIG_PATH)
|
||||
llm_provider = base_cfg.get("llm", {}).get("provider", "gemini")
|
||||
llm_config = base_cfg.get("llm", {}).get(llm_provider, {})
|
||||
|
||||
api_key = llm_config.get("api_key")
|
||||
base_url = llm_config.get("base_url")
|
||||
|
||||
if not api_key:
|
||||
logger.error(f"API key for {llm_provider} not configured. Aborting analysis for {org_id}.")
|
||||
return
|
||||
|
||||
analysis_config_full = load_analysis_config()
|
||||
modules_config = analysis_config_full.get("analysis_modules", {})
|
||||
if not modules_config:
|
||||
logger.error(f"Analysis modules configuration not found. Aborting analysis for {org_id}.")
|
||||
return
|
||||
|
||||
# 2. Fetch basic company info (name)
|
||||
try:
|
||||
basic_data = await get_dm().get_stock_basic(stock_code=org_id)
|
||||
company_name = basic_data.get("name", org_id) if basic_data else org_id
|
||||
logger.info(f"Got company name for {org_id}: {company_name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get company name for {org_id}. Using org_id as name. Error: {e}")
|
||||
company_name = org_id
|
||||
|
||||
# 3. Fetch financial data
|
||||
financial_data = None
|
||||
try:
|
||||
# You might want to make the date range configurable
|
||||
from datetime import datetime
|
||||
current_year = datetime.now().year
|
||||
report_dates = [f"{year}1231" for year in range(current_year - 5, current_year)]
|
||||
|
||||
financial_statements = await get_dm().get_financial_statements(stock_code=org_id, report_dates=report_dates)
|
||||
if financial_statements:
|
||||
financial_data = {"series": financial_statements}
|
||||
logger.info(f"Successfully fetched financial statements for {org_id}")
|
||||
else:
|
||||
logger.warning(f"Could not fetch financial statements for {org_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching financial data for {org_id}: {e}")
|
||||
|
||||
# 4. Execute analysis modules in order (simplified, assumes no complex dependencies for now)
|
||||
# Note: A full implementation would need the topological sort from the financial router.
|
||||
analysis_results = {}
|
||||
for module_type, module_config in modules_config.items():
|
||||
logger.info(f"Running analysis module: {module_type} for {org_id}")
|
||||
client = AnalysisClient(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=module_config.get("model", "gemini-1.5-flash")
|
||||
)
|
||||
|
||||
# Simplified context: use results from all previously completed modules
|
||||
context = analysis_results.copy()
|
||||
|
||||
result = await client.generate_analysis(
|
||||
analysis_type=module_type,
|
||||
company_name=company_name,
|
||||
ts_code=org_id,
|
||||
prompt_template=module_config.get("prompt_template", ""),
|
||||
financial_data=financial_data,
|
||||
context=context,
|
||||
)
|
||||
|
||||
if result.get("success"):
|
||||
analysis_results[module_type] = result.get("content", "")
|
||||
logger.info(f"Module {module_type} for {org_id} completed successfully.")
|
||||
else:
|
||||
logger.error(f"Module {module_type} for {org_id} failed: {result.get('error')}")
|
||||
# Store error message to avoid breaking dependencies that might handle missing data
|
||||
analysis_results[module_type] = f"Error: Analysis for {module_type} failed."
|
||||
|
||||
# 5. Save the final report
|
||||
# TODO: Implement database logic to save the `analysis_results` to the report record.
|
||||
logger.info(f"Full analysis for {org_id} finished. Results: {json.dumps(analysis_results, indent=2, ensure_ascii=False)}")
|
||||
|
||||
|
||||
@router.post("/{market}/{org_id}/reports/generate")
|
||||
async def trigger_report_generation(market: str, org_id: str, background_tasks: BackgroundTasks):
|
||||
"""
|
||||
Triggers a background task to generate a full financial report.
|
||||
This endpoint is now market-agnostic.
|
||||
"""
|
||||
logger.info(f"Received report generation request for {org_id} in {market} market.")
|
||||
|
||||
# TODO: Create a report record in the database with "generating" status here.
|
||||
|
||||
background_tasks.add_task(run_full_analysis, org_id)
|
||||
|
||||
logger.info(f"Queued analysis task for {org_id}.")
|
||||
return {"queued": True, "market": market, "org_id": org_id}
|
||||
@ -1,188 +0,0 @@
|
||||
"""
|
||||
Configuration Management Service (file + service based; no direct DB)
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
|
||||
from app.schemas.config import ConfigResponse, ConfigUpdateRequest, DatabaseConfig, NewApiConfig, DataSourceConfig, ConfigTestResponse
|
||||
from app.core.config import settings
|
||||
|
||||
class ConfigManager:
|
||||
"""Manages system configuration by fetching from config-service and updating local config."""
|
||||
|
||||
def __init__(self, config_path: str = None):
|
||||
if config_path is None:
|
||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
self.config_path = os.path.join(project_root, "config", "config.json")
|
||||
else:
|
||||
self.config_path = config_path
|
||||
|
||||
async def _fetch_base_config_from_service(self) -> Dict[str, Any]:
|
||||
base_url = settings.CONFIG_SERVICE_BASE_URL.rstrip("/")
|
||||
url = f"{base_url}/system"
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(url)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
if not isinstance(data, dict):
|
||||
raise ValueError("Config service 返回的系统配置格式错误")
|
||||
return data
|
||||
|
||||
def _merge_configs(self, base: Dict[str, Any], overrides: Dict[str, Any]) -> Dict[str, Any]:
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            base[key] = self._merge_configs(base[key], value)
        else:
            base[key] = value
    return base
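# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# The recursive merge above overwrites scalars but descends into nested dicts. A standalone
# re-implementation with made-up config values, mirroring the method's logic:
def _example_merge(base: dict, overrides: dict) -> dict:
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            base[key] = _example_merge(base[key], value)
        else:
            base[key] = value
    return base

assert _example_merge(
    {"llm": {"provider": "gemini", "gemini": {"api_key": "old"}}},
    {"llm": {"gemini": {"api_key": "new"}}},
) == {"llm": {"provider": "gemini", "gemini": {"api_key": "new"}}}
# --------------------------------------------------------------------------------------------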
|
||||
|
||||
async def get_config(self) -> ConfigResponse:
|
||||
base_config = await self._fetch_base_config_from_service()
|
||||
|
||||
# Support both locations: prefer new_api, then fall back to llm.new_api
|
||||
new_api_src = base_config.get("new_api") or base_config.get("llm", {}).get("new_api", {})
|
||||
|
||||
return ConfigResponse(
|
||||
database=DatabaseConfig(**base_config.get("database", {})),
|
||||
new_api=NewApiConfig(**(new_api_src or {})),
|
||||
data_sources={
|
||||
k: DataSourceConfig(**v)
|
||||
for k, v in base_config.get("data_sources", {}).items()
|
||||
}
|
||||
)
|
||||
|
||||
async def update_config(self, config_update: ConfigUpdateRequest) -> ConfigResponse:
|
||||
update_dict = config_update.dict(exclude_unset=True)
|
||||
self._validate_config_data(update_dict)
|
||||
|
||||
# Write directly to config.json in the project root
|
||||
current = {}
|
||||
if os.path.exists(self.config_path):
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
current = json.load(f) or {}
|
||||
|
||||
merged = self._merge_configs(current, update_dict)
|
||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(merged, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Return the merged view (consistent with get_config: read once from the service to avoid multi-source drift)
|
||||
return await self.get_config()
|
||||
|
||||
def _validate_config_data(self, config_data: Dict[str, Any]) -> None:
|
||||
if "database" in config_data:
|
||||
db_config = config_data["database"]
|
||||
if "url" in db_config:
|
||||
url = db_config["url"]
|
||||
if not url.startswith(("postgresql://", "postgresql+asyncpg://")):
|
||||
raise ValueError("数据库URL必须以 postgresql:// 或 postgresql+asyncpg:// 开头")
|
||||
|
||||
if "new_api" in config_data:
|
||||
new_api_config = config_data["new_api"]
|
||||
if "api_key" in new_api_config and len(new_api_config["api_key"]) < 10:
|
||||
raise ValueError("New API Key长度不能少于10个字符")
|
||||
if "base_url" in new_api_config and new_api_config["base_url"]:
|
||||
base_url = new_api_config["base_url"]
|
||||
if not base_url.startswith(("http://", "https://")):
|
||||
raise ValueError("New API Base URL必须以 http:// 或 https:// 开头")
|
||||
|
||||
if "data_sources" in config_data:
|
||||
for source_name, source_config in config_data["data_sources"].items():
|
||||
if "api_key" in source_config and len(source_config["api_key"]) < 10:
|
||||
raise ValueError(f"{source_name} API Key长度不能少于10个字符")
|
||||
|
||||
async def test_config(self, config_type: str, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
try:
|
||||
if config_type == "database":
|
||||
return await self._test_database(config_data)
|
||||
elif config_type == "new_api":
|
||||
return await self._test_new_api(config_data)
|
||||
elif config_type == "tushare":
|
||||
return await self._test_tushare(config_data)
|
||||
elif config_type == "finnhub":
|
||||
return await self._test_finnhub(config_data)
|
||||
else:
|
||||
return ConfigTestResponse(success=False, message=f"不支持的配置类型: {config_type}")
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(success=False, message=f"测试失败: {str(e)}")
|
||||
|
||||
async def _test_database(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
db_url = config_data.get("url")
|
||||
if not db_url:
|
||||
return ConfigTestResponse(success=False, message="数据库URL不能为空")
|
||||
try:
|
||||
if db_url.startswith("postgresql+asyncpg://"):
|
||||
db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
|
||||
conn = await asyncpg.connect(db_url)
|
||||
await conn.close()
|
||||
return ConfigTestResponse(success=True, message="数据库连接成功")
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(success=False, message=f"数据库连接失败: {str(e)}")
|
||||
|
||||
async def _test_new_api(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
api_key = config_data.get("api_key")
|
||||
base_url = config_data.get("base_url")
|
||||
if not api_key or not base_url:
|
||||
return ConfigTestResponse(success=False, message="New API Key和Base URL均不能为空")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(
|
||||
f"{base_url.rstrip('/')}/models",
|
||||
headers={"Authorization": f"Bearer {api_key}"}
|
||||
)
|
||||
if response.status_code == 200:
|
||||
return ConfigTestResponse(success=True, message="New API连接成功")
|
||||
else:
|
||||
return ConfigTestResponse(success=False, message=f"New API测试失败: HTTP {response.status_code} - {response.text}")
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(success=False, message=f"New API连接失败: {str(e)}")
|
||||
|
||||
async def _test_tushare(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
api_key = config_data.get("api_key")
|
||||
if not api_key:
|
||||
return ConfigTestResponse(success=False, message="Tushare API Key不能为空")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.post(
|
||||
"http://api.tushare.pro",
|
||||
json={
|
||||
"api_name": "stock_basic",
|
||||
"token": api_key,
|
||||
"params": {"list_status": "L"},
|
||||
"fields": "ts_code"
|
||||
}
|
||||
)
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
if data.get("code") == 0:
|
||||
return ConfigTestResponse(success=True, message="Tushare API连接成功")
|
||||
else:
|
||||
return ConfigTestResponse(success=False, message=f"Tushare API错误: {data.get('msg', '未知错误')}")
|
||||
else:
|
||||
return ConfigTestResponse(success=False, message=f"Tushare API测试失败: HTTP {response.status_code}")
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(success=False, message=f"Tushare API连接失败: {str(e)}")
|
||||
|
||||
async def _test_finnhub(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
api_key = config_data.get("api_key")
|
||||
if not api_key:
|
||||
return ConfigTestResponse(success=False, message="Finnhub API Key不能为空")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(
|
||||
"https://finnhub.io/api/v1/quote",
|
||||
params={"symbol": "AAPL", "token": api_key}
|
||||
)
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
if "c" in data:
|
||||
return ConfigTestResponse(success=True, message="Finnhub API连接成功")
|
||||
else:
|
||||
return ConfigTestResponse(success=False, message="Finnhub API响应格式错误")
|
||||
else:
|
||||
return ConfigTestResponse(success=False, message=f"Finnhub API测试失败: HTTP {response.status_code}")
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(success=False, message=f"Finnhub API连接失败: {str(e)}")
|
||||
@ -1,182 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class CompanyProfile(BaseModel):
|
||||
symbol: str
|
||||
name: str
|
||||
industry: Optional[str] = None
|
||||
list_date: Optional[dt.date] = None
|
||||
additional_info: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class TimeSeriesFinancial(BaseModel):
|
||||
symbol: str
|
||||
metric_name: str
|
||||
period_date: dt.date
|
||||
value: float
|
||||
source: Optional[str] = None
|
||||
|
||||
|
||||
class TimeSeriesFinancialBatch(BaseModel):
|
||||
records: List[TimeSeriesFinancial]
|
||||
|
||||
|
||||
class DailyMarketData(BaseModel):
|
||||
symbol: str
|
||||
trade_date: dt.date
|
||||
open_price: Optional[float] = None
|
||||
high_price: Optional[float] = None
|
||||
low_price: Optional[float] = None
|
||||
close_price: Optional[float] = None
|
||||
volume: Optional[int] = None
|
||||
pe: Optional[float] = None
|
||||
pb: Optional[float] = None
|
||||
total_mv: Optional[float] = None
|
||||
|
||||
|
||||
class DailyMarketDataBatch(BaseModel):
|
||||
records: List[DailyMarketData]
|
||||
|
||||
class RealtimeQuote(BaseModel):
|
||||
symbol: str
|
||||
market: str
|
||||
ts: dt.datetime
|
||||
price: float
|
||||
open_price: Optional[float] = None
|
||||
high_price: Optional[float] = None
|
||||
low_price: Optional[float] = None
|
||||
prev_close: Optional[float] = None
|
||||
change: Optional[float] = None
|
||||
change_percent: Optional[float] = None
|
||||
volume: Optional[int] = None
|
||||
source: Optional[str] = None
|
||||
|
||||
|
||||
class NewAnalysisResult(BaseModel):
|
||||
symbol: str
|
||||
module_id: str
|
||||
model_name: Optional[str] = None
|
||||
content: str
|
||||
meta_data: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class AnalysisResult(BaseModel):
|
||||
id: str
|
||||
symbol: str
|
||||
module_id: str
|
||||
generated_at: dt.datetime
|
||||
model_name: Optional[str] = None
|
||||
content: str
|
||||
meta_data: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class DataPersistenceClient:
|
||||
def __init__(self, base_url: Optional[str] = None, timeout: float = 20.0):
|
||||
self.base_url = (base_url or settings.DATA_PERSISTENCE_BASE_URL).rstrip("/")
|
||||
self.timeout = timeout
|
||||
|
||||
async def _client(self) -> httpx.AsyncClient:
|
||||
return httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout)
|
||||
|
||||
# Companies
|
||||
async def upsert_company(self, profile: CompanyProfile) -> None:
|
||||
async with await self._client() as client:
|
||||
resp = await client.put("/companies", json=profile.model_dump(mode="json"))
|
||||
resp.raise_for_status()
|
||||
|
||||
async def get_company(self, symbol: str) -> CompanyProfile:
|
||||
async with await self._client() as client:
|
||||
resp = await client.get(f"/companies/{symbol}")
|
||||
resp.raise_for_status()
|
||||
return CompanyProfile.model_validate(resp.json())
|
||||
|
||||
# Financials
|
||||
async def batch_insert_financials(self, batch: TimeSeriesFinancialBatch) -> None:
|
||||
async with await self._client() as client:
|
||||
resp = await client.post("/market-data/financials/batch", json=batch.model_dump(mode="json"))
|
||||
resp.raise_for_status()
|
||||
|
||||
async def get_financials_by_symbol(self, symbol: str, metrics: Optional[List[str]] = None) -> List[TimeSeriesFinancial]:
|
||||
params = {}
|
||||
if metrics:
|
||||
params["metrics"] = ",".join(metrics)
|
||||
async with await self._client() as client:
|
||||
resp = await client.get(f"/market-data/financials/{symbol}", params=params)
|
||||
resp.raise_for_status()
|
||||
return [TimeSeriesFinancial.model_validate(item) for item in resp.json()]
|
||||
|
||||
# Daily data
|
||||
async def batch_insert_daily_data(self, batch: DailyMarketDataBatch) -> None:
|
||||
async with await self._client() as client:
|
||||
resp = await client.post("/market-data/daily/batch", json=batch.model_dump(mode="json"))
|
||||
resp.raise_for_status()
|
||||
|
||||
async def get_daily_data_by_symbol(
|
||||
self,
|
||||
symbol: str,
|
||||
start_date: Optional[dt.date] = None,
|
||||
end_date: Optional[dt.date] = None,
|
||||
) -> List[DailyMarketData]:
|
||||
params = {}
|
||||
if start_date:
|
||||
params["start_date"] = start_date.isoformat()
|
||||
if end_date:
|
||||
params["end_date"] = end_date.isoformat()
|
||||
async with await self._client() as client:
|
||||
resp = await client.get(f"/market-data/daily/{symbol}", params=params)
|
||||
resp.raise_for_status()
|
||||
return [DailyMarketData.model_validate(item) for item in resp.json()]
|
||||
|
||||
# Realtime quotes
|
||||
async def upsert_realtime_quote(self, quote: RealtimeQuote) -> None:
|
||||
async with await self._client() as client:
|
||||
resp = await client.post("/market-data/quotes", json=quote.model_dump(mode="json"))
|
||||
resp.raise_for_status()
|
||||
|
||||
async def get_latest_realtime_quote(
|
||||
self,
|
||||
market: str,
|
||||
symbol: str,
|
||||
max_age_seconds: Optional[int] = None,
|
||||
) -> Optional[RealtimeQuote]:
|
||||
params = {"market": market}
|
||||
if max_age_seconds is not None:
|
||||
params["max_age_seconds"] = int(max_age_seconds)
|
||||
async with await self._client() as client:
|
||||
resp = await client.get(f"/market-data/quotes/{symbol}", params=params)
|
||||
if resp.status_code == 404:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
return RealtimeQuote.model_validate(resp.json())
|
||||
|
||||
# Analysis results
|
||||
async def create_analysis_result(self, new_result: NewAnalysisResult) -> AnalysisResult:
|
||||
async with await self._client() as client:
|
||||
resp = await client.post("/analysis-results", json=new_result.model_dump(mode="json"))
|
||||
resp.raise_for_status()
|
||||
return AnalysisResult.model_validate(resp.json())
|
||||
|
||||
async def get_analysis_results(self, symbol: str, module_id: Optional[str] = None) -> List[AnalysisResult]:
|
||||
params = {"symbol": symbol}
|
||||
if module_id:
|
||||
params["module_id"] = module_id
|
||||
async with await self._client() as client:
|
||||
resp = await client.get("/analysis-results", params=params)
|
||||
resp.raise_for_status()
|
||||
return [AnalysisResult.model_validate(item) for item in resp.json()]
|
||||
|
||||
async def get_analysis_result_by_id(self, result_id: str) -> AnalysisResult:
|
||||
async with await self._client() as client:
|
||||
resp = await client.get(f"/analysis-results/{result_id}")
|
||||
resp.raise_for_status()
|
||||
return AnalysisResult.model_validate(resp.json())
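# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# Typical round-trip with the client above. The base URL and the symbol/name values are
# assumptions for illustration; by default the client uses settings.DATA_PERSISTENCE_BASE_URL.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        client = DataPersistenceClient(base_url="http://localhost:3000/api/v1")
        await client.upsert_company(CompanyProfile(symbol="600519.SH", name="Example Co"))
        profile = await client.get_company("600519.SH")
        print(profile.name)

    asyncio.run(_demo())
# --------------------------------------------------------------------------------------------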
|
||||
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
# syntax=docker/dockerfile:1.6
|
||||
|
||||
FROM python:3.11-slim AS base
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PROJECT_ROOT=/workspace
|
||||
|
||||
WORKDIR /workspace/services/config-service
|
||||
|
||||
COPY services/config-service/requirements.txt ./requirements.txt
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Configuration and source code are provided via mounted volumes at runtime
|
||||
RUN mkdir -p /workspace/services/config-service
|
||||
|
||||
# The default entrypoint is provided by docker-compose
|
||||
|
||||
|
||||
@ -1,64 +0,0 @@
|
||||
"""
|
||||
Config Service - provides read-only access to static configuration files.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
APP_NAME = "config-service"
|
||||
API_V1 = "/api/v1"
|
||||
# Inside the container the project root is mounted at /workspace
|
||||
PROJECT_ROOT = os.environ.get("PROJECT_ROOT", "/workspace")
|
||||
CONFIG_DIR = os.path.join(PROJECT_ROOT, "config")
|
||||
SYSTEM_CONFIG_PATH = os.path.join(CONFIG_DIR, "config.json")
|
||||
ANALYSIS_CONFIG_PATH = os.path.join(CONFIG_DIR, "analysis-config.json")
|
||||
|
||||
app = FastAPI(title=APP_NAME, version="0.1.0")
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
def _read_json_file(path: str) -> Dict[str, Any]:
|
||||
if not os.path.exists(path):
|
||||
raise HTTPException(status_code=404, detail=f"配置文件不存在: {os.path.basename(path)}")
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except json.JSONDecodeError as e:
|
||||
raise HTTPException(status_code=500, detail=f"配置文件格式错误: {e}") from e
|
||||
except OSError as e:
|
||||
raise HTTPException(status_code=500, detail=f"读取配置文件失败: {e}") from e
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root() -> Dict[str, Any]:
|
||||
return {"status": "ok", "name": APP_NAME}
|
||||
|
||||
|
||||
@app.get(f"{API_V1}/system")
|
||||
async def get_system_config() -> Dict[str, Any]:
|
||||
"""
|
||||
Return the base system configuration (raw file contents, without database overrides).
|
||||
"""
|
||||
return _read_json_file(SYSTEM_CONFIG_PATH)
|
||||
|
||||
|
||||
@app.get(f"{API_V1}/analysis-modules")
|
||||
async def get_analysis_modules() -> Dict[str, Any]:
|
||||
"""
|
||||
Return the analysis-module configuration (passed through as-is).
|
||||
"""
|
||||
return _read_json_file(ANALYSIS_CONFIG_PATH)
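# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# Other services read this config over HTTP. The host below is an assumption; inside the
# compose network the service is expected at http://config-service:7000 (see the backend's
# CONFIG_SERVICE_BASE_URL default).
if __name__ == "__main__":
    import asyncio
    import httpx

    async def _demo() -> None:
        async with httpx.AsyncClient(base_url="http://localhost:7000") as client:
            resp = await client.get("/api/v1/system")
            resp.raise_for_status()
            print(sorted(resp.json().keys()))

    asyncio.run(_demo())
# --------------------------------------------------------------------------------------------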
|
||||
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
fastapi==0.115.0
|
||||
uvicorn[standard]==0.30.6
|
||||
|
||||
BIN assets/flow.png (binary file not shown; was 141 KiB)
assets/tushare.json (5553 lines; file diff suppressed because it is too large)
@ -23,10 +23,6 @@ class Settings(BaseSettings):
|
||||
GEMINI_API_KEY: Optional[str] = None
|
||||
TUSHARE_TOKEN: Optional[str] = None
|
||||
|
||||
# Microservices
|
||||
CONFIG_SERVICE_BASE_URL: str = "http://config-service:7000/api/v1"
|
||||
DATA_PERSISTENCE_BASE_URL: str = "http://data-persistence-service:3000/api/v1"
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
case_sensitive = True
|
||||
backend/app/core/dependencies.py (new file, 18 lines)
@ -0,0 +1,18 @@
|
||||
"""
|
||||
Application dependencies and providers
|
||||
"""
|
||||
from typing import AsyncGenerator
|
||||
from fastapi import Depends
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.database import AsyncSessionLocal
|
||||
from app.services.config_manager import ConfigManager
|
||||
|
||||
async def get_db_session() -> AsyncGenerator[AsyncSession, None]:
|
||||
"""Provides a database session to the application."""
|
||||
async with AsyncSessionLocal() as session:
|
||||
yield session
|
||||
|
||||
def get_config_manager(db_session: AsyncSession = Depends(get_db_session)) -> ConfigManager:
|
||||
"""Dependency to get the configuration manager."""
|
||||
return ConfigManager(db_session=db_session)
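# --- Illustrative sketch (editor's example, not part of this module) -----------------------
# How a route can receive the manager through FastAPI's dependency injection. The path is
# hypothetical, and it assumes the manager exposes an async get_config(), as the removed
# file-based ConfigManager above does.
from fastapi import APIRouter

_example_router = APIRouter()

@_example_router.get("/config-example")
async def _read_config_example(manager: ConfigManager = Depends(get_config_manager)):
    return await manager.get_config()
# --------------------------------------------------------------------------------------------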
|
||||
backend/app/main.py (new file, 36 lines)
@ -0,0 +1,36 @@
|
||||
"""
|
||||
FastAPI application entrypoint
|
||||
"""
|
||||
import logging
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.config import settings
|
||||
from app.routers.config import router as config_router
|
||||
from app.routers.financial import router as financial_router
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s: %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
)
|
||||
|
||||
app = FastAPI(title=settings.APP_NAME, version=settings.APP_VERSION)
|
||||
|
||||
# CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Routers
|
||||
app.include_router(config_router, prefix=f"{settings.API_V1_STR}/config", tags=["config"])
|
||||
app.include_router(financial_router, prefix=f"{settings.API_V1_STR}/financials", tags=["financials"])
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
return {"status": "ok", "name": settings.APP_NAME, "version": settings.APP_VERSION}
|
||||
backend/app/routers/financial.py (new file, 693 lines)
@ -0,0 +1,693 @@
|
||||
"""
|
||||
API router for financial data (Tushare for China market)
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Dict, List
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
|
||||
from app.core.config import settings
|
||||
from app.schemas.financial import (
|
||||
BatchFinancialDataResponse,
|
||||
FinancialConfigResponse,
|
||||
FinancialMeta,
|
||||
StepRecord,
|
||||
CompanyProfileResponse,
|
||||
AnalysisResponse,
|
||||
AnalysisConfigResponse
|
||||
)
|
||||
from app.services.tushare_client import TushareClient
|
||||
from app.services.company_profile_client import CompanyProfileClient
|
||||
from app.services.analysis_client import AnalysisClient, load_analysis_config, get_analysis_config
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Load metric config from file (project root is repo root, not backend/)
|
||||
# routers/ -> app/ -> backend/ -> repo root
|
||||
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
FINANCIAL_CONFIG_PATH = os.path.join(REPO_ROOT, "config", "financial-tushare.json")
|
||||
BASE_CONFIG_PATH = os.path.join(REPO_ROOT, "config", "config.json")
|
||||
ANALYSIS_CONFIG_PATH = os.path.join(REPO_ROOT, "config", "analysis-config.json")
|
||||
|
||||
|
||||
def _load_json(path: str) -> Dict:
|
||||
if not os.path.exists(path):
|
||||
return {}
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
@router.post("/china/{ts_code}/analysis", response_model=List[AnalysisResponse])
|
||||
async def generate_full_analysis(
|
||||
ts_code: str,
|
||||
company_name: str = Query(None, description="Company name for better context"),
|
||||
):
|
||||
"""
|
||||
Generate a full analysis report by orchestrating multiple analysis modules
|
||||
based on dependencies defined in the configuration.
|
||||
"""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.info(f"[API] Full analysis requested for {ts_code}")
|
||||
|
||||
# Load base and analysis configurations
|
||||
base_cfg = _load_json(BASE_CONFIG_PATH)
|
||||
llm_provider = base_cfg.get("llm", {}).get("provider", "gemini")
|
||||
llm_config = base_cfg.get("llm", {}).get(llm_provider, {})
|
||||
|
||||
api_key = llm_config.get("api_key")
|
||||
base_url = llm_config.get("base_url")
|
||||
|
||||
if not api_key:
|
||||
logger.error(f"[API] API key for {llm_provider} not configured")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"API key for {llm_provider} not configured."
|
||||
)
|
||||
|
||||
analysis_config_full = load_analysis_config()
|
||||
modules_config = analysis_config_full.get("analysis_modules", {})
|
||||
if not modules_config:
|
||||
raise HTTPException(status_code=404, detail="Analysis modules configuration not found.")
|
||||
|
||||
# --- Dependency Resolution (Topological Sort) ---
|
||||
def topological_sort(graph):
|
||||
in_degree = {u: 0 for u in graph}
|
||||
for u in graph:
|
||||
for v in graph[u]:
|
||||
in_degree[v] += 1
|
||||
|
||||
queue = [u for u in graph if in_degree[u] == 0]
|
||||
sorted_order = []
|
||||
|
||||
while queue:
|
||||
u = queue.pop(0)
|
||||
sorted_order.append(u)
|
||||
for v in graph.get(u, []):
|
||||
in_degree[v] -= 1
|
||||
if in_degree[v] == 0:
|
||||
queue.append(v)
|
||||
|
||||
if len(sorted_order) == len(graph):
|
||||
return sorted_order, None  # match the (order, cycles) shape expected by the caller
|
||||
else:
|
||||
# Detect cycles and provide a meaningful error
|
||||
cycles = []
|
||||
visited = set()
|
||||
path = []
|
||||
|
||||
def find_cycle_util(node):
|
||||
visited.add(node)
|
||||
path.append(node)
|
||||
for neighbor in graph.get(node, []):
|
||||
if neighbor in path:
|
||||
cycle_start_index = path.index(neighbor)
|
||||
cycles.append(path[cycle_start_index:] + [neighbor])
|
||||
return
|
||||
if neighbor not in visited:
|
||||
find_cycle_util(neighbor)
|
||||
path.pop()
|
||||
|
||||
for node in graph:
|
||||
if node not in visited:
|
||||
find_cycle_util(node)
|
||||
|
||||
return None, cycles
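# --- Illustrative sketch (editor's example, not part of this endpoint) ---------------------
# Standalone Kahn's-algorithm check mirroring the nested helper above. The adjacency list
# maps each module to the modules that depend on it, so prerequisites come out first
# (module names are made up):
_example_adj = {"fundamental_analysis": ["final_conclusion"],
                "market_analysis": ["final_conclusion"],
                "final_conclusion": []}
_example_in = {u: 0 for u in _example_adj}
for _u in _example_adj:
    for _v in _example_adj[_u]:
        _example_in[_v] += 1
_example_queue = [u for u in _example_adj if _example_in[u] == 0]
_example_order = []
while _example_queue:
    _u = _example_queue.pop(0)
    _example_order.append(_u)
    for _v in _example_adj[_u]:
        _example_in[_v] -= 1
        if _example_in[_v] == 0:
            _example_queue.append(_v)
assert _example_order == ["fundamental_analysis", "market_analysis", "final_conclusion"]
# --------------------------------------------------------------------------------------------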
|
||||
|
||||
|
||||
# Build dependency graph
|
||||
dependency_graph = {
|
||||
name: config.get("dependencies", [])
|
||||
for name, config in modules_config.items()
|
||||
}
|
||||
|
||||
# Invert graph for topological sort (from dependency to dependent)
|
||||
adj_list = {u: [] for u in dependency_graph}
|
||||
for u, dependencies in dependency_graph.items():
|
||||
for dep in dependencies:
|
||||
if dep in adj_list:
|
||||
adj_list[dep].append(u)
|
||||
|
||||
sorted_modules, cycle = topological_sort(adj_list)
|
||||
if not sorted_modules:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Circular dependency detected in analysis modules configuration. Cycle: {cycle}"
|
||||
)
|
||||
|
||||
# --- Fetch common data (company name, financial data) ---
|
||||
# This logic is duplicated, could be refactored into a helper
|
||||
financial_data = None
|
||||
if not company_name:
|
||||
logger.info(f"[API] Fetching company name for {ts_code}")
|
||||
try:
|
||||
token = base_cfg.get("data_sources", {}).get("tushare", {}).get("api_key")
|
||||
if token:
|
||||
tushare_client = TushareClient(token=token)
|
||||
basic_data = await tushare_client.query(api_name="stock_basic", params={"ts_code": ts_code}, fields="ts_code,name")
|
||||
if basic_data:
|
||||
company_name = basic_data[0].get("name", ts_code)
|
||||
logger.info(f"[API] Got company name: {company_name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get company name, proceeding with ts_code. Error: {e}")
|
||||
company_name = ts_code
|
||||
|
||||
# --- Execute modules in order ---
|
||||
analysis_results = []
|
||||
completed_modules_content = {}
|
||||
|
||||
for module_type in sorted_modules:
|
||||
module_config = modules_config[module_type]
|
||||
logger.info(f"[Orchestrator] Starting analysis for module: {module_type}")
|
||||
|
||||
client = AnalysisClient(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=module_config.get("model", "gemini-1.5-flash")
|
||||
)
|
||||
|
||||
# Gather context from completed dependencies
|
||||
context = {
|
||||
dep: completed_modules_content.get(dep, "")
|
||||
for dep in module_config.get("dependencies", [])
|
||||
}
|
||||
|
||||
result = await client.generate_analysis(
|
||||
analysis_type=module_type,
|
||||
company_name=company_name,
|
||||
ts_code=ts_code,
|
||||
prompt_template=module_config.get("prompt_template", ""),
|
||||
financial_data=financial_data,
|
||||
context=context,
|
||||
)
|
||||
|
||||
response = AnalysisResponse(
|
||||
ts_code=ts_code,
|
||||
company_name=company_name,
|
||||
analysis_type=module_type,
|
||||
content=result.get("content", ""),
|
||||
model=result.get("model", module_config.get("model")),
|
||||
tokens=result.get("tokens", {}),
|
||||
elapsed_ms=result.get("elapsed_ms", 0),
|
||||
success=result.get("success", False),
|
||||
error=result.get("error")
|
||||
)
|
||||
|
||||
analysis_results.append(response)
|
||||
|
||||
if response.success:
|
||||
completed_modules_content[module_type] = response.content
|
||||
else:
|
||||
# If a module fails, subsequent dependent modules will get an empty string for its context.
|
||||
# This prevents total failure but may affect quality.
|
||||
completed_modules_content[module_type] = f"Error: Analysis for {module_type} failed."
|
||||
logger.error(f"[Orchestrator] Module {module_type} failed: {response.error}")
|
||||
|
||||
logger.info(f"[API] Full analysis for {ts_code} completed.")
|
||||
return analysis_results
|
||||
|
||||
|
||||
@router.get("/config", response_model=FinancialConfigResponse)
|
||||
async def get_financial_config():
|
||||
data = _load_json(FINANCIAL_CONFIG_PATH)
|
||||
api_groups = data.get("api_groups", {})
|
||||
return FinancialConfigResponse(api_groups=api_groups)
|
||||
|
||||
|
||||
@router.get("/china/{ts_code}", response_model=BatchFinancialDataResponse)
|
||||
async def get_china_financials(
|
||||
ts_code: str,
|
||||
years: int = Query(5, ge=1, le=15),
|
||||
):
|
||||
# Load Tushare token
|
||||
base_cfg = _load_json(BASE_CONFIG_PATH)
|
||||
token = (
|
||||
os.environ.get("TUSHARE_TOKEN")
|
||||
or settings.TUSHARE_TOKEN
|
||||
or base_cfg.get("data_sources", {}).get("tushare", {}).get("api_key")
|
||||
)
|
||||
if not token:
|
||||
raise HTTPException(status_code=500, detail="Tushare API token not configured. Set TUSHARE_TOKEN env or config/config.json data_sources.tushare.api_key")
|
||||
|
||||
# Load metric config
|
||||
fin_cfg = _load_json(FINANCIAL_CONFIG_PATH)
|
||||
api_groups: Dict[str, List[Dict]] = fin_cfg.get("api_groups", {})
|
||||
|
||||
client = TushareClient(token=token)
|
||||
|
||||
# Meta tracking
|
||||
started_real = datetime.now(timezone.utc)
|
||||
started = time.perf_counter_ns()
|
||||
api_calls_total = 0
|
||||
api_calls_by_group: Dict[str, int] = {}
|
||||
steps: List[StepRecord] = []
|
||||
current_action = "初始化"
|
||||
|
||||
# Get company name from stock_basic API
|
||||
company_name = None
|
||||
try:
|
||||
basic_data = await client.query(api_name="stock_basic", params={"ts_code": ts_code}, fields="ts_code,name")
|
||||
api_calls_total += 1
|
||||
if basic_data and len(basic_data) > 0:
|
||||
company_name = basic_data[0].get("name")
|
||||
except Exception:
|
||||
# If getting company name fails, continue without it
|
||||
pass
|
||||
|
||||
# Collect series per metric key
|
||||
series: Dict[str, List[Dict]] = {}
|
||||
|
||||
# Helper to store year-value pairs while keeping most recent per year
|
||||
def _merge_year_value(key: str, year: str, value, month: int = None):
|
||||
arr = series.setdefault(key, [])
|
||||
# upsert by year
|
||||
for item in arr:
|
||||
if item["year"] == year:
|
||||
item["value"] = value
|
||||
if month is not None:
|
||||
item["month"] = month
|
||||
return
|
||||
arr.append({"year": year, "value": value, "month": month})
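# --- Illustrative sketch (editor's example, not part of this endpoint) ---------------------
# The helper above upserts by year, so a later call for the same year overwrites the value
# instead of appending a duplicate point (made-up numbers):
_example_arr = [{"year": "2023", "value": 1.0, "month": 12}]
for _item in _example_arr:
    if _item["year"] == "2023":
        _item["value"] = 2.0
assert _example_arr == [{"year": "2023", "value": 2.0, "month": 12}]
# --------------------------------------------------------------------------------------------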
|
||||
|
||||
# Query each API group we care
|
||||
errors: Dict[str, str] = {}
|
||||
for group_name, metrics in api_groups.items():
|
||||
step = StepRecord(
|
||||
name=f"拉取 {group_name}",
|
||||
start_ts=started_real.isoformat(),
|
||||
status="running",
|
||||
)
|
||||
steps.append(step)
|
||||
current_action = step.name
|
||||
if not metrics:
|
||||
continue
|
||||
|
||||
# Group metrics by API (handles the case where the "unknown" group contains several different APIs)
|
||||
api_groups_dict: Dict[str, List[Dict]] = {}
|
||||
for metric in metrics:
|
||||
api = metric.get("api") or group_name
|
||||
if api:  # skip empty API names
|
||||
if api not in api_groups_dict:
|
||||
api_groups_dict[api] = []
|
||||
api_groups_dict[api].append(metric)
|
||||
|
||||
# Process each API separately
|
||||
for api_name, api_metrics in api_groups_dict.items():
|
||||
fields = [m.get("tushareParam") for m in api_metrics if m.get("tushareParam")]
|
||||
if not fields:
|
||||
continue
|
||||
|
||||
date_field = "end_date" if group_name in ("fina_indicator", "income", "balancesheet", "cashflow") else "trade_date"
|
||||
|
||||
# Build the API parameters
|
||||
params = {"ts_code": ts_code, "limit": 5000}
|
||||
|
||||
# For APIs that require a date range (e.g. stk_holdernumber), add date parameters
|
||||
if api_name == "stk_holdernumber":
|
||||
# Compute the date range: from `years` years ago until now
|
||||
end_date = datetime.now().strftime("%Y%m%d")
|
||||
start_date = (datetime.now() - timedelta(days=years * 365)).strftime("%Y%m%d")
|
||||
params["start_date"] = start_date
|
||||
params["end_date"] = end_date
|
||||
# stk_holdernumber usually returns its date in the end_date field
|
||||
date_field = "end_date"
|
||||
|
||||
# Mark non-time-series APIs (e.g. stock_company) as static data
|
||||
is_static_data = api_name == "stock_company"
|
||||
|
||||
# Build the fields string: include the date field plus all requested metric fields
|
||||
# Make sure the date field is present, since it is used to determine the year
|
||||
fields_list = list(fields)
|
||||
if date_field not in fields_list:
|
||||
fields_list.insert(0, date_field)
|
||||
# APIs such as fina_indicator usually also need ts_code and ann_date
|
||||
if api_name in ("fina_indicator", "income", "balancesheet", "cashflow"):
|
||||
for req_field in ["ts_code", "ann_date"]:
|
||||
if req_field not in fields_list:
|
||||
fields_list.insert(0, req_field)
|
||||
fields_str = ",".join(fields_list)
|
||||
|
||||
try:
|
||||
data_rows = await client.query(api_name=api_name, params=params, fields=fields_str)
|
||||
api_calls_total += 1
|
||||
api_calls_by_group[group_name] = api_calls_by_group.get(group_name, 0) + 1
|
||||
except Exception as e:
|
||||
# Record the error but keep processing the remaining APIs
|
||||
error_key = f"{group_name}_{api_name}"
|
||||
errors[error_key] = str(e)
|
||||
continue
|
||||
|
||||
tmp: Dict[str, Dict] = {}
|
||||
current_year = datetime.now().strftime("%Y")
|
||||
|
||||
for row in data_rows:
|
||||
if is_static_data:
|
||||
# For static data (e.g. stock_company), use the current year
|
||||
# Only process the first row, since static data usually has a single row
|
||||
if current_year not in tmp:
|
||||
year = current_year
|
||||
month = None
|
||||
tmp[year] = row
|
||||
tmp[year]['_month'] = month
|
||||
# Skip subsequent rows
|
||||
continue
|
||||
else:
|
||||
# For time-series data, key on the date field
|
||||
date_val = row.get(date_field)
|
||||
if not date_val:
|
||||
continue
|
||||
year = str(date_val)[:4]
|
||||
month = int(str(date_val)[4:6]) if len(str(date_val)) >= 6 else None
|
||||
existing = tmp.get(year)
|
||||
if existing is None or str(row.get(date_field)) > str(existing.get(date_field)):
|
||||
tmp[year] = row
|
||||
tmp[year]['_month'] = month
|
||||
|
||||
for metric in api_metrics:
|
||||
key = metric.get("tushareParam")
|
||||
if not key:
|
||||
continue
|
||||
for year, row in tmp.items():
|
||||
month = row.get('_month')
|
||||
_merge_year_value(key, year, row.get(key), month)
|
||||
|
||||
step.status = "done"
|
||||
step.end_ts = datetime.now(timezone.utc).isoformat()
|
||||
step.duration_ms = int((time.perf_counter_ns() - started) / 1_000_000)
|
||||
|
||||
finished_real = datetime.now(timezone.utc)
|
||||
elapsed_ms = int((time.perf_counter_ns() - started) / 1_000_000)
|
||||
|
||||
if not series:
|
||||
# If nothing succeeded, expose partial error info
|
||||
raise HTTPException(status_code=502, detail={"message": "No data returned from Tushare", "errors": errors})
|
||||
|
||||
# Truncate years and sort
|
||||
for key, arr in series.items():
|
||||
# Deduplicate and sort desc by year, then cut to requested years, and return asc
|
||||
uniq = {item["year"]: item for item in arr}
|
||||
arr_sorted_desc = sorted(uniq.values(), key=lambda x: x["year"], reverse=True)
|
||||
arr_limited = arr_sorted_desc[:years]
|
||||
arr_sorted = sorted(arr_limited, key=lambda x: x["year"]) # ascending by year
|
||||
series[key] = arr_sorted
|
||||
|
||||
meta = FinancialMeta(
|
||||
started_at=started_real.isoformat(),
|
||||
finished_at=finished_real.isoformat(),
|
||||
elapsed_ms=elapsed_ms,
|
||||
api_calls_total=api_calls_total,
|
||||
api_calls_by_group=api_calls_by_group,
|
||||
current_action=None,
|
||||
steps=steps,
|
||||
)
|
||||
|
||||
return BatchFinancialDataResponse(ts_code=ts_code, name=company_name, series=series, meta=meta)
|
||||
|
||||
|
||||
@router.get("/china/{ts_code}/company-profile", response_model=CompanyProfileResponse)
|
||||
async def get_company_profile(
|
||||
ts_code: str,
|
||||
company_name: str = Query(None, description="Company name for better context"),
|
||||
):
|
||||
"""
|
||||
Get company profile for a company using Gemini AI (non-streaming, single response)
|
||||
"""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.info(f"[API] Company profile requested for {ts_code}")
|
||||
|
||||
# Load config
|
||||
base_cfg = _load_json(BASE_CONFIG_PATH)
|
||||
llm_provider = base_cfg.get("llm", {}).get("provider", "gemini")
|
||||
llm_config = base_cfg.get("llm", {}).get(llm_provider, {})
|
||||
|
||||
api_key = llm_config.get("api_key")
|
||||
base_url = llm_config.get("base_url") # Will be None if not set, handled by client
|
||||
|
||||
if not api_key:
|
||||
logger.error(f"[API] API key for {llm_provider} not configured")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"API key for {llm_provider} not configured."
|
||||
)
|
||||
|
||||
client = CompanyProfileClient(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model="gemini-1.5-flash"
|
||||
)
|
||||
|
||||
# Get company name from ts_code if not provided
|
||||
if not company_name:
|
||||
logger.info(f"[API] Fetching company name for {ts_code}")
|
||||
# Try to get from stock_basic API
|
||||
try:
|
||||
base_cfg = _load_json(BASE_CONFIG_PATH)
|
||||
token = (
|
||||
os.environ.get("TUSHARE_TOKEN")
|
||||
or settings.TUSHARE_TOKEN
|
||||
or base_cfg.get("data_sources", {}).get("tushare", {}).get("api_key")
|
||||
)
|
||||
if token:
|
||||
from app.services.tushare_client import TushareClient
|
||||
tushare_client = TushareClient(token=token)
|
||||
basic_data = await tushare_client.query(api_name="stock_basic", params={"ts_code": ts_code}, fields="ts_code,name")
|
||||
if basic_data and len(basic_data) > 0:
|
||||
company_name = basic_data[0].get("name", ts_code)
|
||||
logger.info(f"[API] Got company name: {company_name}")
|
||||
else:
|
||||
company_name = ts_code
|
||||
else:
|
||||
company_name = ts_code
|
||||
except Exception as e:
|
||||
logger.warning(f"[API] Failed to get company name: {e}")
|
||||
company_name = ts_code
|
||||
|
||||
logger.info(f"[API] Generating profile for {company_name}")
|
||||
|
||||
# Generate profile using non-streaming API
|
||||
result = await client.generate_profile(
|
||||
company_name=company_name,
|
||||
ts_code=ts_code,
|
||||
financial_data=None
|
||||
)
|
||||
|
||||
logger.info(f"[API] Profile generation completed, success={result.get('success')}")
|
||||
|
||||
return CompanyProfileResponse(
|
||||
ts_code=ts_code,
|
||||
company_name=company_name,
|
||||
content=result.get("content", ""),
|
||||
model=result.get("model", "gemini-2.5-flash"),
|
||||
tokens=result.get("tokens", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
|
||||
elapsed_ms=result.get("elapsed_ms", 0),
|
||||
success=result.get("success", False),
|
||||
error=result.get("error")
|
||||
)
|
||||
|
||||
|
||||
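A minimal sketch of calling the endpoint above from an async client. The host, port, and router prefix are assumptions (they are not visible in this diff); only the path suffix and query parameter come from the route definition.

import asyncio
import httpx

async def fetch_profile() -> None:
    # Assumed base URL and prefix; adjust to wherever the router is mounted.
    url = "http://localhost:8000/api/financial/china/600519.SH/company-profile"
    async with httpx.AsyncClient(timeout=120.0) as client:
        resp = await client.get(url, params={"company_name": "Example Co."})
        resp.raise_for_status()
        data = resp.json()
        print(data["success"], data["model"], len(data["content"]))

asyncio.run(fetch_profile())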
@router.get("/analysis-config", response_model=AnalysisConfigResponse)
|
||||
async def get_analysis_config_endpoint():
|
||||
"""Get analysis configuration"""
|
||||
config = load_analysis_config()
|
||||
return AnalysisConfigResponse(analysis_modules=config.get("analysis_modules", {}))
|
||||
|
||||
|
||||
@router.put("/analysis-config", response_model=AnalysisConfigResponse)
|
||||
async def update_analysis_config_endpoint(analysis_config: AnalysisConfigResponse):
|
||||
"""Update analysis configuration"""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
# 保存到文件
|
||||
config_data = {
|
||||
"analysis_modules": analysis_config.analysis_modules
|
||||
}
|
||||
|
||||
with open(ANALYSIS_CONFIG_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(config_data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
logger.info(f"[API] Analysis config updated successfully")
|
||||
return AnalysisConfigResponse(analysis_modules=analysis_config.analysis_modules)
|
||||
except Exception as e:
|
||||
logger.error(f"[API] Failed to update analysis config: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to update analysis config: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
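For reference, a sketch of an `analysis_modules` payload the PUT endpoint above could accept. The `model` and `prompt_template` keys are implied by `generate_analysis` further down, and the module names come from its docstring; the prompt text and placeholder syntax are assumptions.

example_analysis_config = {
    "analysis_modules": {
        "fundamental_analysis": {
            "model": "gemini-2.5-flash",
            "prompt_template": "Analyze the fundamentals of {company_name} ({ts_code}).",
        },
        "bull_case": {
            "model": "gemini-2.5-flash",
            "prompt_template": "Make the bull case for {company_name} ({ts_code}).",
        },
    }
}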
@router.get("/china/{ts_code}/analysis/{analysis_type}", response_model=AnalysisResponse)
|
||||
async def generate_analysis(
|
||||
ts_code: str,
|
||||
analysis_type: str,
|
||||
company_name: str = Query(None, description="Company name for better context"),
|
||||
):
|
||||
"""
|
||||
Generate analysis for a company using Gemini AI
|
||||
Supported analysis types:
|
||||
- fundamental_analysis (基本面分析)
|
||||
- bull_case (看涨分析)
|
||||
- bear_case (看跌分析)
|
||||
- market_analysis (市场分析)
|
||||
- news_analysis (新闻分析)
|
||||
- trading_analysis (交易分析)
|
||||
- insider_institutional (内部人与机构动向分析)
|
||||
- final_conclusion (最终结论)
|
||||
"""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.info(f"[API] Analysis requested for {ts_code}, type: {analysis_type}")
|
||||
|
||||
# Load config
|
||||
base_cfg = _load_json(BASE_CONFIG_PATH)
|
||||
llm_provider = base_cfg.get("llm", {}).get("provider", "gemini")
|
||||
llm_config = base_cfg.get("llm", {}).get(llm_provider, {})
|
||||
|
||||
api_key = llm_config.get("api_key")
|
||||
base_url = llm_config.get("base_url")
|
||||
|
||||
if not api_key:
|
||||
logger.error(f"[API] API key for {llm_provider} not configured")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"API key for {llm_provider} not configured."
|
||||
)
|
||||
|
||||
# Get analysis configuration
|
||||
analysis_cfg = get_analysis_config(analysis_type)
|
||||
if not analysis_cfg:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Analysis type '{analysis_type}' not found in configuration"
|
||||
)
|
||||
|
||||
model = analysis_cfg.get("model", "gemini-2.5-flash")
|
||||
prompt_template = analysis_cfg.get("prompt_template", "")
|
||||
|
||||
if not prompt_template:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Prompt template not found for analysis type '{analysis_type}'"
|
||||
)
|
||||
|
||||
# Get company name from ts_code if not provided
|
||||
financial_data = None
|
||||
if not company_name:
|
||||
logger.info(f"[API] Fetching company name and financial data for {ts_code}")
|
||||
try:
|
||||
token = (
|
||||
os.environ.get("TUSHARE_TOKEN")
|
||||
or settings.TUSHARE_TOKEN
|
||||
or base_cfg.get("data_sources", {}).get("tushare", {}).get("api_key")
|
||||
)
|
||||
if token:
|
||||
tushare_client = TushareClient(token=token)
|
||||
basic_data = await tushare_client.query(api_name="stock_basic", params={"ts_code": ts_code}, fields="ts_code,name")
|
||||
if basic_data and len(basic_data) > 0:
|
||||
company_name = basic_data[0].get("name", ts_code)
|
||||
logger.info(f"[API] Got company name: {company_name}")
|
||||
|
||||
# Try to get financial data for context
|
||||
try:
|
||||
fin_cfg = _load_json(FINANCIAL_CONFIG_PATH)
|
||||
api_groups = fin_cfg.get("api_groups", {})
|
||||
|
||||
# Get financial data summary for context
|
||||
series: Dict[str, List[Dict]] = {}
|
||||
for group_name, metrics in api_groups.items():
|
||||
if not metrics:
|
||||
continue
|
||||
api_groups_dict: Dict[str, List[Dict]] = {}
|
||||
for metric in metrics:
|
||||
api = metric.get("api") or group_name
|
||||
if api:
|
||||
if api not in api_groups_dict:
|
||||
api_groups_dict[api] = []
|
||||
api_groups_dict[api].append(metric)
|
||||
|
||||
for api_name, api_metrics in api_groups_dict.items():
|
||||
fields = [m.get("tushareParam") for m in api_metrics if m.get("tushareParam")]
|
||||
if not fields:
|
||||
continue
|
||||
|
||||
date_field = "end_date" if group_name in ("fina_indicator", "income", "balancesheet", "cashflow") else "trade_date"
|
||||
|
||||
params = {"ts_code": ts_code, "limit": 500}
|
||||
fields_list = list(fields)
|
||||
if date_field not in fields_list:
|
||||
fields_list.insert(0, date_field)
|
||||
if api_name in ("fina_indicator", "income", "balancesheet", "cashflow"):
|
||||
for req_field in ["ts_code", "ann_date"]:
|
||||
if req_field not in fields_list:
|
||||
fields_list.insert(0, req_field)
|
||||
fields_str = ",".join(fields_list)
|
||||
|
||||
try:
|
||||
data_rows = await tushare_client.query(api_name=api_name, params=params, fields=fields_str)
|
||||
if data_rows:
|
||||
# Get latest year's data
|
||||
latest_row = data_rows[0] if data_rows else {}
|
||||
for metric in api_metrics:
|
||||
key = metric.get("tushareParam")
|
||||
if key and key in latest_row:
|
||||
if key not in series:
|
||||
series[key] = []
|
||||
series[key].append({
|
||||
"year": latest_row.get(date_field, "")[:4] if latest_row.get(date_field) else str(datetime.now().year),
|
||||
"value": latest_row.get(key)
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
financial_data = {"series": series}
|
||||
except Exception as e:
|
||||
logger.warning(f"[API] Failed to get financial data: {e}")
|
||||
financial_data = None
|
||||
else:
|
||||
company_name = ts_code
|
||||
else:
|
||||
company_name = ts_code
|
||||
except Exception as e:
|
||||
logger.warning(f"[API] Failed to get company name: {e}")
|
||||
company_name = ts_code
|
||||
|
||||
logger.info(f"[API] Generating {analysis_type} for {company_name}")
|
||||
|
||||
# Initialize analysis client with configured model
|
||||
client = AnalysisClient(api_key=api_key, base_url=base_url, model=model)
|
||||
|
||||
# Generate analysis
|
||||
result = await client.generate_analysis(
|
||||
analysis_type=analysis_type,
|
||||
company_name=company_name,
|
||||
ts_code=ts_code,
|
||||
prompt_template=prompt_template,
|
||||
financial_data=financial_data
|
||||
)
|
||||
|
||||
logger.info(f"[API] Analysis generation completed, success={result.get('success')}")
|
||||
|
||||
return AnalysisResponse(
|
||||
ts_code=ts_code,
|
||||
company_name=company_name,
|
||||
analysis_type=analysis_type,
|
||||
content=result.get("content", ""),
|
||||
model=result.get("model", model),
|
||||
tokens=result.get("tokens", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}),
|
||||
elapsed_ms=result.get("elapsed_ms", 0),
|
||||
success=result.get("success", False),
|
||||
error=result.get("error")
|
||||
)
|
||||
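The field-assembly loop inside `generate_analysis` above is dense; a condensed restatement may help. For statement-style APIs the rows are keyed by `end_date` and `ts_code`/`ann_date` are force-included, otherwise `trade_date` is used. The original chooses the date field from the config group name; this sketch collapses group and API name, and the metric list is invented.

STATEMENT_APIS = ("fina_indicator", "income", "balancesheet", "cashflow")

def build_fields(api_name: str, metrics: list) -> str:
    date_field = "end_date" if api_name in STATEMENT_APIS else "trade_date"
    fields = [m["tushareParam"] for m in metrics if m.get("tushareParam")]
    if date_field not in fields:
        fields.insert(0, date_field)
    if api_name in STATEMENT_APIS:
        for req_field in ("ts_code", "ann_date"):
            if req_field not in fields:
                fields.insert(0, req_field)
    return ",".join(fields)

print(build_fields("fina_indicator", [{"tushareParam": "roe"}]))
# -> "ann_date,ts_code,end_date,roe"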
@ -5,9 +5,10 @@ from typing import Dict, List, Optional
from pydantic import BaseModel


class PeriodDataPoint(BaseModel):
period: str
class YearDataPoint(BaseModel):
year: str
value: Optional[float]
month: Optional[int] = None  # Month of the period, used to determine the quarter


class StepRecord(BaseModel):
@ -32,7 +33,7 @@ class FinancialMeta(BaseModel):
class BatchFinancialDataResponse(BaseModel):
ts_code: str
name: Optional[str] = None
series: Dict[str, List[PeriodDataPoint]]
series: Dict[str, List[YearDataPoint]]
meta: Optional[FinancialMeta] = None


@ -71,29 +72,3 @@ class AnalysisResponse(BaseModel):

class AnalysisConfigResponse(BaseModel):
analysis_modules: Dict[str, Dict]


class TodaySnapshotResponse(BaseModel):
ts_code: str
trade_date: str
name: Optional[str] = None
close: Optional[float] = None
pe: Optional[float] = None
pb: Optional[float] = None
dv_ratio: Optional[float] = None
total_mv: Optional[float] = None


class RealTimeQuoteResponse(BaseModel):
symbol: str
market: str
ts: str
price: float
open_price: Optional[float] = None
high_price: Optional[float] = None
low_price: Optional[float] = None
prev_close: Optional[float] = None
change: Optional[float] = None
change_percent: Optional[float] = None
volume: Optional[int] = None
source: Optional[str] = None
@ -14,8 +14,7 @@ class AnalysisClient:

def __init__(self, api_key: str, base_url: str, model: str):
"""Initialize OpenAI client with API key, base URL, and model"""
# Increase client timeout to allow long-running analysis (5 minutes)
self.client = openai.AsyncOpenAI(api_key=api_key, base_url=base_url, timeout=300.0)
self.client = openai.AsyncOpenAI(api_key=api_key, base_url=base_url)
self.model_name = model

async def generate_analysis(
@ -57,7 +56,6 @@ class AnalysisClient:
response = await self.client.chat.completions.create(
model=self.model_name,
messages=[{"role": "user", "content": prompt}],
timeout=300.0,
)

content = response.choices[0].message.content if response.choices else ""
@ -132,51 +130,6 @@ class AnalysisClient:

return prompt

async def generate_analysis_stream(
self,
analysis_type: str,
company_name: str,
ts_code: str,
prompt_template: str,
financial_data: Optional[Dict] = None,
context: Optional[Dict] = None
):
"""Yield analysis content chunks using OpenAI-compatible streaming API.

Yields plain text chunks as they arrive.
"""
# Build prompt
prompt = self._build_prompt(
prompt_template,
company_name,
ts_code,
financial_data,
context,
)

try:
stream = await self.client.chat.completions.create(
model=self.model_name,
messages=[{"role": "user", "content": prompt}],
stream=True,
timeout=300.0,
)

# The SDK yields events with incremental deltas
async for event in stream:
try:
choice = event.choices[0] if getattr(event, "choices", None) else None
delta = getattr(choice, "delta", None) if choice is not None else None
content = getattr(delta, "content", None) if delta is not None else None
if content:
yield content
except Exception:
# Best-effort: ignore malformed chunks
continue
except Exception as e:
# Emit error message to the stream so the client can surface it
yield f"\n\n[错误] {type(e).__name__}: {str(e)}\n"


def load_analysis_config() -> Dict:
"""Load analysis configuration from JSON file"""
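A minimal sketch of how a route could expose `generate_analysis_stream`. Everything FastAPI-specific here (route path, router, credential placeholders, prompt text) is an assumption for illustration; only the method name and its keyword arguments come from the class above.

from fastapi import APIRouter
from fastapi.responses import StreamingResponse

stream_router = APIRouter()  # hypothetical router, for illustration only

@stream_router.get("/china/{ts_code}/analysis/{analysis_type}/stream")
async def stream_analysis(ts_code: str, analysis_type: str):
    client = AnalysisClient(api_key="sk-placeholder", base_url="https://example.invalid/v1", model="gemini-2.5-flash")
    chunks = client.generate_analysis_stream(
        analysis_type=analysis_type,
        company_name=ts_code,  # a real route would resolve the company name first
        ts_code=ts_code,
        prompt_template="Analyze {company_name} ({ts_code}).",  # placeholder syntax assumed
        financial_data=None,
    )
    # generate_analysis_stream is an async generator of text chunks,
    # so it can be handed to StreamingResponse directly.
    return StreamingResponse(chunks, media_type="text/plain; charset=utf-8")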
304
backend/app/services/config_manager.py
Normal file
@ -0,0 +1,304 @@
|
||||
"""
|
||||
Configuration Management Service
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import asyncio
|
||||
from typing import Any, Dict
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.models.system_config import SystemConfig
|
||||
from app.schemas.config import ConfigResponse, ConfigUpdateRequest, DatabaseConfig, NewApiConfig, DataSourceConfig, ConfigTestResponse
|
||||
|
||||
class ConfigManager:
|
||||
"""Manages system configuration by merging a static JSON file with dynamic settings from the database."""
|
||||
|
||||
def __init__(self, db_session: AsyncSession, config_path: str = None):
|
||||
self.db = db_session
|
||||
if config_path is None:
|
||||
# Default path: backend/app/services -> project_root/config/config.json
|
||||
# __file__ = backend/app/services/config_manager.py
|
||||
# go up three levels to project root
|
||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
self.config_path = os.path.join(project_root, "config", "config.json")
|
||||
else:
|
||||
self.config_path = config_path
|
||||
|
||||
def _load_base_config_from_file(self) -> Dict[str, Any]:
|
||||
"""Loads the base configuration from the JSON file."""
|
||||
if not os.path.exists(self.config_path):
|
||||
return {}
|
||||
try:
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except (IOError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
async def _load_dynamic_config_from_db(self) -> Dict[str, Any]:
|
||||
"""Loads dynamic configuration overrides from the database.
|
||||
|
||||
当数据库表尚未创建(如开发环境未运行迁移)时,优雅降级为返回空覆盖配置,避免接口 500。
|
||||
"""
|
||||
try:
|
||||
db_configs: Dict[str, Any] = {}
|
||||
result = await self.db.execute(select(SystemConfig))
|
||||
for record in result.scalars().all():
|
||||
db_configs[record.config_key] = record.config_value
|
||||
return db_configs
|
||||
except Exception:
|
||||
# 表不存在或其他数据库错误时,忽略动态配置覆盖
|
||||
return {}
|
||||
|
||||
def _merge_configs(self, base: Dict[str, Any], overrides: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Deeply merges the override config into the base config."""
|
||||
for key, value in overrides.items():
|
||||
if isinstance(value, dict) and isinstance(base.get(key), dict):
|
||||
base[key] = self._merge_configs(base[key], value)
|
||||
else:
|
||||
base[key] = value
|
||||
return base
|
||||
|
||||
async def get_config(self) -> ConfigResponse:
|
||||
"""Gets the final, merged configuration."""
|
||||
base_config = self._load_base_config_from_file()
|
||||
db_config = await self._load_dynamic_config_from_db()
|
||||
|
||||
merged_config = self._merge_configs(base_config, db_config)
|
||||
|
||||
# 兼容两种位置:优先使用 new_api,其次回退到 llm.new_api
|
||||
new_api_src = merged_config.get("new_api") or merged_config.get("llm", {}).get("new_api", {})
|
||||
|
||||
return ConfigResponse(
|
||||
database=DatabaseConfig(**merged_config.get("database", {})),
|
||||
new_api=NewApiConfig(**(new_api_src or {})),
|
||||
data_sources={
|
||||
k: DataSourceConfig(**v)
|
||||
for k, v in merged_config.get("data_sources", {}).items()
|
||||
}
|
||||
)
|
||||
|
||||
async def update_config(self, config_update: ConfigUpdateRequest) -> ConfigResponse:
|
||||
"""Updates configuration in the database and returns the new merged config."""
|
||||
try:
|
||||
update_dict = config_update.dict(exclude_unset=True)
|
||||
|
||||
# 验证配置数据
|
||||
self._validate_config_data(update_dict)
|
||||
|
||||
for key, value in update_dict.items():
|
||||
existing_config = await self.db.get(SystemConfig, key)
|
||||
if existing_config:
|
||||
# Merge with existing DB value before updating
|
||||
if isinstance(existing_config.config_value, dict) and isinstance(value, dict):
|
||||
merged_value = self._merge_configs(existing_config.config_value, value)
|
||||
existing_config.config_value = merged_value
|
||||
else:
|
||||
existing_config.config_value = value
|
||||
else:
|
||||
new_config = SystemConfig(config_key=key, config_value=value)
|
||||
self.db.add(new_config)
|
||||
|
||||
await self.db.commit()
|
||||
return await self.get_config()
|
||||
except Exception as e:
|
||||
await self.db.rollback()
|
||||
raise e
|
||||
|
||||
def _validate_config_data(self, config_data: Dict[str, Any]) -> None:
|
||||
"""Validate configuration data before saving."""
|
||||
if "database" in config_data:
|
||||
db_config = config_data["database"]
|
||||
if "url" in db_config:
|
||||
url = db_config["url"]
|
||||
if not url.startswith(("postgresql://", "postgresql+asyncpg://")):
|
||||
raise ValueError("数据库URL必须以 postgresql:// 或 postgresql+asyncpg:// 开头")
|
||||
|
||||
if "new_api" in config_data:
|
||||
new_api_config = config_data["new_api"]
|
||||
if "api_key" in new_api_config and len(new_api_config["api_key"]) < 10:
|
||||
raise ValueError("New API Key长度不能少于10个字符")
|
||||
if "base_url" in new_api_config and new_api_config["base_url"]:
|
||||
base_url = new_api_config["base_url"]
|
||||
if not base_url.startswith(("http://", "https://")):
|
||||
raise ValueError("New API Base URL必须以 http:// 或 https:// 开头")
|
||||
|
||||
if "data_sources" in config_data:
|
||||
for source_name, source_config in config_data["data_sources"].items():
|
||||
if "api_key" in source_config and len(source_config["api_key"]) < 10:
|
||||
raise ValueError(f"{source_name} API Key长度不能少于10个字符")
|
||||
|
||||
async def test_config(self, config_type: str, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
"""Test a specific configuration."""
|
||||
try:
|
||||
if config_type == "database":
|
||||
return await self._test_database(config_data)
|
||||
elif config_type == "new_api":
|
||||
return await self._test_new_api(config_data)
|
||||
elif config_type == "tushare":
|
||||
return await self._test_tushare(config_data)
|
||||
elif config_type == "finnhub":
|
||||
return await self._test_finnhub(config_data)
|
||||
else:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"不支持的配置类型: {config_type}"
|
||||
)
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"测试失败: {str(e)}"
|
||||
)
|
||||
|
||||
async def _test_database(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
"""Test database connection."""
|
||||
db_url = config_data.get("url")
|
||||
if not db_url:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message="数据库URL不能为空"
|
||||
)
|
||||
|
||||
try:
|
||||
# 解析数据库URL
|
||||
if db_url.startswith("postgresql+asyncpg://"):
|
||||
db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
|
||||
|
||||
# 测试连接
|
||||
conn = await asyncpg.connect(db_url)
|
||||
await conn.close()
|
||||
|
||||
return ConfigTestResponse(
|
||||
success=True,
|
||||
message="数据库连接成功"
|
||||
)
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"数据库连接失败: {str(e)}"
|
||||
)
|
||||
|
||||
async def _test_new_api(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
"""Test New API (OpenAI-compatible) connection."""
|
||||
api_key = config_data.get("api_key")
|
||||
base_url = config_data.get("base_url")
|
||||
|
||||
if not api_key or not base_url:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message="New API Key和Base URL均不能为空"
|
||||
)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
# Test API availability by listing models
|
||||
response = await client.get(
|
||||
f"{base_url.rstrip('/')}/models",
|
||||
headers={"Authorization": f"Bearer {api_key}"}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
return ConfigTestResponse(
|
||||
success=True,
|
||||
message="New API连接成功"
|
||||
)
|
||||
else:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"New API测试失败: HTTP {response.status_code} - {response.text}"
|
||||
)
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"New API连接失败: {str(e)}"
|
||||
)
|
||||
|
||||
async def _test_tushare(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
"""Test Tushare API connection."""
|
||||
api_key = config_data.get("api_key")
|
||||
|
||||
if not api_key:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message="Tushare API Key不能为空"
|
||||
)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
# 测试API可用性
|
||||
response = await client.post(
|
||||
"http://api.tushare.pro",
|
||||
json={
|
||||
"api_name": "stock_basic",
|
||||
"token": api_key,
|
||||
"params": {"list_status": "L"},
|
||||
"fields": "ts_code"
|
||||
}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
if data.get("code") == 0:
|
||||
return ConfigTestResponse(
|
||||
success=True,
|
||||
message="Tushare API连接成功"
|
||||
)
|
||||
else:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"Tushare API错误: {data.get('msg', '未知错误')}"
|
||||
)
|
||||
else:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"Tushare API测试失败: HTTP {response.status_code}"
|
||||
)
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"Tushare API连接失败: {str(e)}"
|
||||
)
|
||||
|
||||
async def _test_finnhub(self, config_data: Dict[str, Any]) -> ConfigTestResponse:
|
||||
"""Test Finnhub API connection."""
|
||||
api_key = config_data.get("api_key")
|
||||
|
||||
if not api_key:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message="Finnhub API Key不能为空"
|
||||
)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
# 测试API可用性
|
||||
response = await client.get(
|
||||
f"https://finnhub.io/api/v1/quote",
|
||||
params={"symbol": "AAPL", "token": api_key}
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
if "c" in data: # 检查是否有价格数据
|
||||
return ConfigTestResponse(
|
||||
success=True,
|
||||
message="Finnhub API连接成功"
|
||||
)
|
||||
else:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message="Finnhub API响应格式错误"
|
||||
)
|
||||
else:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"Finnhub API测试失败: HTTP {response.status_code}"
|
||||
)
|
||||
except Exception as e:
|
||||
return ConfigTestResponse(
|
||||
success=False,
|
||||
message=f"Finnhub API连接失败: {str(e)}"
|
||||
)
|
||||
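The deep-merge behaviour of `ConfigManager._merge_configs` above is easiest to see on a tiny example; the keys and values below are invented for illustration.

# Nested dicts are merged key by key; any non-dict override replaces the base value.
base = {"llm": {"provider": "gemini", "gemini": {"api_key": "from-file"}}}
override = {"llm": {"gemini": {"api_key": "from-db"}}, "debug": True}
# Merging override into base yields:
# {"llm": {"provider": "gemini", "gemini": {"api_key": "from-db"}}, "debug": True}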
52
backend/app/services/tushare_client.py
Normal file
@ -0,0 +1,52 @@
"""
Minimal async client for Tushare Pro API
"""
from typing import Any, Dict, List, Optional
import httpx

TUSHARE_PRO_URL = "https://api.tushare.pro"


class TushareClient:
def __init__(self, token: str):
self.token = token
self._client = httpx.AsyncClient(timeout=30)

async def query(
self,
api_name: str,
params: Optional[Dict[str, Any]] = None,
fields: Optional[str] = None,
) -> List[Dict[str, Any]]:
payload = {
"api_name": api_name,
"token": self.token,
"params": params or {},
}
# default larger page size if not provided
if "limit" not in payload["params"]:
payload["params"]["limit"] = 5000
if fields:
payload["fields"] = fields
resp = await self._client.post(TUSHARE_PRO_URL, json=payload)
resp.raise_for_status()
data = resp.json()
if data.get("code") != 0:
err = data.get("msg") or "Tushare error"
raise RuntimeError(f"{api_name}: {err}")
fields_def = data.get("data", {}).get("fields", [])
items = data.get("data", {}).get("items", [])
rows: List[Dict[str, Any]] = []
for it in items:
row = {fields_def[i]: it[i] for i in range(len(fields_def))}
rows.append(row)
return rows

async def aclose(self):
await self._client.aclose()

async def __aenter__(self):
return self

async def __aexit__(self, exc_type, exc, tb):
await self.aclose()
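A short usage sketch for the client above; the token value is a placeholder, while the `stock_basic` query mirrors the call made in the API routes earlier in this diff.

import asyncio
from app.services.tushare_client import TushareClient

async def main() -> None:
    # "async with" closes the underlying httpx.AsyncClient on exit.
    async with TushareClient(token="your-tushare-token") as client:
        rows = await client.query(
            api_name="stock_basic",
            params={"ts_code": "600519.SH"},
            fields="ts_code,name",
        )
        print(rows)  # list of dicts keyed by the returned field names

asyncio.run(main())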
@ -7,11 +7,3 @@ aiosqlite==0.20.0
alembic==1.13.3
openai==1.37.0
asyncpg
greenlet>=3.1.0

# Data Providers
tushare==1.4.1
yfinance==0.2.37
finnhub-python==2.4.20
pandas==2.2.2
PyYAML==6.0.1
File diff suppressed because one or more lines are too long
@ -1,37 +0,0 @@
# Configuration for data sources used by the DataManager

# Defines the available data sources and their specific configurations.
# 'api_key_env' specifies the environment variable that should hold the API key/token.
data_sources:
tushare:
api_key_env: TUSHARE_TOKEN
description: "Primary data source for China market (A-shares)."
yfinance:
api_key_env: null # No API key required
description: "Good for global market data, especially US stocks."
finnhub:
api_key_env: FINNHUB_API_KEY
description: "Another comprehensive source for global stock data."

# Defines the priority of data providers for each market.
# The DataManager will try them in order until data is successfully fetched.
markets:
CN: # China Market
priority:
- tushare
- yfinance # yfinance can be a fallback
US: # US Market
priority:
- finnhub
- yfinance
HK: # Hong Kong Market
priority:
- yfinance
- finnhub
JP: # Japan Market
priority:
- yfinance
DEFAULT:
priority:
- yfinance
- finnhub
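The comment in the file above describes per-market provider fallback; a minimal sketch of that loop, assuming the YAML has been parsed into a dict and that each provider is represented by a fetch callable (the DataManager itself is not shown in this diff, so both are assumptions):

def fetch_with_fallback(config: dict, market: str, symbol: str, providers: dict):
    # Fall back to DEFAULT when the market has no explicit priority list.
    market_cfg = config["markets"].get(market) or config["markets"]["DEFAULT"]
    for name in market_cfg["priority"]:
        try:
            data = providers[name](symbol)  # provider-specific fetch callable
            if data:
                return name, data
        except Exception:
            continue  # try the next provider in the priority list
    raise RuntimeError(f"No provider returned data for {symbol} ({market})")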
@ -1,23 +0,0 @@
[package]
name = "workflow-context"
version = "0.1.0"
edition = "2024"

[lib]
name = "workflow_context"
path = "src/lib.rs"

[dependencies]
git2 = { version = "0.18", features = ["vendored-openssl"] }
sha2 = "0.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
anyhow = "1.0"
thiserror = "1.0"
hex = "0.4"
walkdir = "2.3"
regex = "1.10"
globset = "0.4"

[dev-dependencies]
tempfile = "3.8"
@ -1,320 +0,0 @@
|
||||
use anyhow::{Result, anyhow, Context};
|
||||
use std::io::Read;
|
||||
use std::sync::Arc;
|
||||
use regex::Regex;
|
||||
use crate::types::{DocNode, DocNodeKind, EntryKind};
|
||||
use crate::traits::{ContextStore, Transaction};
|
||||
|
||||
pub trait DocManager {
|
||||
/// Reload state based on the latest Commit
|
||||
fn reload(&mut self, commit_hash: &str) -> Result<()>;
|
||||
|
||||
/// Get the current document tree outline
|
||||
fn get_outline(&self) -> Result<DocNode>;
|
||||
|
||||
/// Read node content
|
||||
fn read_content(&self, path: &str) -> Result<String>;
|
||||
|
||||
/// Write content (Upsert)
|
||||
fn write_content(&mut self, path: &str, content: &str) -> Result<()>;
|
||||
|
||||
/// Insert subsection (Implies Promotion)
|
||||
fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()>;
|
||||
|
||||
/// Demote Composite to Leaf (Aggregation)
|
||||
fn demote(&mut self, path: &str) -> Result<()>;
|
||||
|
||||
/// Commit changes
|
||||
fn save(&mut self, message: &str) -> Result<String>;
|
||||
}
|
||||
|
||||
pub struct DocOS<S: ContextStore> {
|
||||
store: Arc<S>,
|
||||
req_id: String,
|
||||
commit_hash: String,
|
||||
transaction: Option<Box<dyn Transaction>>,
|
||||
}
|
||||
|
||||
impl<S: ContextStore> DocOS<S> {
|
||||
pub fn new(store: Arc<S>, req_id: &str, commit_hash: &str) -> Self {
|
||||
Self {
|
||||
store,
|
||||
req_id: req_id.to_string(),
|
||||
commit_hash: commit_hash.to_string(),
|
||||
transaction: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn ensure_transaction(&mut self) -> Result<&mut Box<dyn Transaction>> {
|
||||
if self.transaction.is_none() {
|
||||
let tx = self.store.begin_transaction(&self.req_id, &self.commit_hash)?;
|
||||
self.transaction = Some(tx);
|
||||
}
|
||||
Ok(self.transaction.as_mut().unwrap())
|
||||
}
|
||||
|
||||
fn is_leaf(&self, path: &str) -> Result<bool> {
|
||||
match self.store.read_file(&self.req_id, &self.commit_hash, path) {
|
||||
Ok(_) => Ok(true),
|
||||
Err(_) => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_composite(&self, path: &str) -> Result<bool> {
|
||||
match self.store.list_dir(&self.req_id, &self.commit_hash, path) {
|
||||
Ok(_) => Ok(true),
|
||||
Err(_) => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse Markdown headers to find subsections
|
||||
fn parse_markdown_headers(&self, content: &str) -> Vec<DocNode> {
|
||||
let re = Regex::new(r"(?m)^(#{1,6})\s+(.+)").unwrap();
|
||||
let mut sections = Vec::new();
|
||||
|
||||
for cap in re.captures_iter(content) {
|
||||
let _level = cap[1].len();
|
||||
let name = cap[2].trim().to_string();
|
||||
|
||||
// Simplified logic: All headers are children of the file node
|
||||
// In a real rich outline, we would build a tree based on level.
|
||||
// For this MVP, we treat found sections as direct children in the outline view.
|
||||
sections.push(DocNode {
|
||||
name: name.clone(),
|
||||
path: "".to_string(), // Virtual path, no direct file address
|
||||
kind: DocNodeKind::Section,
|
||||
children: vec![],
|
||||
});
|
||||
}
|
||||
sections
|
||||
}
|
||||
|
||||
fn build_node(&self, name: String, path: String, kind: DocNodeKind) -> Result<DocNode> {
|
||||
let mut node = DocNode {
|
||||
name,
|
||||
path: path.clone(),
|
||||
kind: kind.clone(),
|
||||
children: vec![],
|
||||
};
|
||||
|
||||
match kind {
|
||||
DocNodeKind::Composite => {
|
||||
let entries = self.store.list_dir(&self.req_id, &self.commit_hash, &path)?;
|
||||
|
||||
// 1. Process index.md first if exists (content of this composite node)
|
||||
let mut index_content = String::new();
|
||||
if let Ok(mut reader) = self.store.read_file(&self.req_id, &self.commit_hash, &format!("{}/index.md", path)) {
|
||||
reader.read_to_string(&mut index_content).unwrap_or_default();
|
||||
let sections = self.parse_markdown_headers(&index_content);
|
||||
node.children.extend(sections);
|
||||
}
|
||||
|
||||
// 2. Process children files/dirs
|
||||
let mut children_nodes = Vec::new();
|
||||
for entry in entries {
|
||||
if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let child_path = if path == "/" {
|
||||
entry.name.clone()
|
||||
} else {
|
||||
format!("{}/{}", path, entry.name)
|
||||
};
|
||||
|
||||
let child_kind = match entry.kind {
|
||||
EntryKind::Dir => DocNodeKind::Composite,
|
||||
EntryKind::File => DocNodeKind::Leaf,
|
||||
};
|
||||
|
||||
let child_node = self.build_node(entry.name, child_path, child_kind)?;
|
||||
children_nodes.push(child_node);
|
||||
}
|
||||
// Sort children by name (simple default)
|
||||
children_nodes.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
node.children.extend(children_nodes);
|
||||
}
|
||||
DocNodeKind::Leaf => {
|
||||
// Parse content for sections
|
||||
if let Ok(mut reader) = self.store.read_file(&self.req_id, &self.commit_hash, &path) {
|
||||
let mut content = String::new();
|
||||
reader.read_to_string(&mut content).unwrap_or_default();
|
||||
let sections = self.parse_markdown_headers(&content);
|
||||
node.children.extend(sections);
|
||||
}
|
||||
}
|
||||
DocNodeKind::Section => {
|
||||
// Sections don't have children in this simplified view
|
||||
}
|
||||
}
|
||||
|
||||
Ok(node)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: ContextStore> DocManager for DocOS<S> {
|
||||
fn reload(&mut self, commit_hash: &str) -> Result<()> {
|
||||
self.commit_hash = commit_hash.to_string();
|
||||
self.transaction = None;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_outline(&self) -> Result<DocNode> {
|
||||
self.build_node("Root".to_string(), "/".to_string(), DocNodeKind::Composite)
|
||||
}
|
||||
|
||||
fn read_content(&self, path: &str) -> Result<String> {
|
||||
let target_path = if path == "/" {
|
||||
"index.md".to_string()
|
||||
} else if self.is_composite(path)? {
|
||||
format!("{}/index.md", path)
|
||||
} else {
|
||||
path.to_string()
|
||||
};
|
||||
|
||||
let mut reader = self.store.read_file(&self.req_id, &self.commit_hash, &target_path)
|
||||
.context("Failed to read content")?;
|
||||
let mut content = String::new();
|
||||
reader.read_to_string(&mut content)?;
|
||||
Ok(content)
|
||||
}
|
||||
|
||||
fn write_content(&mut self, path: &str, content: &str) -> Result<()> {
|
||||
let is_comp = self.is_composite(path)?;
|
||||
let target_path = if is_comp {
|
||||
format!("{}/index.md", path)
|
||||
} else {
|
||||
path.to_string()
|
||||
};
|
||||
|
||||
let tx = self.ensure_transaction()?;
|
||||
tx.write(&target_path, content.as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()> {
|
||||
let is_leaf = self.is_leaf(parent_path)?;
|
||||
let is_composite = self.is_composite(parent_path)?;
|
||||
|
||||
if !is_leaf && !is_composite && parent_path != "/" {
|
||||
return Err(anyhow!("Parent path '{}' does not exist", parent_path));
|
||||
}
|
||||
|
||||
if is_leaf {
|
||||
// Promote: Leaf -> Composite
|
||||
let old_content = self.read_content(parent_path)?;
|
||||
|
||||
let tx = self.ensure_transaction()?;
|
||||
tx.remove(parent_path)?;
|
||||
|
||||
let index_path = format!("{}/index.md", parent_path);
|
||||
tx.write(&index_path, old_content.as_bytes())?;
|
||||
|
||||
let child_path = format!("{}/{}", parent_path, name);
|
||||
tx.write(&child_path, content.as_bytes())?;
|
||||
|
||||
} else {
|
||||
let child_path = if parent_path == "/" {
|
||||
name.to_string()
|
||||
} else {
|
||||
format!("{}/{}", parent_path, name)
|
||||
};
|
||||
|
||||
let tx = self.ensure_transaction()?;
|
||||
tx.write(&child_path, content.as_bytes())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn demote(&mut self, path: &str) -> Result<()> {
|
||||
if !self.is_composite(path)? {
|
||||
return Err(anyhow!("Path '{}' is not a composite node (directory)", path));
|
||||
}
|
||||
if path == "/" {
|
||||
return Err(anyhow!("Cannot demote root"));
|
||||
}
|
||||
|
||||
// 1. Read index.md (Main content)
|
||||
let mut main_content = String::new();
|
||||
if let Ok(content) = self.read_content(path) {
|
||||
main_content = content;
|
||||
}
|
||||
|
||||
// Reading directory entries
|
||||
let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path)?;
|
||||
|
||||
// Sort entries to have deterministic order
|
||||
let mut sorted_entries = entries;
|
||||
sorted_entries.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
|
||||
let mut combined_content = main_content;
|
||||
|
||||
// Iterate for content reading (Borrowing self immutably)
|
||||
for entry in &sorted_entries {
|
||||
if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let child_rel_path = format!("{}/{}", path, entry.name);
|
||||
let child_content = self.read_content(&child_rel_path)?;
|
||||
combined_content.push_str(&format!("\n\n# {}\n\n", entry.name));
|
||||
combined_content.push_str(&child_content);
|
||||
}
|
||||
|
||||
// Get list of items to remove before starting transaction (to avoid double borrow)
|
||||
// We need a recursive list of paths to remove from git index.
|
||||
let paths_to_remove = self.collect_recursive_paths(path)?;
|
||||
|
||||
let tx = self.ensure_transaction()?;
|
||||
|
||||
// 3. Remove everything recursively
|
||||
for p in paths_to_remove {
|
||||
tx.remove(&p)?;
|
||||
}
|
||||
// Also remove the directory path itself (conceptually, or handled by git index cleanup)
|
||||
// In our simplified VGCS, remove(dir) is not enough if not empty.
|
||||
// But we just cleaned up recursively.
|
||||
|
||||
// 4. Write new file
|
||||
tx.write(path, combined_content.as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn save(&mut self, message: &str) -> Result<String> {
|
||||
if let Some(tx) = self.transaction.take() {
|
||||
let new_oid = tx.commit(message, "DocOS User")?;
|
||||
self.commit_hash = new_oid.clone();
|
||||
Ok(new_oid)
|
||||
} else {
|
||||
Ok(self.commit_hash.clone())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: ContextStore> DocOS<S> {
|
||||
// Helper: Collect paths recursively (reading from store, immutable self)
|
||||
fn collect_recursive_paths(&self, path: &str) -> Result<Vec<String>> {
|
||||
let mut paths = Vec::new();
|
||||
|
||||
let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path);
|
||||
if let Ok(entries) = entries {
|
||||
for entry in entries {
|
||||
let child_path = format!("{}/{}", path, entry.name);
|
||||
match entry.kind {
|
||||
EntryKind::File => {
|
||||
paths.push(child_path);
|
||||
},
|
||||
EntryKind::Dir => {
|
||||
// Add children of dir first
|
||||
let mut sub_paths = self.collect_recursive_paths(&child_path)?;
|
||||
paths.append(&mut sub_paths);
|
||||
// No need to remove dir itself in git, but we might track it?
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(paths)
|
||||
}
|
||||
}
|
||||
@ -1,11 +0,0 @@
pub mod types;
pub mod traits;
pub mod vgcs;
pub mod docos;
pub mod worker_runtime;

pub use types::*;
pub use traits::*;
pub use vgcs::Vgcs;
pub use docos::{DocOS, DocManager};
pub use worker_runtime::{WorkerContext, ContextShell, OutputFormat, FindOptions, NodeMetadata, GrepMatch, FileStats};
@ -1,39 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use std::io::Read;
|
||||
use crate::types::{DirEntry, FileChange};
|
||||
|
||||
pub trait ContextStore {
|
||||
/// Initialize a new repository for the request
|
||||
fn init_repo(&self, req_id: &str) -> Result<()>;
|
||||
|
||||
/// Read file content. Transparently handles BlobRef redirection.
|
||||
fn read_file(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Box<dyn Read + Send>>;
|
||||
|
||||
/// List directory contents
|
||||
fn list_dir(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Vec<DirEntry>>;
|
||||
|
||||
/// Get changes between two commits
|
||||
fn diff(&self, req_id: &str, from_commit: &str, to_commit: &str) -> Result<Vec<FileChange>>;
|
||||
|
||||
/// Three-way merge (In-Memory), returns new Tree OID
|
||||
fn merge_trees(&self, req_id: &str, base: &str, ours: &str, theirs: &str) -> Result<String>;
|
||||
|
||||
/// Smart merge two commits, automatically finding the best common ancestor.
|
||||
/// Returns the OID of the new merge commit.
|
||||
fn merge_commits(&self, req_id: &str, our_commit: &str, their_commit: &str) -> Result<String>;
|
||||
|
||||
/// Start a write transaction
|
||||
fn begin_transaction(&self, req_id: &str, base_commit: &str) -> Result<Box<dyn Transaction>>;
|
||||
}
|
||||
|
||||
pub trait Transaction {
|
||||
/// Write file content
|
||||
fn write(&mut self, path: &str, content: &[u8]) -> Result<()>;
|
||||
|
||||
/// Remove file
|
||||
fn remove(&mut self, path: &str) -> Result<()>;
|
||||
|
||||
/// Commit changes
|
||||
fn commit(self: Box<Self>, message: &str, author: &str) -> Result<String>;
|
||||
}
|
||||
|
||||
@ -1,50 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum EntryKind {
|
||||
File,
|
||||
Dir,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DirEntry {
|
||||
pub name: String,
|
||||
pub kind: EntryKind,
|
||||
pub object_id: String,
|
||||
// New metadata fields
|
||||
pub size: Option<u64>,
|
||||
pub line_count: Option<usize>,
|
||||
pub word_count: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum FileChange {
|
||||
Added(String),
|
||||
Modified(String),
|
||||
Deleted(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct BlobRef {
|
||||
#[serde(rename = "$vgcs_ref")]
|
||||
pub vgcs_ref: String, // "v1"
|
||||
pub sha256: String,
|
||||
pub size: u64,
|
||||
pub mime_type: String,
|
||||
pub original_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub enum DocNodeKind {
|
||||
Leaf, // Pure content node (file)
|
||||
Composite, // Composite node (dir with index.md)
|
||||
Section, // Virtual node (Markdown Header inside a file)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct DocNode {
|
||||
pub name: String,
|
||||
pub path: String, // Logical path e.g., "Analysis/Revenue"
|
||||
pub kind: DocNodeKind,
|
||||
pub children: Vec<DocNode>, // Only for Composite or Section-bearing Leaf
|
||||
}
|
||||
@ -1,361 +0,0 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::fs::{self, File};
|
||||
use std::io::{Cursor, Read, Write};
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use git2::{Repository, Oid, ObjectType, Signature, Index, IndexEntry, IndexTime};
|
||||
use sha2::{Sha256, Digest};
|
||||
|
||||
use crate::traits::{ContextStore, Transaction};
|
||||
use crate::types::{DirEntry, EntryKind, FileChange, BlobRef};
|
||||
|
||||
pub struct Vgcs {
|
||||
root_path: PathBuf,
|
||||
}
|
||||
|
||||
impl Vgcs {
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Self {
|
||||
Self {
|
||||
root_path: path.as_ref().to_path_buf(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_repo_path(&self, req_id: &str) -> PathBuf {
|
||||
self.root_path.join("repos").join(format!("{}.git", req_id))
|
||||
}
|
||||
|
||||
fn get_blob_store_root(&self, req_id: &str) -> PathBuf {
|
||||
self.root_path.join("blobs").join(req_id)
|
||||
}
|
||||
|
||||
fn get_blob_path(&self, req_id: &str, sha256: &str) -> PathBuf {
|
||||
self.get_blob_store_root(req_id)
|
||||
.join(&sha256[0..2])
|
||||
.join(sha256)
|
||||
}
|
||||
}
|
||||
|
||||
impl ContextStore for Vgcs {
|
||||
fn init_repo(&self, req_id: &str) -> Result<()> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
if !repo_path.exists() {
|
||||
fs::create_dir_all(&repo_path).context("Failed to create repo dir")?;
|
||||
Repository::init_bare(&repo_path).context("Failed to init bare repo")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_file(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Box<dyn Read + Send>> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
let repo = Repository::open(&repo_path).context("Failed to open repo")?;
|
||||
|
||||
let oid = Oid::from_str(commit_hash).context("Invalid commit hash")?;
|
||||
let commit = repo.find_commit(oid).context("Commit not found")?;
|
||||
let tree = commit.tree().context("Tree not found")?;
|
||||
|
||||
let entry = tree.get_path(Path::new(path)).context("File not found in tree")?;
|
||||
let object = entry.to_object(&repo).context("Object not found")?;
|
||||
|
||||
if let Some(blob) = object.as_blob() {
|
||||
let content = blob.content();
|
||||
// Try parsing as BlobRef
|
||||
if let Ok(blob_ref) = serde_json::from_slice::<BlobRef>(content) {
|
||||
if blob_ref.vgcs_ref == "v1" {
|
||||
let blob_path = self.get_blob_path(req_id, &blob_ref.sha256);
|
||||
let file = File::open(blob_path).context("Failed to open blob file from store")?;
|
||||
return Ok(Box::new(file));
|
||||
}
|
||||
}
|
||||
// Return raw content
|
||||
return Ok(Box::new(Cursor::new(content.to_vec())));
|
||||
}
|
||||
|
||||
Err(anyhow!("Path is not a file"))
|
||||
}
|
||||
|
||||
fn list_dir(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Vec<DirEntry>> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
let repo = Repository::open(&repo_path).context("Failed to open repo")?;
|
||||
|
||||
let oid = Oid::from_str(commit_hash).context("Invalid commit hash")?;
|
||||
let commit = repo.find_commit(oid).context("Commit not found")?;
|
||||
let root_tree = commit.tree().context("Tree not found")?;
|
||||
|
||||
let tree = if path.is_empty() || path == "/" || path == "." {
|
||||
root_tree
|
||||
} else {
|
||||
let entry = root_tree.get_path(Path::new(path)).context("Path not found")?;
|
||||
let object = entry.to_object(&repo).context("Object not found")?;
|
||||
object.into_tree().map_err(|_| anyhow!("Path is not a directory"))?
|
||||
};
|
||||
|
||||
let mut entries = Vec::new();
|
||||
for entry in tree.iter() {
|
||||
let name = entry.name().unwrap_or("").to_string();
|
||||
let kind = match entry.kind() {
|
||||
Some(ObjectType::Tree) => EntryKind::Dir,
|
||||
_ => EntryKind::File,
|
||||
};
|
||||
let object_id = entry.id().to_string();
|
||||
|
||||
// Metadata extraction (Expensive but necessary for the prompt)
|
||||
let mut size = None;
|
||||
let mut line_count = None;
|
||||
let mut word_count = None;
|
||||
|
||||
if kind == EntryKind::File {
|
||||
if let Ok(object) = entry.to_object(&repo) {
|
||||
if let Some(blob) = object.as_blob() {
|
||||
let content = blob.content();
|
||||
size = Some(content.len() as u64);
|
||||
|
||||
// Check for binary content or just use heuristic
|
||||
if !content.contains(&0) {
|
||||
let s = String::from_utf8_lossy(content);
|
||||
line_count = Some(s.lines().count());
|
||||
word_count = Some(s.split_whitespace().count());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
entries.push(DirEntry { name, kind, object_id, size, line_count, word_count });
|
||||
}
|
||||
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
fn diff(&self, req_id: &str, from_commit: &str, to_commit: &str) -> Result<Vec<FileChange>> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
let repo = Repository::open(&repo_path).context("Failed to open repo")?;
|
||||
|
||||
let from_oid = Oid::from_str(from_commit).context("Invalid from_commit")?;
|
||||
let to_oid = Oid::from_str(to_commit).context("Invalid to_commit")?;
|
||||
|
||||
let from_tree = repo.find_commit(from_oid)?.tree()?;
|
||||
let to_tree = repo.find_commit(to_oid)?.tree()?;
|
||||
|
||||
let diff = repo.diff_tree_to_tree(Some(&from_tree), Some(&to_tree), None)?;
|
||||
|
||||
let mut changes = Vec::new();
|
||||
diff.foreach(&mut |delta, _| {
|
||||
let path = delta.new_file().path().or(delta.old_file().path()).unwrap();
|
||||
let path_str = path.to_string_lossy().to_string();
|
||||
|
||||
match delta.status() {
|
||||
git2::Delta::Added => changes.push(FileChange::Added(path_str)),
|
||||
git2::Delta::Deleted => changes.push(FileChange::Deleted(path_str)),
|
||||
git2::Delta::Modified => changes.push(FileChange::Modified(path_str)),
|
||||
_ => {}
|
||||
}
|
||||
true
|
||||
}, None, None, None)?;
|
||||
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
fn merge_trees(&self, req_id: &str, base: &str, ours: &str, theirs: &str) -> Result<String> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
let repo = Repository::open(&repo_path).context("Failed to open repo")?;
|
||||
|
||||
let base_tree = repo.find_commit(Oid::from_str(base)?)?.tree()?;
|
||||
let our_tree = repo.find_commit(Oid::from_str(ours)?)?.tree()?;
|
||||
let their_tree = repo.find_commit(Oid::from_str(theirs)?)?.tree()?;
|
||||
|
||||
let mut index = repo.merge_trees(&base_tree, &our_tree, &their_tree, None)?;
|
||||
|
||||
if index.has_conflicts() {
|
||||
return Err(anyhow!("Merge conflict detected"));
|
||||
}
|
||||
|
||||
let oid = index.write_tree_to(&repo)?;
|
||||
Ok(oid.to_string())
|
||||
}
|
||||
|
||||
fn merge_commits(&self, req_id: &str, our_commit: &str, their_commit: &str) -> Result<String> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
let repo = Repository::open(&repo_path).context("Failed to open repo")?;
|
||||
|
||||
let our_oid = Oid::from_str(our_commit).context("Invalid our_commit")?;
|
||||
let their_oid = Oid::from_str(their_commit).context("Invalid their_commit")?;
|
||||
|
||||
let base_oid = repo.merge_base(our_oid, their_oid).context("Failed to find merge base")?;
|
||||
|
||||
let base_commit = repo.find_commit(base_oid)?;
|
||||
let our_commit_obj = repo.find_commit(our_oid)?;
|
||||
let their_commit_obj = repo.find_commit(their_oid)?;
|
||||
|
||||
// If base equals one of the commits, it's a fast-forward
|
||||
if base_oid == our_oid {
|
||||
return Ok(their_commit.to_string());
|
||||
}
|
||||
if base_oid == their_oid {
|
||||
return Ok(our_commit.to_string());
|
||||
}
|
||||
|
||||
let base_tree = base_commit.tree()?;
|
||||
let our_tree = our_commit_obj.tree()?;
|
||||
let their_tree = their_commit_obj.tree()?;
|
||||
|
||||
let mut index = repo.merge_trees(&base_tree, &our_tree, &their_tree, None)?;
|
||||
|
||||
if index.has_conflicts() {
|
||||
return Err(anyhow!("Merge conflict detected between {} and {}", our_commit, their_commit));
|
||||
}
|
||||
|
||||
let tree_oid = index.write_tree_to(&repo)?;
|
||||
let tree = repo.find_tree(tree_oid)?;
|
||||
|
||||
let sig = Signature::now("vgcs-merge", "system")?;
|
||||
|
||||
let merge_commit_oid = repo.commit(
|
||||
None, // Detached
|
||||
&sig,
|
||||
&sig,
|
||||
&format!("Merge commit {} into {}", their_commit, our_commit),
|
||||
&tree,
|
||||
&[&our_commit_obj, &their_commit_obj],
|
||||
)?;
|
||||
|
||||
Ok(merge_commit_oid.to_string())
|
||||
}
|
||||
|
||||
fn begin_transaction(&self, req_id: &str, base_commit: &str) -> Result<Box<dyn Transaction>> {
|
||||
let repo_path = self.get_repo_path(req_id);
|
||||
let repo = Repository::open(&repo_path).context("Failed to open repo")?;
|
||||
|
||||
let mut index = Index::new()?;
|
||||
let mut base_commit_oid = None;
|
||||
|
||||
if !base_commit.is_empty() {
|
||||
let base_oid = Oid::from_str(base_commit).context("Invalid base_commit")?;
|
||||
if !base_oid.is_zero() {
|
||||
// Scope the borrow of repo
|
||||
{
|
||||
let commit = repo.find_commit(base_oid).context("Base commit not found")?;
|
||||
let tree = commit.tree()?;
|
||||
index.read_tree(&tree)?;
|
||||
}
|
||||
base_commit_oid = Some(base_oid);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Box::new(VgcsTransaction {
|
||||
repo,
|
||||
req_id: req_id.to_string(),
|
||||
root_path: self.root_path.clone(),
|
||||
base_commit: base_commit_oid,
|
||||
index,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct VgcsTransaction {
|
||||
repo: Repository,
|
||||
req_id: String,
|
||||
root_path: PathBuf,
|
||||
base_commit: Option<Oid>,
|
||||
index: Index,
|
||||
}
|
||||
|
||||
impl Transaction for VgcsTransaction {
|
||||
fn write(&mut self, path: &str, content: &[u8]) -> Result<()> {
|
||||
let final_content = if content.len() > 1024 * 1024 { // 1MB
|
||||
// Calculate SHA256
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(content);
|
||||
let result = hasher.finalize();
|
||||
let sha256 = hex::encode(result);
|
||||
|
||||
// Write to Blob Store
|
||||
let blob_path = self.root_path
|
||||
.join("blobs")
|
||||
.join(&self.req_id)
|
||||
.join(&sha256[0..2])
|
||||
.join(&sha256);
|
||||
|
||||
if !blob_path.exists() {
|
||||
if let Some(parent) = blob_path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
let mut file = File::create(&blob_path)?;
|
||||
file.write_all(content)?;
|
||||
}
|
||||
|
||||
// Create BlobRef JSON
|
||||
let blob_ref = BlobRef {
|
||||
vgcs_ref: "v1".to_string(),
|
||||
sha256: sha256,
|
||||
size: content.len() as u64,
|
||||
mime_type: "application/octet-stream".to_string(), // Simplified
|
||||
original_name: Path::new(path).file_name().unwrap_or_default().to_string_lossy().to_string(),
|
||||
};
|
||||
|
||||
serde_json::to_vec(&blob_ref)?
|
||||
} else {
|
||||
content.to_vec()
|
||||
};
|
||||
|
||||
// Write to ODB manually
|
||||
let oid = self.repo.blob(&final_content)?;
|
||||
|
||||
let mut entry = create_index_entry(path, 0o100644);
|
||||
entry.id = oid;
|
||||
entry.file_size = final_content.len() as u32;
|
||||
|
||||
self.index.add(&entry).context("Failed to add entry to index")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove(&mut self, path: &str) -> Result<()> {
|
||||
self.index.remove_path(Path::new(path))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn commit(mut self: Box<Self>, message: &str, author: &str) -> Result<String> {
|
||||
let tree_oid = self.index.write_tree_to(&self.repo)?;
|
||||
let tree = self.repo.find_tree(tree_oid)?;
|
||||
|
||||
let sig = Signature::now(author, "vgcs@system")?;
|
||||
|
||||
let commit_oid = if let Some(base_oid) = self.base_commit {
|
||||
let parent_commit = self.repo.find_commit(base_oid)?;
|
||||
self.repo.commit(
|
||||
None, // Detached commit
|
||||
&sig,
|
||||
&sig,
|
||||
message,
|
||||
&tree,
|
||||
&[&parent_commit],
|
||||
)?
|
||||
} else {
|
||||
self.repo.commit(
|
||||
None, // Detached commit
|
||||
&sig,
|
||||
&sig,
|
||||
message,
|
||||
&tree,
|
||||
&[],
|
||||
)?
|
||||
};
|
||||
|
||||
Ok(commit_oid.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn create_index_entry(path: &str, mode: u32) -> IndexEntry {
|
||||
IndexEntry {
|
||||
ctime: IndexTime::new(0, 0),
|
||||
mtime: IndexTime::new(0, 0),
|
||||
dev: 0,
|
||||
ino: 0,
|
||||
mode,
|
||||
uid: 0,
|
||||
gid: 0,
|
||||
file_size: 0,
|
||||
id: Oid::zero(),
|
||||
flags: 0,
|
||||
flags_extended: 0,
|
||||
path: path.as_bytes().to_vec(),
|
||||
}
|
||||
}
|
||||
@ -1,378 +0,0 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::env;
|
||||
use anyhow::{Result, Context, anyhow};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use serde::de::DeserializeOwned;
|
||||
use globset::Glob;
|
||||
use regex::Regex;
|
||||
use crate::{DocOS, DocManager, Vgcs, DocNodeKind};
|
||||
|
||||
// --- Data Structures ---
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum OutputFormat {
|
||||
Text,
|
||||
Json,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub struct NodeMetadata {
|
||||
pub path: String,
|
||||
pub kind: String, // "File" or "Dir"
|
||||
pub size: u64,
|
||||
// pub modified: bool, // TODO: Implement diff check against base
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct FindOptions {
|
||||
pub recursive: bool,
|
||||
pub max_depth: Option<usize>,
|
||||
pub type_filter: Option<String>, // "File" or "Dir"
|
||||
pub min_size: Option<u64>,
|
||||
pub max_size: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GrepMatch {
|
||||
pub path: String,
|
||||
pub line_number: usize,
|
||||
pub content: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct FileStats {
|
||||
pub path: String,
|
||||
pub lines: usize,
|
||||
pub bytes: usize,
|
||||
}
|
||||
|
||||
// --- Trait Definition ---
|
||||
|
||||
pub trait ContextShell {
|
||||
fn tree(&self, path: &str, depth: Option<usize>, format: OutputFormat) -> Result<String>;
|
||||
fn find(&self, name_pattern: &str, options: FindOptions) -> Result<Vec<NodeMetadata>>;
|
||||
fn grep(&self, pattern: &str, paths: Option<Vec<String>>) -> Result<Vec<GrepMatch>>;
|
||||
fn cat(&self, paths: &[String]) -> Result<String>;
|
||||
fn wc(&self, paths: &[String]) -> Result<Vec<FileStats>>;
|
||||
fn patch(&mut self, path: &str, original: &str, replacement: &str) -> Result<()>;
|
||||
}
|
||||
|
||||
// --- WorkerContext Implementation ---
|
||||
|
||||
pub struct WorkerContext {
|
||||
doc: DocOS<Vgcs>,
|
||||
}
|
||||
|
||||
impl WorkerContext {
|
||||
pub fn from_env() -> Result<Self> {
|
||||
let req_id = env::var("WORKFLOW_REQ_ID").context("Missing WORKFLOW_REQ_ID")?;
|
||||
let commit = env::var("WORKFLOW_BASE_COMMIT").context("Missing WORKFLOW_BASE_COMMIT")?;
|
||||
let data_path = env::var("WORKFLOW_DATA_PATH").context("Missing WORKFLOW_DATA_PATH")?;
|
||||
|
||||
let vgcs = Vgcs::new(&data_path);
|
||||
let doc = DocOS::new(Arc::new(vgcs), &req_id, &commit);
|
||||
|
||||
Ok(Self { doc })
|
||||
}
|
||||
|
||||
pub fn new(data_path: &str, req_id: &str, commit: &str) -> Self {
|
||||
let vgcs = Vgcs::new(data_path);
|
||||
let doc = DocOS::new(Arc::new(vgcs), req_id, commit);
|
||||
Self { doc }
|
||||
}
|
||||
|
||||
pub fn read_json<T: DeserializeOwned>(&self, path: impl AsRef<Path>) -> Result<T> {
|
||||
let path_str = path.as_ref().to_string_lossy();
|
||||
let content = self.doc.read_content(&path_str)?;
|
||||
let data = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse JSON from {}", path_str))?;
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
pub fn read_text(&self, path: impl AsRef<Path>) -> Result<String> {
|
||||
let path_str = path.as_ref().to_string_lossy();
|
||||
self.doc.read_content(&path_str)
|
||||
}
|
||||
|
||||
pub fn write_file(&mut self, path: impl AsRef<Path>, content: &str) -> Result<()> {
|
||||
let path_str = path.as_ref().to_string_lossy();
|
||||
self.doc.write_content(&path_str, content)
|
||||
}
|
||||
|
||||
pub fn attach_subsection(&mut self, parent: impl AsRef<Path>, name: &str, content: &str) -> Result<()> {
|
||||
let parent_str = parent.as_ref().to_string_lossy();
|
||||
self.doc.insert_subsection(&parent_str, name, content)
|
||||
}
|
||||
|
||||
pub fn commit(&mut self, message: &str) -> Result<String> {
|
||||
self.doc.save(message)
|
||||
}
|
||||
|
||||
pub fn get_tool_definitions() -> serde_json::Value {
|
||||
serde_json::json!([
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "tree",
|
||||
"description": "List directory structure to understand the file layout.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": { "type": "string", "description": "Root path to list (default: root)" },
|
||||
"depth": { "type": "integer", "description": "Recursion depth" },
|
||||
"format": { "type": "string", "enum": ["Text", "Json"], "default": "Text" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "find",
|
||||
"description": "Find files by name pattern (glob). Fast metadata search.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["pattern"],
|
||||
"properties": {
|
||||
"pattern": { "type": "string", "description": "Glob pattern (e.g. **/*.rs)" },
|
||||
"recursive": { "type": "boolean", "default": true }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "grep",
|
||||
"description": "Search for content within files using regex.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["pattern"],
|
||||
"properties": {
|
||||
"pattern": { "type": "string", "description": "Regex pattern" },
|
||||
"paths": { "type": "array", "items": { "type": "string" }, "description": "Limit search to these paths" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "cat",
|
||||
"description": "Read and assemble content of multiple files.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["paths"],
|
||||
"properties": {
|
||||
"paths": { "type": "array", "items": { "type": "string" } }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "patch",
|
||||
"description": "Replace a specific text block in a file. Use this for small corrections.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["path", "original", "replacement"],
|
||||
"properties": {
|
||||
"path": { "type": "string" },
|
||||
"original": { "type": "string", "description": "Exact text to look for. Must be unique in file." },
|
||||
"replacement": { "type": "string", "description": "New text to insert." }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
impl ContextShell for WorkerContext {
|
||||
fn tree(&self, path: &str, depth: Option<usize>, format: OutputFormat) -> Result<String> {
|
||||
let root_node = self.doc.get_outline()?;
|
||||
|
||||
let target_node = if path == "/" || path == "." {
|
||||
Some(&root_node)
|
||||
} else {
|
||||
fn find_node<'a>(node: &'a crate::DocNode, path: &str) -> Option<&'a crate::DocNode> {
|
||||
if node.path == path {
|
||||
return Some(node);
|
||||
}
|
||||
for child in &node.children {
|
||||
if let Some(found) = find_node(child, path) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
find_node(&root_node, path)
|
||||
};
|
||||
|
||||
let node = target_node.ok_or_else(|| anyhow!("Path not found: {}", path))?;
|
||||
|
||||
match format {
|
||||
OutputFormat::Json => {
|
||||
Ok(serde_json::to_string_pretty(node)?)
|
||||
},
|
||||
OutputFormat::Text => {
|
||||
let mut output = String::new();
|
||||
fn print_tree(node: &crate::DocNode, prefix: &str, is_last: bool, depth: usize, max_depth: Option<usize>, output: &mut String) {
|
||||
if let Some(max) = max_depth {
|
||||
if depth > max { return; }
|
||||
}
|
||||
|
||||
let name = if node.path == "/" { "." } else { &node.name };
|
||||
|
||||
if depth > 0 {
|
||||
let connector = if is_last { "└── " } else { "├── " };
|
||||
output.push_str(&format!("{}{}{}\n", prefix, connector, name));
|
||||
} else {
|
||||
output.push_str(&format!("{}\n", name));
|
||||
}
|
||||
|
||||
let child_prefix = if depth > 0 {
|
||||
if is_last { format!("{} ", prefix) } else { format!("{}│ ", prefix) }
|
||||
} else {
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
for (i, child) in node.children.iter().enumerate() {
|
||||
print_tree(child, &child_prefix, i == node.children.len() - 1, depth + 1, max_depth, output);
|
||||
}
|
||||
}
|
||||
print_tree(node, "", true, 0, depth, &mut output);
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn find(&self, name_pattern: &str, options: FindOptions) -> Result<Vec<NodeMetadata>> {
|
||||
let root = self.doc.get_outline()?;
|
||||
let mut results = Vec::new();
|
||||
|
||||
let glob = Glob::new(name_pattern)?.compile_matcher();
|
||||
|
||||
fn traverse(node: &crate::DocNode, glob: &globset::GlobMatcher, opts: &FindOptions, depth: usize, results: &mut Vec<NodeMetadata>) {
|
||||
if let Some(max) = opts.max_depth {
|
||||
if depth > max { return; }
|
||||
}
|
||||
|
||||
let match_name = glob.is_match(&node.name) || glob.is_match(&node.path);
|
||||
|
||||
let kind_str = match node.kind {
|
||||
DocNodeKind::Composite => "Dir",
|
||||
DocNodeKind::Leaf => "File",
|
||||
DocNodeKind::Section => "Section",
|
||||
};
|
||||
|
||||
let type_match = match &opts.type_filter {
|
||||
Some(t) => t.eq_ignore_ascii_case(kind_str),
|
||||
None => true,
|
||||
};
|
||||
|
||||
if depth > 0 && match_name && type_match {
|
||||
results.push(NodeMetadata {
|
||||
path: node.path.clone(),
|
||||
kind: kind_str.to_string(),
|
||||
size: 0,
|
||||
});
|
||||
}
|
||||
|
||||
if opts.recursive || depth == 0 {
|
||||
for child in &node.children {
|
||||
traverse(child, glob, opts, depth + 1, results);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
traverse(&root, &glob, &options, 0, &mut results);
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
fn grep(&self, pattern: &str, paths: Option<Vec<String>>) -> Result<Vec<GrepMatch>> {
|
||||
let re = Regex::new(pattern).context("Invalid regex pattern")?;
|
||||
|
||||
let target_paths = match paths {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
let all_nodes = self.find("**/*", FindOptions {
|
||||
recursive: true,
|
||||
type_filter: Some("File".to_string()),
|
||||
..Default::default()
|
||||
})?;
|
||||
all_nodes.into_iter().map(|n| n.path).collect()
|
||||
}
|
||||
};
|
||||
|
||||
let mut matches = Vec::new();
|
||||
|
||||
for path in target_paths {
|
||||
if let Ok(content) = self.read_text(&path) {
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
if re.is_match(line) {
|
||||
matches.push(GrepMatch {
|
||||
path: path.clone(),
|
||||
line_number: i + 1,
|
||||
content: line.trim().to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
fn cat(&self, paths: &[String]) -> Result<String> {
|
||||
let mut output = String::new();
|
||||
for path in paths {
|
||||
match self.read_text(path) {
|
||||
Ok(content) => {
|
||||
output.push_str(&format!("<file path=\"{}\">\n", path));
|
||||
output.push_str(&content);
|
||||
if !content.ends_with('\n') {
|
||||
output.push('\n');
|
||||
}
|
||||
output.push_str("</file>\n\n");
|
||||
},
|
||||
Err(e) => {
|
||||
output.push_str(&format!("<!-- Failed to read {}: {} -->\n", path, e));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn wc(&self, paths: &[String]) -> Result<Vec<FileStats>> {
|
||||
let mut stats = Vec::new();
|
||||
for path in paths {
|
||||
if let Ok(content) = self.read_text(path) {
|
||||
stats.push(FileStats {
|
||||
path: path.clone(),
|
||||
lines: content.lines().count(),
|
||||
bytes: content.len(),
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
fn patch(&mut self, path: &str, original: &str, replacement: &str) -> Result<()> {
|
||||
let content = self.read_text(path)?;
|
||||
|
||||
let matches: Vec<_> = content.match_indices(original).collect();
|
||||
|
||||
match matches.len() {
|
||||
0 => return Err(anyhow!("Original text not found in {}", path)),
|
||||
1 => {
|
||||
let new_content = content.replace(original, replacement);
|
||||
self.write_file(path, &new_content)?;
|
||||
Ok(())
|
||||
},
|
||||
_ => return Err(anyhow!("Ambiguous match: original text found {} times", matches.len())),
|
||||
}
|
||||
}
|
||||
}
|
||||
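The JSON tool schema above maps one-to-one onto the ContextShell methods, so a worker loop can route an LLM tool call straight to the trait. A minimal dispatch sketch, assuming the call arrives as a tool name plus a serde_json::Value of arguments; the dispatch_tool helper is illustrative and was not part of the original file (it reuses the anyhow and serde_json items already imported in this module):

// Hypothetical router: forwards one tool call onto the ContextShell impl above.
fn dispatch_tool(ctx: &mut WorkerContext, name: &str, args: &serde_json::Value) -> Result<String> {
    match name {
        "tree" => {
            let path = args["path"].as_str().unwrap_or("/");
            let depth = args["depth"].as_u64().map(|d| d as usize);
            let format = match args["format"].as_str() {
                Some("Json") => OutputFormat::Json,
                _ => OutputFormat::Text,
            };
            ctx.tree(path, depth, format)
        }
        "find" => {
            let pattern = args["pattern"].as_str().ok_or_else(|| anyhow!("missing pattern"))?;
            let options = FindOptions {
                recursive: args["recursive"].as_bool().unwrap_or(true),
                ..Default::default()
            };
            // NodeMetadata derives Serialize, so results can go back to the model as JSON.
            Ok(serde_json::to_string_pretty(&ctx.find(pattern, options)?)?)
        }
        "grep" => {
            let pattern = args["pattern"].as_str().ok_or_else(|| anyhow!("missing pattern"))?;
            let paths = args["paths"].as_array().map(|a| {
                a.iter().filter_map(|p| p.as_str().map(String::from)).collect()
            });
            Ok(serde_json::to_string_pretty(&ctx.grep(pattern, paths)?)?)
        }
        "cat" => {
            let paths: Vec<String> = args["paths"]
                .as_array()
                .map(|a| a.iter().filter_map(|p| p.as_str().map(String::from)).collect())
                .unwrap_or_default();
            ctx.cat(&paths)
        }
        "patch" => {
            ctx.patch(
                args["path"].as_str().ok_or_else(|| anyhow!("missing path"))?,
                args["original"].as_str().ok_or_else(|| anyhow!("missing original"))?,
                args["replacement"].as_str().ok_or_else(|| anyhow!("missing replacement"))?,
            )?;
            Ok("ok".to_string())
        }
        other => Err(anyhow!("Unknown tool: {}", other)),
    }
}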
@ -1,141 +0,0 @@
|
||||
use workflow_context::{ContextStore, Vgcs, DocOS, DocManager, DocNodeKind};
|
||||
use tempfile::TempDir;
|
||||
use std::sync::Arc;
|
||||
|
||||
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
||||
|
||||
#[test]
|
||||
fn test_docos_basic() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let store = Arc::new(Vgcs::new(temp_dir.path()));
|
||||
let req_id = "req-docos-1";
|
||||
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
// 1. Init DocOS with empty repo
|
||||
let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);
|
||||
|
||||
// 2. Create a file (Leaf)
|
||||
docos.write_content("Introduction", "Intro Content")?;
|
||||
let _commit_1 = docos.save("Add Intro")?;
|
||||
|
||||
// 3. Verify outline
|
||||
let outline = docos.get_outline()?;
|
||||
// Root -> [Introduction (Leaf)]
|
||||
assert_eq!(outline.children.len(), 1);
|
||||
let intro_node = &outline.children[0];
|
||||
assert_eq!(intro_node.name, "Introduction");
|
||||
assert_eq!(intro_node.kind, DocNodeKind::Leaf);
|
||||
|
||||
// 4. Read content
|
||||
let content = docos.read_content("Introduction")?;
|
||||
assert_eq!(content, "Intro Content");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_docos_fission() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let store = Arc::new(Vgcs::new(temp_dir.path()));
|
||||
let req_id = "req-docos-2";
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);
|
||||
|
||||
// 1. Start with a Leaf: "Analysis"
|
||||
docos.write_content("Analysis", "General Analysis")?;
|
||||
let commit_1 = docos.save("Init Analysis")?;
|
||||
|
||||
// 2. Insert subsection "Revenue" into "Analysis"
|
||||
// This should promote "Analysis" to Composite
|
||||
docos.reload(&commit_1)?;
|
||||
docos.insert_subsection("Analysis", "Revenue", "Revenue Data")?;
|
||||
let commit_2 = docos.save("Split Analysis")?;
|
||||
|
||||
// 3. Verify Structure
|
||||
docos.reload(&commit_2)?;
|
||||
let outline = docos.get_outline()?;
|
||||
|
||||
// Root -> [Analysis (Composite)]
|
||||
assert_eq!(outline.children.len(), 1);
|
||||
let analysis_node = &outline.children[0];
|
||||
assert_eq!(analysis_node.name, "Analysis");
|
||||
assert_eq!(analysis_node.kind, DocNodeKind::Composite);
|
||||
|
||||
// Analysis -> [Revenue (Leaf)] (index.md is hidden in outline)
|
||||
assert_eq!(analysis_node.children.len(), 1);
|
||||
let revenue_node = &analysis_node.children[0];
|
||||
assert_eq!(revenue_node.name, "Revenue");
|
||||
assert_eq!(revenue_node.kind, DocNodeKind::Leaf);
|
||||
|
||||
// 4. Verify Content
|
||||
// Reading "Analysis" should now read "Analysis/index.md" which contains "General Analysis"
|
||||
let analysis_content = docos.read_content("Analysis")?;
|
||||
assert_eq!(analysis_content, "General Analysis");
|
||||
|
||||
let revenue_content = docos.read_content("Analysis/Revenue")?;
|
||||
assert_eq!(revenue_content, "Revenue Data");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_docos_fusion_and_outline() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let store = Arc::new(Vgcs::new(temp_dir.path()));
|
||||
let req_id = "req-docos-3";
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);
|
||||
|
||||
// 1. Create a composite structure (Pre-fissioned state)
|
||||
// Root -> [Chapter1 (Composite)] -> [SectionA (Leaf), SectionB (Leaf)]
|
||||
docos.write_content("Chapter1/index.md", "Chapter 1 Intro")?;
|
||||
docos.write_content("Chapter1/SectionA", "Content A")?;
|
||||
docos.write_content("Chapter1/SectionB", "Content B")?;
|
||||
let commit_1 = docos.save("Setup Structure")?;
|
||||
|
||||
docos.reload(&commit_1)?;
|
||||
|
||||
// Verify Initial Outline
|
||||
let outline_1 = docos.get_outline()?;
|
||||
let ch1 = &outline_1.children[0];
|
||||
assert_eq!(ch1.kind, DocNodeKind::Composite);
|
||||
assert_eq!(ch1.children.len(), 2); // SectionA, SectionB
|
||||
|
||||
// 2. Demote (Fusion)
|
||||
docos.demote("Chapter1")?;
|
||||
let commit_2 = docos.save("Demote Chapter 1")?;
|
||||
|
||||
// 3. Verify Fusion Result
|
||||
docos.reload(&commit_2)?;
|
||||
let outline_2 = docos.get_outline()?;
|
||||
|
||||
// Now Chapter1 should be a Leaf
|
||||
let ch1_fused = &outline_2.children[0];
|
||||
assert_eq!(ch1_fused.name, "Chapter1");
|
||||
assert_eq!(ch1_fused.kind, DocNodeKind::Leaf);
|
||||
|
||||
// But wait! Because of our Outline Enhancement (Markdown Headers),
|
||||
// we expect the Fused file to have children (Sections) derived from headers!
|
||||
// The demote logic appends children with "# Name".
|
||||
// So "SectionA" became "# SectionA".
|
||||
|
||||
// Let's inspect the children of the Fused node
|
||||
// We expect 2 children: "SectionA" and "SectionB" (as Sections)
|
||||
assert_eq!(ch1_fused.children.len(), 2);
|
||||
assert_eq!(ch1_fused.children[0].name, "SectionA");
|
||||
assert_eq!(ch1_fused.children[0].kind, DocNodeKind::Section);
|
||||
assert_eq!(ch1_fused.children[1].name, "SectionB");
|
||||
|
||||
// 4. Verify Content of Fused File
|
||||
let content = docos.read_content("Chapter1")?;
|
||||
// Should contain Intro + # SectionA ... + # SectionB ...
|
||||
assert!(content.contains("Chapter 1 Intro"));
|
||||
assert!(content.contains("# SectionA"));
|
||||
assert!(content.contains("Content A"));
|
||||
assert!(content.contains("# SectionB"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
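Read together, the fission and fusion tests above pin down the layout DocOS is expected to maintain. The round trip below condenses it using only the calls exercised in the tests; the index.md note and the "# Revenue" header are inferred from the assertions, not from DocOS internals:

// Fission: a Leaf gains a subsection and becomes a Composite backed by index.md.
// Fusion: demote folds the children back into one Leaf under "# <Name>" headers.
fn fission_round_trip(store: Arc<Vgcs>, req_id: &str) -> anyhow::Result<()> {
    let mut docos = DocOS::new(store, req_id, ZERO_OID);

    docos.write_content("Analysis", "General Analysis")?; // Leaf
    let c1 = docos.save("Init")?;

    docos.reload(&c1)?;
    docos.insert_subsection("Analysis", "Revenue", "Revenue Data")?; // promotes to Composite
    let c2 = docos.save("Split")?;

    docos.reload(&c2)?;
    // "Analysis" now resolves to its index.md content; the child is addressable by path.
    assert_eq!(docos.read_content("Analysis")?, "General Analysis");
    assert_eq!(docos.read_content("Analysis/Revenue")?, "Revenue Data");

    docos.demote("Analysis")?; // fuse children back into the parent body
    let c3 = docos.save("Fuse")?;
    docos.reload(&c3)?;
    // By analogy with test_docos_fusion_and_outline, the fused body carries "# Revenue".
    assert!(docos.read_content("Analysis")?.contains("# Revenue"));
    Ok(())
}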
@ -1,171 +0,0 @@
|
||||
use workflow_context::{ContextStore, Vgcs};
|
||||
use std::io::Read;
|
||||
use tempfile::TempDir;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
||||
|
||||
#[test]
|
||||
fn test_basic_workflow() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let store = Vgcs::new(temp_dir.path());
|
||||
let req_id = "req-001";
|
||||
|
||||
// 1. Init
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
// 2. Write Transaction (Initial Commit)
|
||||
let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
|
||||
tx.write("test.txt", b"Hello World")?;
|
||||
let commit_hash_1 = tx.commit("Initial commit", "Test User")?;
|
||||
|
||||
// 3. Read
|
||||
let mut reader = store.read_file(req_id, &commit_hash_1, "test.txt")?;
|
||||
let mut content = String::new();
|
||||
reader.read_to_string(&mut content)?;
|
||||
assert_eq!(content, "Hello World");
|
||||
|
||||
// 4. Modify file
|
||||
let mut tx = store.begin_transaction(req_id, &commit_hash_1)?;
|
||||
tx.write("test.txt", b"Hello World Modified")?;
|
||||
tx.write("new.txt", b"New File")?;
|
||||
let commit_hash_2 = tx.commit("Second commit", "Test User")?;
|
||||
|
||||
// 5. Verify Diff
|
||||
let changes = store.diff(req_id, &commit_hash_1, &commit_hash_2)?;
|
||||
// Should have 1 Modified (test.txt) and 1 Added (new.txt)
|
||||
assert_eq!(changes.len(), 2);
|
||||
|
||||
// 6. List Dir
|
||||
let entries = store.list_dir(req_id, &commit_hash_2, "")?;
|
||||
assert_eq!(entries.len(), 2); // test.txt, new.txt
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_large_file_support() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let store = Vgcs::new(temp_dir.path());
|
||||
let req_id = "req-large";
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
// Create 2MB data
|
||||
let large_data = vec![b'a'; 2 * 1024 * 1024];
|
||||
|
||||
let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
|
||||
tx.write("large.bin", &large_data)?;
|
||||
let commit_hash = tx.commit("Add large file", "Tester")?;
|
||||
|
||||
// Read back
|
||||
let mut reader = store.read_file(req_id, &commit_hash, "large.bin")?;
|
||||
let mut read_data = Vec::new();
|
||||
reader.read_to_end(&mut read_data)?;
|
||||
|
||||
assert_eq!(read_data.len(), large_data.len());
|
||||
// Spot-check the first and last bytes first, so a failure here does not dump the full 2MB into the assertion message
|
||||
assert_eq!(read_data[0], b'a');
|
||||
assert_eq!(read_data[read_data.len()-1], b'a');
|
||||
assert_eq!(read_data, large_data);
|
||||
|
||||
// Check internal blob store
|
||||
// We don't calculate SHA256 here to verify path exactly, but we check if blobs dir has content
|
||||
let blobs_dir = temp_dir.path().join("blobs").join(req_id);
|
||||
assert!(blobs_dir.exists());
|
||||
// Should have subdirectories for SHA prefix
|
||||
let entries = std::fs::read_dir(blobs_dir)?.collect::<Vec<_>>();
|
||||
assert!(!entries.is_empty());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parallel_branching_and_merge() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
// Clone temp_path for threads
|
||||
let temp_path = temp_dir.path().to_path_buf();
|
||||
let store = Arc::new(Vgcs::new(&temp_path));
|
||||
let req_id = "req-parallel";
|
||||
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
// Initial commit
|
||||
let base_commit = {
|
||||
let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
|
||||
tx.write("base.txt", b"Base Content")?;
|
||||
tx.commit("Base Commit", "System")?
|
||||
};
|
||||
|
||||
// Fork 1: Modify base.txt
|
||||
let store1 = store.clone();
|
||||
let base1 = base_commit.clone();
|
||||
let handle1 = thread::spawn(move || -> anyhow::Result<String> {
|
||||
let mut tx = store1.begin_transaction(req_id, &base1)?;
|
||||
tx.write("base.txt", b"Base Content Modified by 1")?;
|
||||
tx.write("file1.txt", b"File 1 Content")?;
|
||||
Ok(tx.commit("Fork 1 Commit", "User 1")?)
|
||||
});
|
||||
|
||||
// Fork 2: Add file2.txt (No conflict)
|
||||
let store2 = store.clone();
|
||||
let base2 = base_commit.clone();
|
||||
let handle2 = thread::spawn(move || -> anyhow::Result<String> {
|
||||
let mut tx = store2.begin_transaction(req_id, &base2)?;
|
||||
tx.write("file2.txt", b"File 2 Content")?;
|
||||
Ok(tx.commit("Fork 2 Commit", "User 2")?)
|
||||
});
|
||||
|
||||
let commit1 = handle1.join().unwrap()?;
|
||||
let commit2 = handle2.join().unwrap()?;
|
||||
|
||||
// Merge Fork 2 into Fork 1 (Memory Merge)
|
||||
// This merge should succeed as they touch different files/areas (mostly)
|
||||
// But wait, Fork 1 modified base.txt, Fork 2 kept it as is.
|
||||
// Git merge should take Fork 1's change and include Fork 2's new file.
|
||||
|
||||
// We need to commit the merge result to verify it
|
||||
let merge_tree_oid = store.merge_trees(req_id, &base_commit, &commit1, &commit2)?;
|
||||
|
||||
// Manually create a commit from the merge tree to verify content (optional but good)
|
||||
// In real system, Orchestrator would do this.
|
||||
// For test, we can just verify the tree contains what we expect or use a helper.
|
||||
// Or we can just trust merge_trees returns an OID on success.
|
||||
|
||||
assert!(!merge_tree_oid.is_empty());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_merge_conflict() -> anyhow::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let store = Vgcs::new(temp_dir.path());
|
||||
let req_id = "req-conflict";
|
||||
store.init_repo(req_id)?;
|
||||
|
||||
// Base
|
||||
let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
|
||||
tx.write("conflict.txt", b"Base Version")?;
|
||||
let base_commit = tx.commit("Base", "System")?;
|
||||
|
||||
// Branch A: Edit conflict.txt
|
||||
let mut tx_a = store.begin_transaction(req_id, &base_commit)?;
|
||||
tx_a.write("conflict.txt", b"Version A")?;
|
||||
let commit_a = tx_a.commit("Commit A", "User A")?;
|
||||
|
||||
// Branch B: Edit conflict.txt differently
|
||||
let mut tx_b = store.begin_transaction(req_id, &base_commit)?;
|
||||
tx_b.write("conflict.txt", b"Version B")?;
|
||||
let commit_b = tx_b.commit("Commit B", "User B")?;
|
||||
|
||||
// Try Merge
|
||||
let result = store.merge_trees(req_id, &base_commit, &commit_a, &commit_b);
|
||||
|
||||
// Should fail with conflict
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err();
|
||||
assert!(err.to_string().contains("Merge conflict"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
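A condensed sketch of the fork-and-merge flow the parallel test exercises, using the same Vgcs calls shown above; note that merge_trees only returns a tree OID on success, and wrapping it in a merge commit is left to the orchestrator, as the comments in test_parallel_branching_and_merge point out:

fn fork_and_merge(store: &Vgcs, req_id: &str) -> anyhow::Result<String> {
    // Base commit
    let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
    tx.write("base.txt", b"Base Content")?;
    let base = tx.commit("Base", "System")?;

    // Two forks branching off the same base commit
    let mut tx_a = store.begin_transaction(req_id, &base)?;
    tx_a.write("base.txt", b"Modified by A")?;
    let a = tx_a.commit("Fork A", "User A")?;

    let mut tx_b = store.begin_transaction(req_id, &base)?;
    tx_b.write("file-b.txt", b"Added by B")?;
    let b = tx_b.commit("Fork B", "User B")?;

    // Three-way merge of the fork tips against their common base.
    // Conflicting edits surface as an error whose message contains "Merge conflict"
    // (see test_merge_conflict); callers decide whether to retry or report it.
    store.merge_trees(req_id, &base, &a, &b)
}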
@ -1,142 +0,0 @@
|
||||
use workflow_context::{WorkerContext, ContextShell, OutputFormat, FindOptions, Vgcs, ContextStore};
|
||||
use tempfile::TempDir;
|
||||
|
||||
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
||||
|
||||
fn setup_env() -> (TempDir, String, String) {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let data_path = temp_dir.path().to_str().unwrap().to_string();
|
||||
let req_id = "req-shell-test".to_string();
|
||||
|
||||
// Init Repo
|
||||
let vgcs = Vgcs::new(&data_path);
|
||||
vgcs.init_repo(&req_id).unwrap();
|
||||
|
||||
(temp_dir, data_path, req_id)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shell_comprehensive() -> anyhow::Result<()> {
|
||||
let (_tmp, data_path, req_id) = setup_env();
|
||||
|
||||
// 1. Setup Initial Context
|
||||
let mut ctx = WorkerContext::new(&data_path, &req_id, ZERO_OID);
|
||||
|
||||
ctx.write_file("README.md", "Project Root\n\nIntroduction here.")?;
|
||||
ctx.write_file("src/main.rs", "fn main() {\n println!(\"Hello\");\n println!(\"Hello\");\n}")?; // Double Hello for ambiguity test
|
||||
ctx.write_file("src/util.rs", "pub fn util() -> i32 { 42 }")?;
|
||||
ctx.write_file("data/config.json", "{\n \"key\": \"value\",\n \"retries\": 3\n}")?;
|
||||
ctx.write_file("文档/说明.txt", "这是一个中文文件。")?; // Unicode Path & Content
|
||||
|
||||
let commit_1 = ctx.commit("Init")?;
|
||||
let mut ctx = WorkerContext::new(&data_path, &req_id, &commit_1);
|
||||
|
||||
// --- Find Tests ---
|
||||
println!("Testing Find...");
|
||||
|
||||
// Test: Recursive vs Non-recursive
|
||||
// Note: Includes directories (src, data, 文档) + files (5) = 8
|
||||
let all_nodes = ctx.find("**/*", FindOptions { recursive: true, ..Default::default() })?;
|
||||
assert_eq!(all_nodes.len(), 8);
|
||||
|
||||
// Test: Only Files
|
||||
let only_files = ctx.find("**/*", FindOptions {
|
||||
recursive: true,
|
||||
type_filter: Some("File".to_string()),
|
||||
..Default::default()
|
||||
})?;
|
||||
assert_eq!(only_files.len(), 5);
|
||||
|
||||
// Test: Non-recursive (Top level)
|
||||
let root_nodes = ctx.find("*", FindOptions { recursive: false, ..Default::default() })?;
|
||||
// Expect README.md, src(dir), data(dir), 文档(dir)
|
||||
assert!(root_nodes.iter().any(|f| f.path == "README.md"));
|
||||
assert!(root_nodes.iter().any(|f| f.path == "src"));
|
||||
|
||||
// Test: Type Filter (Dir)
|
||||
let dirs = ctx.find("**/*", FindOptions {
|
||||
recursive: true,
|
||||
type_filter: Some("Dir".to_string()),
|
||||
..Default::default()
|
||||
})?;
|
||||
assert!(dirs.iter().any(|d| d.path == "src"));
|
||||
assert!(dirs.iter().any(|d| d.path == "data"));
|
||||
assert!(dirs.iter().any(|d| d.path == "文档"));
|
||||
assert!(!dirs.iter().any(|d| d.path == "README.md"));
|
||||
|
||||
// --- Grep Tests ---
|
||||
println!("Testing Grep...");
|
||||
|
||||
// Test: Regex Match
|
||||
let matches = ctx.grep(r"fn \w+\(\)", None)?;
|
||||
assert_eq!(matches.len(), 2); // main() and util()
|
||||
|
||||
// Test: Unicode Content
|
||||
let zh_matches = ctx.grep("中文", None)?;
|
||||
assert_eq!(zh_matches.len(), 1);
|
||||
assert_eq!(zh_matches[0].path, "文档/说明.txt");
|
||||
|
||||
// Test: Invalid Regex
|
||||
let bad_regex = ctx.grep("(", None);
|
||||
assert!(bad_regex.is_err());
|
||||
|
||||
// --- Patch Tests ---
|
||||
println!("Testing Patch...");
|
||||
|
||||
// Test: Ambiguous Match (Safety Check)
|
||||
// src/main.rs has two "println!(\"Hello\");"
|
||||
let res = ctx.patch("src/main.rs", "println!(\"Hello\");", "println!(\"World\");");
|
||||
assert!(res.is_err(), "Should fail on ambiguous match");
|
||||
let err_msg = res.unwrap_err().to_string();
|
||||
assert!(err_msg.contains("Ambiguous match"), "Error message mismatch: {}", err_msg);
|
||||
|
||||
// Test: Unique Match
|
||||
// Patch "Introduction here." to "Intro v2." in README.md
|
||||
ctx.patch("README.md", "Introduction here.", "Intro v2.")?;
|
||||
ctx.commit("Patch 1")?; // Must commit to verify via read (if read uses committed state)
|
||||
|
||||
// Verify
|
||||
let readme = ctx.read_text("README.md")?;
|
||||
assert!(readme.contains("Intro v2."));
|
||||
|
||||
// Test: Special Characters (Literal Match)
|
||||
// Let's try to patch JSON which has braces and quotes
|
||||
ctx.patch("data/config.json", "\"retries\": 3", "\"retries\": 5")?;
|
||||
ctx.commit("Patch 2")?;
|
||||
|
||||
let config = ctx.read_text("data/config.json")?;
|
||||
assert!(config.contains("\"retries\": 5"));
|
||||
|
||||
// Test: Cross-line Patch
|
||||
// Replace the whole function body in util.rs
|
||||
let old_block = "pub fn util() -> i32 { 42 }";
|
||||
let new_block = "pub fn util() -> i32 {\n return 100;\n}";
|
||||
ctx.patch("src/util.rs", old_block, new_block)?;
|
||||
ctx.commit("Patch 3")?;
|
||||
|
||||
let util = ctx.read_text("src/util.rs")?;
|
||||
assert!(util.contains("return 100;"));
|
||||
|
||||
// Test: Patch non-existent file
|
||||
let res = ctx.patch("ghost.txt", "foo", "bar");
|
||||
assert!(res.is_err());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tool_schema_validity() {
|
||||
let defs = WorkerContext::get_tool_definitions();
|
||||
assert!(defs.is_array());
|
||||
let arr = defs.as_array().unwrap();
|
||||
|
||||
// Verify critical fields exist for OpenAI
|
||||
for tool in arr {
|
||||
let obj = tool.as_object().unwrap();
|
||||
assert_eq!(obj["type"], "function");
|
||||
let func = obj["function"].as_object().unwrap();
|
||||
assert!(func.contains_key("name"));
|
||||
assert!(func.contains_key("description"));
|
||||
assert!(func.contains_key("parameters"));
|
||||
}
|
||||
}
|
||||
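test_tool_schema_validity checks the fields an OpenAI-style endpoint expects. The sketch below shows one way the definitions might be embedded in a chat-completions request body; the model name, message shape, and the availability of serde_json in this test crate are assumptions, not something the original file establishes:

fn build_chat_request(user_prompt: &str) -> serde_json::Value {
    serde_json::json!({
        // Placeholder model name; any tools-capable chat model would do.
        "model": "gpt-4o-mini",
        "messages": [
            { "role": "system", "content": "You are a worker operating on a versioned document tree." },
            { "role": "user", "content": user_prompt }
        ],
        // The array returned by get_tool_definitions() already uses the
        // {"type": "function", "function": {...}} shape the test asserts on.
        "tools": WorkerContext::get_tool_definitions(),
        "tool_choice": "auto"
    })
}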
@ -1,339 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on any error
|
||||
set -e
|
||||
|
||||
# Configuration variables
|
||||
REGISTRY="harbor.3prism.ai"
|
||||
PROJECT="fundamental_analysis"
|
||||
VERSION="latest"
|
||||
NAMESPACE="$REGISTRY/$PROJECT"
|
||||
|
||||
# Colored output
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
RED='\033[0;31m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${GREEN}=== 开始构建并推送镜像到 $NAMESPACE ===${NC}"
|
||||
|
||||
# Service list
|
||||
# Format: "service-name:path-to-Dockerfile"
|
||||
# Note: all backend services now use the shared docker/Dockerfile.backend.prod
|
||||
SERVICES=(
|
||||
"data-persistence-service:docker/Dockerfile.backend.prod"
|
||||
"api-gateway:docker/Dockerfile.backend.prod"
|
||||
"alphavantage-provider-service:docker/Dockerfile.backend.prod"
|
||||
"tushare-provider-service:docker/Dockerfile.backend.prod"
|
||||
"finnhub-provider-service:docker/Dockerfile.backend.prod"
|
||||
"yfinance-provider-service:docker/Dockerfile.backend.prod"
|
||||
"report-generator-service:docker/Dockerfile.backend.prod"
|
||||
"workflow-orchestrator-service:docker/Dockerfile.backend.prod"
|
||||
"mock-provider-service:docker/Dockerfile.backend.prod"
|
||||
"frontend:docker/Dockerfile.frontend.prod"
|
||||
)
|
||||
|
||||
# Total size counter
|
||||
TOTAL_SIZE=0
|
||||
|
||||
for entry in "${SERVICES[@]}"; do
|
||||
KEY="${entry%%:*}"
|
||||
DOCKERFILE="${entry#*:}"
|
||||
IMAGE_NAME="$NAMESPACE/$KEY:$VERSION"
|
||||
|
||||
echo -e "\n${YELLOW}>>> 正在构建 $KEY ...${NC}"
|
||||
echo "使用 Dockerfile: $DOCKERFILE"
|
||||
|
||||
# Build the image
|
||||
if [ "$KEY" == "frontend" ]; then
|
||||
# The frontend does not need the SERVICE_NAME build-arg
|
||||
docker build -t "$IMAGE_NAME" -f "$DOCKERFILE" .
|
||||
elif [ "$KEY" == "data-persistence-service" ]; then
|
||||
# Special case: data-persistence-service uses a different binary name
|
||||
docker build -t "$IMAGE_NAME" --build-arg SERVICE_NAME="data-persistence-service-server" -f "$DOCKERFILE" .
|
||||
else
|
||||
# Backend services need SERVICE_NAME passed in
|
||||
docker build -t "$IMAGE_NAME" --build-arg SERVICE_NAME="$KEY" -f "$DOCKERFILE" .
|
||||
fi
|
||||
|
||||
# Get the image size (MB)
|
||||
SIZE_BYTES=$(docker inspect "$IMAGE_NAME" --format='{{.Size}}')
|
||||
SIZE_MB=$(echo "scale=2; $SIZE_BYTES / 1024 / 1024" | bc)
|
||||
|
||||
echo -e "${GREEN}√ $KEY 构建完成. 大小: ${SIZE_MB} MB${NC}"
|
||||
|
||||
# Accumulate total size
|
||||
TOTAL_SIZE=$(echo "$TOTAL_SIZE + $SIZE_BYTES" | bc)
|
||||
|
||||
echo -e "${YELLOW}>>> 正在推送 $KEY 到 Harbor ...${NC}"
|
||||
docker push "$IMAGE_NAME"
|
||||
done
|
||||
|
||||
TOTAL_SIZE_MB=$(echo "scale=2; $TOTAL_SIZE / 1024 / 1024" | bc)
|
||||
echo -e "\n${GREEN}=== 所有镜像处理完成 ===${NC}"
|
||||
echo -e "${GREEN}总大小: ${TOTAL_SIZE_MB} MB${NC}"
|
||||
|
||||
|
||||
# Generate the docker-compose.server.yml used on the server
|
||||
echo -e "\n${YELLOW}>>> 正在生成服务器部署文件 docker-compose.server.yml ...${NC}"
|
||||
|
||||
# Generated from docker-compose.prod.yml, but with build replaced by image
|
||||
# Defined by hand here: parsing and rewriting the YAML is more complex, and the structure is known
|
||||
|
||||
cat > docker-compose.server.yml <<EOF
|
||||
services:
|
||||
postgres-db:
|
||||
image: timescale/timescaledb:2.15.2-pg16
|
||||
container_name: fundamental-postgres
|
||||
command: -c shared_preload_libraries=timescaledb
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: fundamental
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres -d fundamental"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
nats:
|
||||
image: nats:2.9
|
||||
container_name: fundamental-nats
|
||||
volumes:
|
||||
- nats_data:/data
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
data-persistence-service:
|
||||
image: $NAMESPACE/data-persistence-service:$VERSION
|
||||
container_name: data-persistence-service
|
||||
environment:
|
||||
HOST: 0.0.0.0
|
||||
PORT: 3000
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres-db:5432/fundamental
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
SKIP_MIGRATIONS_ON_MISMATCH: "1"
|
||||
depends_on:
|
||||
postgres-db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:3000/health >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
api-gateway:
|
||||
image: $NAMESPACE/api-gateway:$VERSION
|
||||
container_name: api-gateway
|
||||
environment:
|
||||
SERVER_PORT: 4000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
REPORT_GENERATOR_SERVICE_URL: http://report-generator-service:8004
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
nats:
|
||||
condition: service_started
|
||||
data-persistence-service:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:4000/health >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: always
|
||||
|
||||
mock-provider-service:
|
||||
image: $NAMESPACE/mock-provider-service:$VERSION
|
||||
container_name: mock-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8006
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: mock-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
alphavantage-provider-service:
|
||||
image: $NAMESPACE/alphavantage-provider-service:$VERSION
|
||||
container_name: alphavantage-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: alphavantage-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
tushare-provider-service:
|
||||
image: $NAMESPACE/tushare-provider-service:$VERSION
|
||||
container_name: tushare-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8001
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
TUSHARE_API_URL: http://api.waditu.com
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: tushare-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
finnhub-provider-service:
|
||||
image: $NAMESPACE/finnhub-provider-service:$VERSION
|
||||
container_name: finnhub-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8002
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
FINNHUB_API_URL: https://finnhub.io/api/v1
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: finnhub-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
yfinance-provider-service:
|
||||
image: $NAMESPACE/yfinance-provider-service:$VERSION
|
||||
container_name: yfinance-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8003
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: yfinance-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
dns:
|
||||
- 8.8.8.8
|
||||
- 8.8.4.4
|
||||
restart: always
|
||||
|
||||
report-generator-service:
|
||||
image: $NAMESPACE/report-generator-service:$VERSION
|
||||
container_name: report-generator-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8004
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
GOTENBERG_URL: http://gotenberg:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
- gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
workflow-orchestrator-service:
|
||||
image: $NAMESPACE/workflow-orchestrator-service:$VERSION
|
||||
container_name: workflow-orchestrator-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8005
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:8
|
||||
container_name: gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
frontend:
|
||||
image: $NAMESPACE/frontend:$VERSION
|
||||
container_name: fundamental-frontend
|
||||
ports:
|
||||
- "8080:80" # Map host 8080 to container 80 (Nginx)
|
||||
depends_on:
|
||||
api-gateway:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
workflow_data:
|
||||
pgdata:
|
||||
nats_data:
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
EOF
|
||||
|
||||
echo -e "${GREEN}生成完成: docker-compose.server.yml${NC}"
|
||||
echo -e "请将此文件复制到远程服务器,并执行: docker-compose -f docker-compose.server.yml up -d"
|
||||
|
||||
5
scripts/dev.py → dev.py
Normal file → Executable file
@ -108,8 +108,7 @@ def main():
|
||||
parser.add_argument("--backend-app", default=os.getenv("BACKEND_APP", "main:app"), help="Uvicorn app path, e.g. main:app")
|
||||
args = parser.parse_args()
|
||||
|
||||
# scripts/dev.py -> repository root
|
||||
repo_root = Path(__file__).resolve().parents[1]
|
||||
repo_root = Path(__file__).resolve().parent
|
||||
backend_dir = repo_root / "backend"
|
||||
frontend_dir = repo_root / "frontend"
|
||||
|
||||
@ -205,5 +204,3 @@ def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
@ -1,69 +0,0 @@
|
||||
services:
|
||||
api-gateway:
|
||||
ports:
|
||||
- "4000:4000"
|
||||
|
||||
workflow-orchestrator-service:
|
||||
ports:
|
||||
- "8005:8005" # Expose for debugging if needed
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
|
||||
alphavantage-provider-service:
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
|
||||
tushare-provider-service:
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
|
||||
finnhub-provider-service:
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
|
||||
yfinance-provider-service:
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
|
||||
report-generator-service:
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
|
||||
mock-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: services/mock-provider-service/Dockerfile
|
||||
container_name: mock-provider-service
|
||||
environment:
|
||||
SERVER_PORT: 8006
|
||||
NATS_ADDR: nats://nats:4222
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
SERVICE_HOST: mock-provider-service
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
depends_on:
|
||||
- nats
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8006/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
volumes:
|
||||
workflow_data:
|
||||
@ -1,292 +0,0 @@
|
||||
services:
|
||||
postgres-db:
|
||||
image: timescale/timescaledb:2.15.2-pg16
|
||||
container_name: fundamental-postgres
|
||||
command: -c shared_preload_libraries=timescaledb
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: fundamental
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres -d fundamental"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
nats:
|
||||
image: nats:2.9
|
||||
container_name: fundamental-nats
|
||||
volumes:
|
||||
- nats_data:/data
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
data-persistence-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: data-persistence-service-server
|
||||
container_name: data-persistence-service
|
||||
# Note: The binary name in Dockerfile is generic 'app' or we can override entrypoint.
|
||||
# The Dockerfile entrypoint is /usr/local/bin/app.
|
||||
environment:
|
||||
HOST: 0.0.0.0
|
||||
PORT: 3000
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres-db:5432/fundamental
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
SKIP_MIGRATIONS_ON_MISMATCH: "1"
|
||||
depends_on:
|
||||
postgres-db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:3000/health >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
api-gateway:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: api-gateway
|
||||
container_name: api-gateway
|
||||
environment:
|
||||
SERVER_PORT: 4000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
REPORT_GENERATOR_SERVICE_URL: http://report-generator-service:8004
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
nats:
|
||||
condition: service_started
|
||||
data-persistence-service:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:4000/health >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: always
|
||||
|
||||
mock-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: mock-provider-service
|
||||
container_name: mock-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8006
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: mock-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
alphavantage-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: alphavantage-provider-service
|
||||
container_name: alphavantage-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: alphavantage-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
tushare-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: tushare-provider-service
|
||||
container_name: tushare-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8001
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
TUSHARE_API_URL: http://api.waditu.com
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: tushare-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
finnhub-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: finnhub-provider-service
|
||||
container_name: finnhub-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8002
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
FINNHUB_API_URL: https://finnhub.io/api/v1
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: finnhub-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
yfinance-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: yfinance-provider-service
|
||||
container_name: yfinance-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8003
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: yfinance-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
dns:
|
||||
- 8.8.8.8
|
||||
- 8.8.4.4
|
||||
restart: always
|
||||
|
||||
report-generator-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: report-generator-service
|
||||
container_name: report-generator-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8004
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
GOTENBERG_URL: http://gotenberg:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
- gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:8
|
||||
container_name: gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
workflow-orchestrator-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.backend.prod
|
||||
args:
|
||||
SERVICE_NAME: workflow-orchestrator-service
|
||||
container_name: workflow-orchestrator-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8005
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
frontend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.frontend.prod
|
||||
container_name: fundamental-frontend
|
||||
ports:
|
||||
- "8080:80" # Map host 8080 to container 80 (Nginx)
|
||||
depends_on:
|
||||
api-gateway:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
workflow_data:
|
||||
pgdata:
|
||||
nats_data:
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
|
||||
@ -1,230 +0,0 @@
|
||||
services:
|
||||
postgres-db:
|
||||
image: timescale/timescaledb:2.15.2-pg16
|
||||
container_name: fundamental-postgres
|
||||
command: -c shared_preload_libraries=timescaledb
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: fundamental
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres -d fundamental"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
nats:
|
||||
image: nats:2.9
|
||||
container_name: fundamental-nats
|
||||
volumes:
|
||||
- nats_data:/data
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
data-persistence-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/data-persistence-service:latest
|
||||
container_name: data-persistence-service
|
||||
environment:
|
||||
HOST: 0.0.0.0
|
||||
PORT: 3000
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres-db:5432/fundamental
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
SKIP_MIGRATIONS_ON_MISMATCH: "1"
|
||||
depends_on:
|
||||
postgres-db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:3000/health >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
api-gateway:
|
||||
image: harbor.3prism.ai/fundamental_analysis/api-gateway:latest
|
||||
container_name: api-gateway
|
||||
environment:
|
||||
SERVER_PORT: 4000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
REPORT_GENERATOR_SERVICE_URL: http://report-generator-service:8004
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
nats:
|
||||
condition: service_started
|
||||
data-persistence-service:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:4000/health >/dev/null || exit 1"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: always
|
||||
|
||||
alphavantage-provider-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/alphavantage-provider-service:latest
|
||||
container_name: alphavantage-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: alphavantage-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
tushare-provider-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/tushare-provider-service:latest
|
||||
container_name: tushare-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8001
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
TUSHARE_API_URL: http://api.waditu.com
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: tushare-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
finnhub-provider-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/finnhub-provider-service:latest
|
||||
container_name: finnhub-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8002
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
FINNHUB_API_URL: https://finnhub.io/api/v1
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: finnhub-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
yfinance-provider-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/yfinance-provider-service:latest
|
||||
container_name: yfinance-provider-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8003
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: yfinance-provider-service
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
dns:
|
||||
- 8.8.8.8
|
||||
- 8.8.4.4
|
||||
restart: always
|
||||
|
||||
report-generator-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/report-generator-service:latest
|
||||
container_name: report-generator-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8004
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
GOTENBERG_URL: http://gotenberg:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
- gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
workflow-orchestrator-service:
|
||||
image: harbor.3prism.ai/fundamental_analysis/workflow-orchestrator-service:latest
|
||||
container_name: workflow-orchestrator-service
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
SERVER_PORT: 8005
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:8
|
||||
container_name: gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
frontend:
|
||||
image: harbor.3prism.ai/fundamental_analysis/frontend:latest
|
||||
container_name: fundamental-frontend
|
||||
ports:
|
||||
- "28080:80" # Map host 28080 to container 80 (Nginx)
|
||||
depends_on:
|
||||
api-gateway:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
workflow_data:
|
||||
pgdata:
|
||||
nats_data:
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
@ -1,51 +0,0 @@
|
||||
services:
|
||||
postgres-test:
|
||||
image: timescale/timescaledb:2.15.2-pg16
|
||||
container_name: fundamental-postgres-test
|
||||
command: -c shared_preload_libraries=timescaledb
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: fundamental_test
|
||||
ports:
|
||||
- "5433:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres -d fundamental_test"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
- test-network
|
||||
|
||||
nats-test:
|
||||
image: nats:2.9
|
||||
container_name: fundamental-nats-test
|
||||
ports:
|
||||
- "4223:4222"
|
||||
networks:
|
||||
- test-network
|
||||
|
||||
data-persistence-test:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: services/data-persistence-service/Dockerfile
|
||||
container_name: data-persistence-service-test
|
||||
environment:
|
||||
HOST: 0.0.0.0
|
||||
PORT: 3000
|
||||
# Connect to postgres-test using internal docker network alias
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres-test:5432/fundamental_test
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
ports:
|
||||
- "3005:3000"
|
||||
depends_on:
|
||||
postgres-test:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- test-network
|
||||
|
||||
networks:
|
||||
test-network:
|
||||
|
||||
|
||||
@ -1,384 +0,0 @@
|
||||
services:
|
||||
postgres-db:
|
||||
image: timescale/timescaledb:2.15.2-pg16
|
||||
container_name: fundamental-postgres
|
||||
command: -c shared_preload_libraries=timescaledb
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: fundamental
|
||||
volumes:
|
||||
- pgdata:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres -d fundamental"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
networks:
|
||||
- app-network
|
||||
ports:
|
||||
- "5434:5432"
|
||||
nats:
|
||||
image: nats:2.9
|
||||
volumes:
|
||||
- nats_data:/data
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
data-persistence-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: data-persistence-service
|
||||
working_dir: /app/services/data-persistence-service
|
||||
command: ["cargo", "watch", "-x", "run --bin data-persistence-service-server"]
|
||||
environment:
|
||||
HOST: 0.0.0.0
|
||||
PORT: 3000
|
||||
# Rust service connects to the internal DB service name
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres-db:5432/fundamental
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
postgres-db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:3000/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
volumes:
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
|
||||
frontend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: frontend/Dockerfile
|
||||
container_name: fundamental-frontend
|
||||
working_dir: /workspace/frontend
|
||||
command: ["/workspace/frontend/scripts/docker-dev-entrypoint.sh"]
|
||||
environment:
|
||||
# Vite Proxy Target
|
||||
VITE_API_TARGET: http://api-gateway:4000
|
||||
# 让 Next 的 API 路由代理到新的 api-gateway
|
||||
NEXT_PUBLIC_BACKEND_URL: http://api-gateway:4000/v1
|
||||
# SSR 内部访问自身 API 的内部地址,避免使用 x-forwarded-host 导致访问宿主机端口
|
||||
FRONTEND_INTERNAL_URL: http://fundamental-frontend:3001
|
||||
BACKEND_INTERNAL_URL: http://api-gateway:4000/v1
|
||||
NODE_ENV: development
|
||||
NEXT_TELEMETRY_DISABLED: "1"
|
||||
volumes:
|
||||
- ./:/workspace
|
||||
# 隔离 node_modules,避免与宿主机冲突
|
||||
- frontend_node_modules:/workspace/frontend/node_modules
|
||||
ports:
|
||||
- "13001:3001"
|
||||
depends_on:
|
||||
api-gateway:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
|
||||
api-gateway:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: api-gateway
|
||||
restart: unless-stopped
|
||||
working_dir: /app/services/api-gateway
|
||||
command: ["cargo", "watch", "-x", "run --bin api-gateway"]
|
||||
ports:
|
||||
- "4000:4000"
|
||||
environment:
|
||||
SERVER_PORT: 4000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
REPORT_GENERATOR_SERVICE_URL: http://report-generator-service:8004
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
nats:
|
||||
condition: service_started
|
||||
data-persistence-service:
|
||||
condition: service_healthy
|
||||
alphavantage-provider-service:
|
||||
condition: service_started
|
||||
mock-provider-service:
|
||||
condition: service_started
|
||||
tushare-provider-service:
|
||||
condition: service_started
|
||||
finnhub-provider-service:
|
||||
condition: service_started
|
||||
yfinance-provider-service:
|
||||
condition: service_started
|
||||
report-generator-service:
|
||||
condition: service_started
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:4000/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
volumes:
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
|
||||
mock-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: mock-provider-service
|
||||
working_dir: /app/services/mock-provider-service
|
||||
command: ["cargo", "watch", "-x", "run"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8006
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: mock-provider-service
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8006/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
alphavantage-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: alphavantage-provider-service
|
||||
working_dir: /app/services/alphavantage-provider-service
|
||||
command: ["cargo", "watch", "-x", "run"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: alphavantage-provider-service
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8000/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
tushare-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: tushare-provider-service
|
||||
working_dir: /app/services/tushare-provider-service
|
||||
command: ["cargo", "watch", "-x", "run"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8001
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
TUSHARE_API_URL: http://api.waditu.com
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: tushare-provider-service
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8001/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
finnhub-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: finnhub-provider-service
|
||||
working_dir: /app/services/finnhub-provider-service
|
||||
command: ["cargo", "watch", "-x", "run"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8002
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
FINNHUB_API_URL: https://finnhub.io/api/v1
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: finnhub-provider-service
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8002/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
yfinance-provider-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: yfinance-provider-service
|
||||
working_dir: /app/services/yfinance-provider-service
|
||||
command: ["cargo", "watch", "-x", "run"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8003
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
API_GATEWAY_URL: http://api-gateway:4000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
SERVICE_HOST: yfinance-provider-service
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
dns:
|
||||
- 8.8.8.8
|
||||
- 8.8.4.4
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8003/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
report-generator-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: report-generator-service
|
||||
working_dir: /app/services/report-generator-service
|
||||
command: ["cargo", "watch", "-x", "run --bin report-generator-service"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8004
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
GOTENBERG_URL: http://gotenberg:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info,axum=info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
- gotenberg
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8004/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
gotenberg:
|
||||
image: gotenberg/gotenberg:8
|
||||
container_name: gotenberg
|
||||
ports:
|
||||
- "3000:3000"
|
||||
networks:
|
||||
- app-network
|
||||
|
||||
workflow-orchestrator-service:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
container_name: workflow-orchestrator-service
|
||||
working_dir: /app/services/workflow-orchestrator-service
|
||||
command: ["cargo", "watch", "-x", "run --bin workflow-orchestrator-service"]
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
- ./:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
environment:
|
||||
SERVER_PORT: 8005
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
|
||||
WORKFLOW_DATA_PATH: /mnt/workflow_data
|
||||
RUST_LOG: info
|
||||
RUST_BACKTRACE: "1"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
networks:
|
||||
- app-network
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -fsS http://localhost:8005/health >/dev/null || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 12
|
||||
|
||||
# =================================================================
|
||||
# Python Services (Legacy - to be replaced)
|
||||
# =================================================================
|
||||
|
||||
volumes:
|
||||
workflow_data:
|
||||
pgdata:
|
||||
frontend_node_modules:
|
||||
nats_data:
|
||||
cargo-target:
|
||||
driver: local
|
||||
cargo-cache:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
app-network:
|
||||
@ -1,67 +0,0 @@
|
||||
# 1. Build Stage
|
||||
FROM rust:1.90-bookworm as builder
|
||||
ARG SERVICE_NAME
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
# Copy the entire workspace
|
||||
COPY . .
|
||||
|
||||
# Build the specific service in release mode
|
||||
ENV SQLX_OFFLINE=true
|
||||
RUN cargo build --release --bin ${SERVICE_NAME}
|
||||
|
||||
# Prepare runtime assets directory
|
||||
RUN mkdir -p /app/assets
|
||||
|
||||
# Conditionally copy potential asset folders if they exist for the service
|
||||
# We use a shell loop or explicit checks. Docker COPY doesn't support conditionals well.
|
||||
# So we do it in the builder stage using shell.
|
||||
|
||||
# 1. Migrations (e.g., data-persistence-service)
|
||||
RUN if [ -d "services/${SERVICE_NAME}/migrations" ]; then \
|
||||
mkdir -p /app/assets/migrations && \
|
||||
cp -r services/${SERVICE_NAME}/migrations/* /app/assets/migrations/; \
|
||||
fi
|
||||
|
||||
# 2. Templates (e.g., report-generator-service)
|
||||
RUN if [ -d "services/${SERVICE_NAME}/templates" ]; then \
|
||||
mkdir -p /app/assets/templates && \
|
||||
cp -r services/${SERVICE_NAME}/templates/* /app/assets/templates/; \
|
||||
fi
|
||||
|
||||
# 2.1 Cookies (e.g., report-generator-service)
|
||||
RUN if [ -f "services/${SERVICE_NAME}/cookies.txt" ]; then \
|
||||
cp services/${SERVICE_NAME}/cookies.txt /app/assets/cookies.txt; \
|
||||
fi
|
||||
|
||||
# 3. Config folder (root level, needed by some services like data-persistence)
|
||||
# We copy it to a specific location.
|
||||
RUN cp -r config /app/config
|
||||
|
||||
# 4. Service Kit Mirror (needed by data-persistence-service build usually, but maybe runtime?)
|
||||
# It was needed for build. Runtime usually doesn't need it unless it compiles code at runtime.
|
||||
|
||||
# 2. Runtime Stage
|
||||
FROM debian:bookworm-slim
|
||||
ARG SERVICE_NAME
|
||||
ENV TZ=Asia/Shanghai
|
||||
|
||||
# Install dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
libssl3 \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy binary
|
||||
COPY --from=builder /usr/src/app/target/release/${SERVICE_NAME} /usr/local/bin/app
|
||||
|
||||
# Copy prepared assets
|
||||
COPY --from=builder /app/assets /app/
|
||||
COPY --from=builder /app/config /app/config
|
||||
|
||||
# Set the binary as the entrypoint
|
||||
ENTRYPOINT ["/usr/local/bin/app"]
|
||||
|
||||
@ -1,13 +0,0 @@
FROM rust:1.90-bookworm
WORKDIR /usr/src/app

# Copy the entire workspace
COPY . .

# Set SQLX offline mode to avoid needing a running DB during build
ENV SQLX_OFFLINE=true

# Build the entire workspace in release mode
# This compiles all crates in the workspace at once
RUN cargo build --release --workspace
@ -1,13 +0,0 @@
FROM rust:1.90-bookworm

# Install cargo-watch for hot reload
RUN cargo install cargo-watch

WORKDIR /app

# Create target and cache directories to ensure permissions
RUN mkdir -p /app/target && mkdir -p /usr/local/cargo

# Default command
CMD ["cargo", "watch", "-x", "run"]
@ -1,25 +0,0 @@
FROM debian:bookworm-slim
ENV TZ=Asia/Shanghai

# Install minimal runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    libssl3 \
    curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# The build context is expected to be prepared by the deployment script
# It should contain:
# - app (the binary)
# - config/ (if needed)
# - assets/ (if needed)

COPY . .

# Ensure the binary is executable
RUN chmod +x /app/app

ENTRYPOINT ["/app/app"]
@ -1,24 +0,0 @@
# 1. Build Stage
FROM node:20-slim AS builder
WORKDIR /app

# Environment variables for build time
# ENV NODE_ENV=production <- REMOVED: This causes npm ci to skip devDependencies (tsc, vite)
# These must match the Nginx proxy paths
ENV VITE_API_TARGET=/api
ENV NEXT_PUBLIC_BACKEND_URL=/api/v1

COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci

COPY frontend/ .
RUN npm run build

# 2. Runtime Stage
FROM nginx:alpine
COPY --from=builder /app/dist /usr/share/nginx/html
COPY docker/nginx.prod.conf /etc/nginx/conf.d/default.conf

EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]
@ -1,36 +0,0 @@
server {
    listen 80;
    server_name localhost;
    root /usr/share/nginx/html;
    index index.html;

    # Compression
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

    location / {
        try_files $uri $uri/ /index.html;
    }

    # Proxy API requests to the backend
    # Matches /api/v1/..., /api/context/..., etc.
    location /api/ {
        proxy_pass http://api-gateway:4000/api/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    # Proxy specific endpoints that are at root level in api-gateway
    location /health {
        proxy_pass http://api-gateway:4000/health;
        proxy_set_header Host $host;
    }

    location /tasks/ {
        proxy_pass http://api-gateway:4000/tasks/;
        proxy_set_header Host $host;
    }
}
@ -1,271 +0,0 @@
|
||||
# 用户使用文档
|
||||
|
||||
## 欢迎使用基本面分析系统
|
||||
|
||||
基本面分析系统是一个专业的股票分析平台,帮助投资者通过多维度的基本面分析,做出更明智的投资决策。
|
||||
|
||||
## 目录
|
||||
|
||||
- [快速开始](#快速开始)
|
||||
- [主要功能](#主要功能)
|
||||
- [使用指南](#使用指南)
|
||||
- [常见问题](#常见问题)
|
||||
- [系统配置](#系统配置)
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 生成分析报告
|
||||
|
||||
1. **访问首页**:在浏览器中打开系统首页
|
||||
2. **输入股票代码**:在输入框中输入股票代码,例如:
|
||||
- 中国股票:`600519`(会自动识别为 600519.SH)或 `600519.SH`
|
||||
- 美国股票:`AAPL`
|
||||
- 香港股票:`00700.HK`
|
||||
3. **选择交易市场**:从下拉菜单中选择对应的交易市场(中国、香港、美国、日本)
|
||||
4. **生成报告**:点击"生成报告"按钮,系统将自动获取财务数据并生成分析报告
|
||||
|
||||
### 2. 查看报告
|
||||
|
||||
报告生成后,您将看到包含以下内容的综合报告:
|
||||
|
||||
- **股价图表**:来自 TradingView 的实时股价图表
|
||||
- **财务数据**:多年财务指标对比,包括:
|
||||
- 主要指标:ROE、ROA、ROIC、毛利率、净利润率等
|
||||
- 费用指标:销售费用率、管理费用率、研发费用率等
|
||||
- 资产占比:现金占比、库存占比、应收款占比等
|
||||
- 周转能力:存货周转天数、应收款周转天数等
|
||||
- 人均效率:人均创收、人均创利、人均工资等
|
||||
- 市场表现:股价、市值、PE、PB、股东户数等
|
||||
- **AI 分析模块**:基于财务数据的智能分析,包括:
|
||||
- 公司简介
|
||||
- 业务分析
|
||||
- 财务健康度评估
|
||||
- 投资建议等
|
||||
|
||||
### 3. 报告操作
|
||||
|
||||
- **开始分析**:点击"开始分析"按钮,系统将按顺序生成各个分析模块
|
||||
- **停止**:在分析过程中,可以随时点击"停止"按钮中断分析
|
||||
- **继续**:停止后可以点击"继续"按钮恢复分析
|
||||
- **重新生成分析**:对任意分析模块,可以点击"重新生成分析"按钮重新生成
|
||||
|
||||
## 主要功能
|
||||
|
||||
### 1. 股票分析报告
|
||||
|
||||
系统提供全面的股票基本面分析,包括:
|
||||
|
||||
- **财务数据展示**:自动从 Tushare 等数据源获取最新的财务数据
|
||||
- **多维度分析**:涵盖盈利能力、运营效率、财务健康度等多个维度
|
||||
- **历史对比**:展示多年的财务指标变化趋势
|
||||
- **实时图表**:集成 TradingView 高级图表组件,提供专业的股价图表
|
||||
|
||||
### 2. 智能分析模块
|
||||
|
||||
系统使用 AI 模型(如 Google Gemini)对财务数据进行深度分析:
|
||||
|
||||
- **自动生成**:根据财务数据自动生成业务分析和投资建议
|
||||
- **模块化设计**:不同分析模块相互独立,可按需生成
|
||||
- **依赖关系**:支持分析模块之间的依赖关系,确保分析的准确性
|
||||
- **实时进度**:显示每个分析模块的生成进度和状态
|
||||
|
||||
### 3. 系统配置管理
|
||||
|
||||
系统提供完善的配置管理功能:
|
||||
|
||||
- **AI 服务配置**:配置 AI 模型的 API 密钥和端点
|
||||
- **数据源配置**:配置 Tushare、Finnhub 等数据源的 API 密钥
|
||||
- **分析模块配置**:自定义分析模块的名称、模型和提示词模板
|
||||
- **配置测试**:支持测试各项配置的有效性
|
||||
- **配置导入/导出**:支持配置的备份和恢复
|
||||
|
||||
### 4. 历史报告查询
|
||||
|
||||
系统支持查询历史生成的报告:
|
||||
|
||||
- **按市场和企业ID查询**:根据交易市场和企业ID查询历史报告
|
||||
- **报告列表**:查看所有历史报告及其状态
|
||||
- **报告详情**:查看完整的报告内容
|
||||
|
||||
## 使用指南
|
||||
|
||||
### 股票代码格式
|
||||
|
||||
不同市场的股票代码格式:
|
||||
|
||||
- **中国市场**:
|
||||
- 上交所:6 位数字,如 `600519`(系统会自动添加 `.SH` 后缀)
|
||||
- 深交所:6 位数字,如 `000001`(系统会自动添加 `.SZ` 后缀)
|
||||
- 完整格式:`600519.SH` 或 `000001.SZ`
|
||||
- **美国市场**:直接输入股票代码,如 `AAPL`、`MSFT`
|
||||
- **香港市场**:股票代码,如 `00700`
|
||||
- **日本市场**:股票代码,如 `7203`
|
||||
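The suffix rules above lend themselves to a small normalization helper. Below is a minimal Rust sketch, assuming the simplified convention that bare six-digit codes starting with 6 map to the Shanghai exchange (`.SH`) and codes starting with 0 or 3 map to Shenzhen (`.SZ`); the function name and the exact rules the platform applies are illustrative assumptions, not the system's actual code.

```rust
/// Normalize a bare 6-digit A-share code to the exchange-suffixed form.
/// Simplified heuristic (assumption): '6xxxxx' -> Shanghai (.SH),
/// '0xxxxx' / '3xxxxx' -> Shenzhen (.SZ).
fn normalize_cn_symbol(input: &str) -> Option<String> {
    let code = input.trim().to_uppercase();
    // Already suffixed (e.g. "600519.SH") -> return as-is.
    if code.ends_with(".SH") || code.ends_with(".SZ") {
        return Some(code);
    }
    // Bare 6-digit code -> attach a suffix based on the leading digit.
    if code.len() == 6 && code.chars().all(|c| c.is_ascii_digit()) {
        return match code.chars().next() {
            Some('6') => Some(format!("{code}.SH")),
            Some('0') | Some('3') => Some(format!("{code}.SZ")),
            _ => None,
        };
    }
    None
}

fn main() {
    assert_eq!(normalize_cn_symbol("600519").as_deref(), Some("600519.SH"));
    assert_eq!(normalize_cn_symbol("000001").as_deref(), Some("000001.SZ"));
    assert_eq!(normalize_cn_symbol("600519.SH").as_deref(), Some("600519.SH"));
}
```

Codes that do not match this heuristic (for example Beijing Stock Exchange symbols) fall through to `None`, in which case the full suffixed form should be entered manually, as recommended above.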
|
||||
### 财务数据解读
|
||||
|
||||
系统展示的财务数据按以下方式组织:
|
||||
|
||||
1. **主要指标**:核心财务指标
|
||||
- ROE(净资产收益率):衡量股东权益的盈利能力,>12% 为优秀
|
||||
- ROA(总资产收益率):衡量资产利用效率
|
||||
- ROIC(投入资本回报率):衡量资本使用效率,>12% 为优秀
|
||||
- 毛利率:反映产品或服务的盈利能力
|
||||
- 净利润率:反映整体盈利能力
|
||||
|
||||
2. **费用指标**:各项费用占收入的比例
|
||||
- 销售费用率、管理费用率、研发费用率等
|
||||
- 其他费用率:通过毛利率减去各项费用率计算得出
|
||||
|
||||
3. **资产占比**:各项资产占总资产的比例
|
||||
- 现金占比:反映资金充裕程度
|
||||
- 库存占比:反映库存管理水平
|
||||
- 应收款占比:反映应收账款风险
|
||||
- 商誉占比:反映并购活动的影响
|
||||
|
||||
4. **周转能力**:反映资产周转效率
|
||||
- 存货周转天数:存货变现的速度
|
||||
- 应收款周转天数:应收账款回收速度(>90天需注意)
|
||||
- 应付款周转天数:应付账款支付周期
|
||||
|
||||
5. **人均效率**:反映人力资源效率
|
||||
- 人均创收、人均创利:衡量员工贡献
|
||||
- 人均工资:反映员工待遇水平
|
||||
|
||||
6. **市场表现**:股票市场的表现指标
|
||||
- PE(市盈率)、PB(市净率):估值指标
|
||||
- 股东户数:反映股东结构变化
|
||||
|
||||
### 分析模块说明
|
||||
|
||||
每个分析模块都有其特定的作用:
|
||||
|
||||
- **公司简介**:自动生成公司的基本介绍和业务概况
|
||||
- **业务分析**:深度分析公司的业务模式和竞争优势
|
||||
- **财务分析**:评估公司的财务健康状况
|
||||
- **风险评估**:识别潜在的投资风险
|
||||
- **投资建议**:提供基于分析的投资建议
|
||||
|
||||
分析模块会按依赖关系顺序执行,确保每个模块都能获得所需的前置分析结果。
|
||||
|
||||
### 执行详情
|
||||
|
||||
在报告页面的"执行详情"标签页,您可以查看:
|
||||
|
||||
- **执行概况**:财务数据获取的耗时、API 调用次数等
|
||||
- **分析任务**:每个分析模块的执行状态、耗时、Token 消耗等
|
||||
- **总体统计**:总耗时、完成的任务数量、总 Token 消耗等
|
||||
|
||||
这些信息有助于了解报告的生成过程和数据来源。
|
||||
|
||||
## 常见问题
|
||||
|
||||
### Q1: 为什么有些财务数据显示为 "-"?
|
||||
|
||||
A: 可能是以下原因:
|
||||
- 该股票在对应年份没有数据
|
||||
- 数据源暂时不可用
|
||||
- 该指标不适用于该股票类型
|
||||
|
||||
### Q2: 分析模块生成失败怎么办?
|
||||
|
||||
A: 您可以:
|
||||
- 点击"重新生成分析"按钮重试
|
||||
- 检查系统配置中的 AI 服务配置是否正确
|
||||
- 查看"执行详情"中的错误信息
|
||||
|
||||
### Q3: 如何查看历史报告?
|
||||
|
||||
A:
|
||||
1. 访问"查询"页面(如果已启用)
|
||||
2. 选择交易市场和企业ID
|
||||
3. 点击"查询"按钮查看历史报告列表
|
||||
|
||||
### Q4: 股票代码输入错误怎么办?
|
||||
|
||||
A: 系统会自动识别一些常见的代码格式,但如果识别失败,请:
|
||||
- 中国市场:使用完整格式,如 `600519.SH` 或 `000001.SZ`
|
||||
- 其他市场:按照该市场的标准格式输入
|
||||
|
||||
### Q5: 如何配置系统?
|
||||
|
||||
A:
|
||||
1. 访问"配置"页面
|
||||
2. 在对应的标签页中配置各项设置
|
||||
3. 使用"测试"按钮验证配置是否正确
|
||||
4. 点击"保存所有配置"保存设置
|
||||
|
||||
### Q6: 分析报告生成需要多长时间?
|
||||
|
||||
A: 生成时间取决于:
|
||||
- 财务数据的获取速度(通常几秒钟)
|
||||
- AI 分析模块的数量和复杂度
|
||||
- AI 服务的响应速度
|
||||
|
||||
完整的报告生成通常需要 1-5 分钟。
|
||||
|
||||
### Q7: 可以同时分析多只股票吗?
|
||||
|
||||
A: 目前系统每次只能分析一只股票。如需分析多只股票,请分别提交请求。
|
||||
|
||||
### Q8: 报告数据是实时的吗?
|
||||
|
||||
A:
|
||||
- 财务数据:来自 Tushare 等数据源,更新频率取决于数据源
|
||||
- 股价图表:TradingView 提供实时股价数据
|
||||
- AI 分析:基于当前获取的财务数据实时生成
|
||||
|
||||
## 系统配置
|
||||
|
||||
### 首次使用配置
|
||||
|
||||
首次使用系统时,需要配置以下内容:
|
||||
|
||||
1. **AI 服务配置**
|
||||
- API Key:输入您的 AI 服务 API 密钥
|
||||
- Base URL:输入 API 端点地址(如使用自建服务)
|
||||
|
||||
2. **数据源配置**
|
||||
- **Tushare**:输入 Tushare API Key(中国市场必需)
|
||||
- **Finnhub**:输入 Finnhub API Key(全球市场可选)
|
||||
|
||||
### 配置注意事项
|
||||
|
||||
- **敏感信息保护**:API 密钥等敏感信息在输入框中不会显示,留空表示保持当前值
|
||||
- **配置验证**:保存前建议使用"测试"按钮验证各项配置
|
||||
- **配置备份**:建议定期使用"导出配置"功能备份配置
|
||||
- **配置恢复**:可使用"导入配置"功能恢复之前的配置
|
||||
|
||||
### 分析模块配置
|
||||
|
||||
在"配置"页面的"分析配置"标签页,您可以:
|
||||
|
||||
- **自定义模块名称**:修改分析模块的显示名称
|
||||
- **选择 AI 模型**:为每个模块指定使用的 AI 模型
|
||||
- **编辑提示词模板**:自定义每个模块的分析提示词
|
||||
- **设置模块依赖**:配置分析模块之间的依赖关系
|
||||
|
||||
配置修改后,点击"保存分析配置"即可生效。
|
||||
|
||||
## 技术支持
|
||||
|
||||
如果您在使用过程中遇到问题,可以:
|
||||
|
||||
1. 查看"执行详情"中的错误信息
|
||||
2. 检查系统配置是否正确
|
||||
3. 查看系统日志(如果已启用)
|
||||
4. 联系系统管理员获取支持
|
||||
|
||||
---
|
||||
|
||||
**最后更新**:2025年1月
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,63 +0,0 @@
|
||||
# Fundamental Analysis Platform 用户指南 (v2.0 - Vite Refactor)
|
||||
日期: 2025-11-22
|
||||
版本: 2.0
|
||||
|
||||
## 1. 简介
|
||||
Fundamental Analysis Platform 是一个基于 AI Agent 的深度基本面投研平台,旨在通过自动化工作流聚合多源金融数据,并利用 LLM(大语言模型)生成专业的财务分析报告。
|
||||
|
||||
v2.0 版本采用了全新的 Vite + React SPA 架构,提供了更流畅的交互体验和实时的分析状态可视化。
|
||||
|
||||
## 2. 核心功能
|
||||
|
||||
### 2.1 仪表盘 (Dashboard)
|
||||
平台首页,提供简洁的分析入口。
|
||||
* **股票代码**: 支持输入 A股 (如 `600519.SS`)、美股 (如 `AAPL`) 或港股代码。
|
||||
* **市场选择**: 下拉选择 CN (中国)、US (美国) 或 HK (香港)。
|
||||
* **开始分析**: 点击“生成分析报告”按钮即可启动分析流程。
|
||||
|
||||
### 2.2 分析报告页 (Report View)
|
||||
核心工作区,分为左侧状态栏和右侧详情区。
|
||||
|
||||
#### 左侧:工作流状态
|
||||
* **可视化 DAG**: 展示当前的分析任务依赖图。
|
||||
* **节点颜色**: 灰色(等待)、蓝色(运行中)、绿色(完成)、红色(失败)。
|
||||
* **动态连线**: 当任务运行时,连接线会有流光动画指示数据流向。
|
||||
* **实时日志**: 滚动展示所有后台任务的执行日志,支持实时查看数据抓取和分析进度。
|
||||
|
||||
#### 右侧:详情面板
|
||||
* **Analysis Report**: 展示由 AI 生成的最终分析报告。支持 Markdown 格式(标题、表格、加粗、引用),并带有打字机生成特效。
|
||||
* **Fundamental Data**: (开发中) 展示抓取到的原始财务数据表格。
|
||||
* **Stock Chart**: (开发中) 展示股价走势图。
|
||||
|
||||
### 2.3 系统配置 (Config)
|
||||
集中管理平台的所有外部连接和参数。
|
||||
|
||||
* **AI Provider**:
|
||||
* 管理 LLM 供应商 (OpenAI, Anthropic, Local Ollama 等)。
|
||||
* 配置 API Key 和 Base URL。
|
||||
* 刷新并选择可用的模型 (GPT-4o, Claude-3.5 等)。
|
||||
* **数据源配置**:
|
||||
* 启用/禁用金融数据源 (Tushare, Finnhub, AlphaVantage)。
|
||||
* 输入对应的 API Token。
|
||||
* 支持连接测试。
|
||||
* **分析模板**:
|
||||
* 查看当前的分析流程模板(如 "Quick Scan")。
|
||||
* 查看每个模块使用的 Prompt 模板及模型配置。
|
||||
* **系统状态**:
|
||||
* 监控微服务集群 (API Gateway, Orchestrator 等) 的健康状态。
|
||||
|
||||
## 3. 快速开始
|
||||
|
||||
1. 进入 **配置页** -> **AI Provider**,添加您的 OpenAI API Key。
|
||||
2. 进入 **配置页** -> **数据源配置**,启用 Tushare 并输入 Token。
|
||||
3. 回到 **首页**,输入 `600519.SS`,选择 `CN` 市场。
|
||||
4. 点击 **生成分析报告**,观察工作流运行及报告生成。
|
||||
|
||||
## 4. 常见问题
|
||||
|
||||
* **Q: 报告生成卡住怎么办?**
|
||||
* A: 检查左侧“实时日志”,查看是否有 API 连接超时或配额耗尽的错误。
|
||||
* **Q: 如何添加本地模型?**
|
||||
* A: 在 AI Provider 页添加新的 Provider,Base URL 填入 `http://localhost:11434/v1` (Ollama 默认地址)。
|
||||
|
||||
|
||||
@ -1,159 +0,0 @@
|
||||
---
|
||||
status: 'Active'
|
||||
created: '2025-11-16'
|
||||
last_updated: '2025-11-16'
|
||||
owner: '@lv'
|
||||
---
|
||||
|
||||
# 系统架构设计总览
|
||||
|
||||
## 1. 引言
|
||||
|
||||
### 1.1. 文档目的
|
||||
|
||||
本文档旨在为“基本面选股系统”的事件驱动微服务架构,提供一份统一的、作为“单一事实源”的核心技术蓝图。它整合并取代了多个历史设计文档,旨在清晰、准确地描述当前系统的核心架构理念、服务职责、关键设计以及数据模型。
|
||||
|
||||
### 1.2. 核心架构理念
|
||||
|
||||
本系统采用纯 Rust 构建的现代化微服务架构,其核心理念根植于“Rustic”风格的健壮性与确定性,遵循以下原则:
|
||||
|
||||
- **服务独立化**: 每个外部数据源、每个核心业务能力都被封装成独立的、可独立部署和运行的微服务。
|
||||
- **事件驱动**: 引入消息总线(Message Bus)作为服务间通信的主干,实现服务的高度解耦和异步协作。
|
||||
- **数据中心化**: 所有微服务将标准化的数据写入一个由 `data-persistence-service` 独占管理的中央数据库,实现“数据写入即共享”。
|
||||
- **契约先行**: 所有服务间的通信与数据模型,均通过 `common-contracts` 共享库进行强类型约束,确保系统的一致性与稳定性。
|
||||
|
||||
## 2. 架构图与服务职责
|
||||
|
||||
### 2.1. 目标架构图
|
||||
|
||||
```
|
||||
+-------------+ +------------------+ +---------------------------+
|
||||
| | HTTP | | | |
|
||||
| 前端 |----->| API 网关 |----->| 消息总线 (NATS) |
|
||||
| (Next.js) | | (Rust) | | |
|
||||
| | | | | |
|
||||
+-------------+ +-------+----------+ +-------------+-------------+
|
||||
| |
|
||||
(读操作) | | (发布/订阅 命令与事件)
|
||||
| |
|
||||
+-----------------v------------------+ +------v------+ +----------------+
|
||||
| | | 数据提供商A | | 数据提供商B |
|
||||
| 数据持久化服务 (Rust) |<---->| (Tushare) | | (Finnhub) |
|
||||
| | | 服务 (Rust) | | 服务 (Rust) |
|
||||
+-----------------+------------------+ +-------------+ +----------------+
|
||||
|
|
||||
v
|
||||
+-----------------------------------------------------+
|
||||
| |
|
||||
| PostgreSQL 数据库 |
|
||||
| |
|
||||
+-----------------------------------------------------+
|
||||
```
|
||||
|
||||
### 2.2. 服务职责划分
|
||||
|
||||
- **API 网关 (api-gateway)**:
|
||||
- 面向前端的唯一入口 (BFF)。
|
||||
- 负责用户请求、认证鉴权。
|
||||
- 将前端的查询请求,转化为对**数据持久化服务**的数据读取调用。
|
||||
- 将前端的操作请求(如“生成新报告”),转化为命令(Command)并发布到**消息总线**。
|
||||
|
||||
- **数据提供商服务 (`*_provider-service`)**:
|
||||
- 一组独立的微服务,每个服务对应一个外部数据 API。
|
||||
- 订阅消息总线上的相关命令(如 `FetchFinancialsRequest`)。
|
||||
- 独立调用外部 API,对返回数据进行清洗、标准化。
|
||||
- 调用**数据持久化服务**的接口,将标准化后的数据写入数据库。
|
||||
|
||||
- **数据持久化服务 (data-persistence-service)**:
|
||||
- 数据库的**唯一守门人**,是整个系统中唯一有权直接与数据库交互的服务。
|
||||
- 为所有其他内部微服务提供稳定、统一的数据库读写 HTTP 接口。
|
||||
|
||||
- **消息总线 (Message Bus)**:
|
||||
- 整个系统的神经中枢,负责所有服务间的异步通信。当前选用 **NATS** 作为具体实现。
|
||||
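To make the command flow concrete, here is a minimal sketch of a provider service consuming a `FetchFinancialsRequest` command from NATS. It assumes the `async-nats`, `futures`, `tokio`, `serde`, and `serde_json` crates; the subject name and the struct fields are illustrative placeholders, not the definitions in `common-contracts`.

```rust
use futures::StreamExt;
use serde::Deserialize;

// Illustrative command shape; the real contract lives in `common-contracts`.
#[derive(Debug, Deserialize)]
struct FetchFinancialsRequest {
    symbol: String,
    market: String,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // NATS_ADDR mirrors the docker-compose environment, e.g. nats://nats:4222.
    let nats_addr =
        std::env::var("NATS_ADDR").unwrap_or_else(|_| "nats://localhost:4222".into());
    let client = async_nats::connect(nats_addr.as_str()).await?;

    // The subject name is an assumption for illustration.
    let mut sub = client.subscribe("commands.fetch_financials".to_string()).await?;

    while let Some(msg) = sub.next().await {
        match serde_json::from_slice::<FetchFinancialsRequest>(&msg.payload) {
            Ok(cmd) => {
                // Call the external data API, normalize the result, then write it
                // through the data-persistence-service HTTP API (not shown here).
                println!("fetching financials for {} ({})", cmd.symbol, cmd.market);
            }
            Err(e) => eprintln!("malformed command: {e}"),
        }
    }
    Ok(())
}
```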
|
||||
## 3. `SystemModule` 核心规范
|
||||
|
||||
为确保所有微服务行为一致、可观测、易于管理,我们定义了一套名为 `SystemModule` 的设计规范。它并非真实的 Rust Trait,而是一个所有服务都必须遵守的**行为契约**。
|
||||
|
||||
**每个微服务都必须:**
|
||||
|
||||
1. **容器化**: 提供一个 `Dockerfile` 用于部署。
|
||||
2. **配置驱动**: 从环境变量或配置服务中读取配置,缺少必要配置必须启动失败。
|
||||
3. **消息契约**: 严格按照 `common-contracts` 中定义的契约进行消息的订阅和发布。
|
||||
4. **暴露标准接口**: 实现一个内置的 HTTP 服务器,并暴露**两个强制性的 API 端点**:
|
||||
- `GET /health`: 返回服务的健康状态。
|
||||
- `GET /tasks`: 返回服务当前正在处理的所有任务列表及其进度。
|
||||
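A minimal sketch of the two mandatory endpoints using `axum` 0.7 and `serde`; the `TaskInfo` shape and the in-memory task store are illustrative assumptions, since each service defines its own task representation.

```rust
use std::sync::{Arc, Mutex};

use axum::{extract::State, routing::get, Json, Router};
use serde::Serialize;

// Illustrative task descriptor; the real shape is defined per service.
#[derive(Clone, Serialize)]
struct TaskInfo {
    task_id: String,
    progress: f32,
}

type TaskStore = Arc<Mutex<Vec<TaskInfo>>>;

async fn health() -> &'static str {
    "ok"
}

async fn tasks(State(store): State<TaskStore>) -> Json<Vec<TaskInfo>> {
    Json(store.lock().unwrap().clone())
}

#[tokio::main]
async fn main() {
    let store: TaskStore = Arc::new(Mutex::new(Vec::new()));
    let app = Router::new()
        .route("/health", get(health))
        .route("/tasks", get(tasks))
        .with_state(store);

    // SERVER_PORT mirrors the docker-compose configuration.
    let port = std::env::var("SERVER_PORT").unwrap_or_else(|_| "8080".into());
    let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{port}"))
        .await
        .expect("bind failed");
    axum::serve(listener, app).await.expect("server error");
}
```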
|
||||
## 4. 关键服务设计:数据持久化服务
|
||||
|
||||
- **核心定位**: 整个微服务架构中**唯一的数据持久化层**。
|
||||
- **职责边界**: 严格限定在管理跨多个业务领域共享的**核心数据实体**上(如公司信息、财务数据、市场数据、AI分析结果)。
|
||||
- **API 端点摘要**:
|
||||
| Method | Endpoint | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `PUT` | `/api/v1/companies` | 创建或更新公司基本信息 |
|
||||
| `GET` | `/api/v1/companies/{symbol}` | 获取公司基本信息 |
|
||||
| `POST` | `/api/v1/market-data/financials/batch` | 批量写入时间序列财务指标 |
|
||||
| `GET` | `/api/v1/market-data/financials/{symbol}` | 查询财务指标 |
|
||||
| `POST` | `/api/v1/analysis-results` | 保存一条新的 AI 分析结果 |
|
||||
| `GET` | `/api/v1/analysis-results` | 查询分析结果列表 |
|
||||
|
||||
## 5. 数据库 Schema 设计
|
||||
|
||||
### 5.1. 设计哲学
|
||||
|
||||
采用**“为不同形态的数据建立专属的、高度优化的持久化方案”**的核心哲学,统一使用 **PostgreSQL** 及其扩展生态。
|
||||
|
||||
- **时间序列数据**: 明确采用 **TimescaleDB** 扩展,通过 Hypertables 机制保障高性能的写入与查询。
|
||||
- **其他数据**: 使用标准的关系表进行存储。
|
||||
|
||||
### 5.2. 核心表结构
|
||||
|
||||
#### `time_series_financials` (财务指标表 - TimescaleDB)
|
||||
```sql
|
||||
CREATE TABLE time_series_financials (
|
||||
symbol VARCHAR(32) NOT NULL,
|
||||
metric_name VARCHAR(64) NOT NULL,
|
||||
period_date DATE NOT NULL,
|
||||
value NUMERIC NOT NULL,
|
||||
source VARCHAR(64),
|
||||
PRIMARY KEY (symbol, metric_name, period_date)
|
||||
);
|
||||
SELECT create_hypertable('time_series_financials', 'period_date');
|
||||
```
|
||||
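As an illustration of how `data-persistence-service` might write into this hypertable, here is a minimal `sqlx` sketch (a runtime-checked query against a PostgreSQL pool, with sqlx's `chrono` feature enabled). Only the column names and conflict target come from the DDL above; the helper itself is an assumption, not the service's actual repository code.

```rust
use sqlx::PgPool;

/// Upsert a single financial metric; the conflict target matches the
/// primary key (symbol, metric_name, period_date) defined above.
async fn upsert_financial(
    pool: &PgPool,
    symbol: &str,
    metric_name: &str,
    period_date: chrono::NaiveDate,
    value: f64,
    source: Option<&str>,
) -> Result<(), sqlx::Error> {
    sqlx::query(
        "INSERT INTO time_series_financials (symbol, metric_name, period_date, value, source)
         VALUES ($1, $2, $3, $4, $5)
         ON CONFLICT (symbol, metric_name, period_date)
         DO UPDATE SET value = EXCLUDED.value, source = EXCLUDED.source",
    )
    .bind(symbol)
    .bind(metric_name)
    .bind(period_date)
    .bind(value)
    .bind(source)
    .execute(pool)
    .await?;
    Ok(())
}
```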
|
||||
#### `daily_market_data` (每日市场数据表 - TimescaleDB)
|
||||
```sql
|
||||
CREATE TABLE daily_market_data (
|
||||
symbol VARCHAR(32) NOT NULL,
|
||||
trade_date DATE NOT NULL,
|
||||
open_price NUMERIC,
|
||||
close_price NUMERIC,
|
||||
volume BIGINT,
|
||||
total_mv NUMERIC,
|
||||
PRIMARY KEY (symbol, trade_date)
|
||||
);
|
||||
SELECT create_hypertable('daily_market_data', 'trade_date');
|
||||
```
|
||||
|
||||
#### `analysis_results` (AI分析结果表)
|
||||
```sql
|
||||
CREATE TABLE analysis_results (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
symbol VARCHAR(32) NOT NULL,
|
||||
module_id VARCHAR(64) NOT NULL,
|
||||
generated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
content TEXT NOT NULL,
|
||||
meta_data JSONB
|
||||
);
|
||||
```
|
||||
|
||||
#### `company_profiles` (公司基本信息表)
|
||||
```sql
|
||||
CREATE TABLE company_profiles (
|
||||
symbol VARCHAR(32) PRIMARY KEY,
|
||||
name VARCHAR(255) NOT NULL,
|
||||
industry VARCHAR(255),
|
||||
list_date DATE
|
||||
);
|
||||
```
|
||||
@ -1,112 +0,0 @@
|
||||
# 设计文档:统一历史记录与上下文管理重构
|
||||
|
||||
## 1. 目标
|
||||
实现一个统一且一致的历史管理系统,达成以下目标:
|
||||
1. **原子化历史记录**:一个“历史记录”严格对应**一次 Workflow 执行**(由 `request_id` 标识),彻底解决历史列表重复/碎片化问题。
|
||||
2. **单一数据源**:全局上下文(VGCS/Git)作为所有文件产物(报告、日志、数据)的唯一真实存储源。
|
||||
3. **轻量化索引**:数据库(`session_data` 或新表)仅存储结构化的“索引”(Snapshot),指向 VGCS 中的 Commit 和文件路径。
|
||||
|
||||
## 2. 现状分析
|
||||
- **碎片化**:目前 `analysis_results` 表存储的是单个 Task 的结果。如果一个工作流包含 N 个分析步骤,历史列表中就会出现 N 条记录。
|
||||
- **数据冗余**:结果内容(Markdown 等)既作为文件存在 VGCS 中,又作为文本列存在 Postgres 数据库中。
|
||||
- **历史视图缺失**:缺乏一个能够代表整次执行状态(包含拓扑结构、状态、所有产物引用)的根对象,导致查询历史列表时困难。
|
||||
|
||||
## 3. 架构方案
|
||||
|
||||
### 3.1. 核心概念:工作流快照 (Workflow Snapshot)
|
||||
不再将每个 Task 视为独立的历史记录,我们定义 **Workflow Snapshot** 为历史的原子单位。
|
||||
|
||||
一个 Snapshot 包含:
|
||||
- **元数据**:`request_id`(请求ID), `symbol`(标的), `market`(市场), `template_id`(模板ID), `start_time`(开始时间), `end_time`(结束时间), `final_status`(最终状态)。
|
||||
- **拓扑结构**:DAG 结构(节点与边)。
|
||||
- **执行状态**:针对每个节点记录:
|
||||
- `status`:状态 (Completed, Failed, Skipped)
|
||||
- `output_commit`:该节点产生的 VGCS Commit Hash。
|
||||
- `artifacts`:产物映射表,Key 为产物名称,Value 为 VGCS 文件路径 (例如 `{"report": "analysis/summary.md", "log": "analysis/execution.log"}`)。
|
||||
|
||||
### 3.2. 数据存储变更
|
||||
|
||||
#### A. `workflow_history` 表 (或重构后的 `session_data`)
|
||||
我们将引入一张专用表(或规范化 `session_data` 的使用)来存储 **Workflow Manifest**。
|
||||
|
||||
```sql
|
||||
CREATE TABLE workflow_history (
|
||||
request_id UUID PRIMARY KEY,
|
||||
symbol VARCHAR(20) NOT NULL,
|
||||
market VARCHAR(10) NOT NULL,
|
||||
template_id VARCHAR(50),
|
||||
status VARCHAR(20) NOT NULL, -- 'Completed', 'Failed'
|
||||
start_time TIMESTAMPTZ NOT NULL,
|
||||
end_time TIMESTAMPTZ,
|
||||
|
||||
-- Snapshot JSON 数据
|
||||
snapshot_data JSONB NOT NULL
|
||||
-- {
|
||||
-- "dag": { ... },
|
||||
-- "tasks": {
|
||||
-- "task_id_1": { "status": "Completed", "commit": "abc1234", "paths": { "report": "..." } }
|
||||
-- }
|
||||
-- }
|
||||
);
|
||||
```
|
||||
|
||||
*注:为了减少迁移摩擦,我们可以继续使用 `session_data` 表,并指定 `data_type = 'workflow_snapshot'`,但建立专用表更有利于查询和维护。*
|
||||
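A minimal sketch of how the `snapshot_data` payload could be modelled in Rust with `serde`; the field names follow section 3.1, while the concrete types (for example representing the DAG as raw JSON) are assumptions for illustration.

```rust
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

/// Per-node execution record inside a workflow snapshot.
#[derive(Debug, Serialize, Deserialize)]
struct TaskRecord {
    status: String,                     // "Completed", "Failed", "Skipped"
    output_commit: Option<String>,      // VGCS commit hash produced by the task
    artifacts: HashMap<String, String>, // e.g. {"report": "analysis/summary.md"}
}

/// The JSON document stored in workflow_history.snapshot_data.
#[derive(Debug, Serialize, Deserialize)]
struct WorkflowSnapshot {
    dag: serde_json::Value,             // topology; concrete shape is service-defined
    tasks: HashMap<String, TaskRecord>, // keyed by task_id
}
```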
|
||||
#### B. VGCS (Git 上下文) 的使用规范
|
||||
- **输入**:初始 Commit 包含 `request.json`。
|
||||
- **过程**:每个 Task (Worker) 检出基础 Commit,执行工作,写入文件(报告、日志),并创建 **New Commit**。
|
||||
- **合并**:Orchestrator 负责追踪这些 Commit 的 DAG 关系。
|
||||
- **终态**:Orchestrator 创建最终的“Merge Commit”(可选,或仅引用各叶子节点的 Commit)并在 `workflow_history` 中记录。
|
||||
|
||||
### 3.3. 组件职责划分
|
||||
|
||||
#### 1. Worker 服务 (Report Gen, Providers)
|
||||
- **输入**:接收 `base_commit`, `task_id`, `output_path_config`。
|
||||
- **动作**:
|
||||
- 初始化 `WorkerContext` (VGCS)。
|
||||
- 将 `report.md` 写入 `output_path`。
|
||||
- 将 `_execution.md` (日志) 写入 `log_path`。
|
||||
- **Commit**:提交更改,生成 Commit Hash。
|
||||
- **输出**:返回 `new_commit_hash` 和 `artifact_paths` (Map<Name, Path>) 给 Orchestrator。
|
||||
- **禁止**:Worker 不再直接向数据库的 `analysis_results` 表写入数据。
|
||||
|
||||
#### 2. Workflow Orchestrator (编排器)
|
||||
- **协调**:从 `TaskCompleted` 事件中收集 `new_commit_hash` 和 `artifact_paths`。
|
||||
- **状态追踪**:更新内存中的 DAG 状态。
|
||||
- **完成处理**:
|
||||
- 当所有任务结束后,生成 **Workflow Snapshot**。
|
||||
- 调用 `persistence-service` 将 Snapshot 保存至 `workflow_history`。
|
||||
- 发送 `WorkflowCompleted` 事件。
|
||||
|
||||
#### 3. Data Persistence Service (持久化服务)
|
||||
- **新接口**:`GET /api/v1/history`
|
||||
- 返回 `workflow_history` 列表(摘要信息)。
|
||||
- **新接口**:`GET /api/v1/history/{request_id}`
|
||||
- 返回完整的 Snapshot(详情信息)。
|
||||
- **旧接口处理**:废弃 `GET /api/v1/analysis-results` 或将其重定向为查询 `workflow_history`。
|
||||
|
||||
#### 4. Frontend (前端)
|
||||
- **历史页**:调用 `/api/v1/history`。每个 `request_id` 只展示一行。
|
||||
- **报告页**:
|
||||
- 获取特定的历史详情。
|
||||
- 使用 `artifact_paths` + `commit_hash` 通过 VGCS API (或代理)以此获取文件内容。
|
||||
|
||||
## 4. 实施计划
|
||||
|
||||
1. **Schema 定义**:定义 `WorkflowSnapshot` 结构体及 SQL 迁移脚本 (`workflow_history`)。
|
||||
2. **Orchestrator 改造**:
|
||||
- 修改 `handle_task_completed` 以聚合 `artifact_paths`。
|
||||
- 实现 `finalize_workflow` 逻辑,用于构建并保存 Snapshot。
|
||||
3. **Worker 改造**:
|
||||
- 确保 `report-generator` 在 `TaskResult` 中返回结构化的 `artifact_paths`。
|
||||
- 移除 `report-generator` 中对 `create_analysis_result` 的数据库调用。
|
||||
4. **Persistence Service 改造**:
|
||||
- 实现 `workflow_history` 的 CRUD 操作。
|
||||
5. **Frontend 改造**:
|
||||
- 更新 API 调用以适配新的历史记录接口。
|
||||
|
||||
## 5. 核心收益
|
||||
- **单一事实来源**:文件存 Git,元数据存 DB,杜绝数据不同步。
|
||||
- **历史记录原子性**:一次运行 = 一条记录。
|
||||
- **可追溯性**:每个产物都精确关联到一个 Git Commit。
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
## 项目当前状态
|
||||
|
||||
### 项目目的
|
||||
- **目标**:构建一套面向A股与美股的基本面分析与研究支持系统,聚合股价、财务数据与外部资讯,结合大模型进行结构化分析与自动化报告生成,并支持历史留存与复盘。
|
||||
|
||||
### 当前功能与数据状态
|
||||
- **A股财务数据**:已可正常获取并落库/展示。
|
||||
- **每股口径(per-share)数据**:仅部分可得;个别财务指标存在异常或口径不一致的问题。
|
||||
- 相关定义、口径说明与已知问题,请参见文档:[财务数据字典](./financial_data_dictionary.md)。
|
||||
- **报告与分析**:
|
||||
- 首页输入公司代码与市场,点击“生成报告”后,应展示:
|
||||
- 公司股价
|
||||
- 财务数据
|
||||
- 大模型的分析结论
|
||||
- 目前分析步骤仅采用“提示词”的方式驱动;生成的报告支持保存到数据库。
|
||||
|
||||
### 系统运行与部署
|
||||
- **数据库与网络**:
|
||||
- 数据库部署在许晨公司内部网络环境中;需要内网或通过跳板/映射方式访问。
|
||||
- 本地运行可能存在连接限制。数据库当前未在本机配置,但可按需进行本机配置(需要对 LV 做调整,最终仍以本机配置为主)。
|
||||
- **运行方式**:
|
||||
- 可将项目打包为 Docker 运行,因此也支持纯本地部署。
|
||||
- 提供脚本位于 `scripts/`:
|
||||
- `dev.sh`:调试脚本,同时启动前端与后端
|
||||
- `run.sh`:直接运行脚本
|
||||
- **当前运行环境**:
|
||||
- 目前在许成的一台虚拟机上运行,便于访问内部数据库,并通过 LV 堡垒机进行映射。
|
||||
|
||||
### 已知问题/限制
|
||||
- 每股数据覆盖面不全,部分财务指标存在疑似口径或计算问题(详见“财务数据字典”)。
|
||||
- 数据库处于内网环境,外部或本地直连存在门槛;需配置 LV/隧道或切换至本机数据库配置。
|
||||
- 大模型分析仍以提示词工程为主,未融合多源结构化信号。
|
||||
|
||||
### 后续计划(优先级由高到低)
|
||||
1. **完成美股数据获取并校验正确性**(当前最紧要)
|
||||
- 引入更多数据源以提升覆盖面与一致性(如同花顺 iFinD);如能接入 Bloomberg(彭博)更佳,但实现难度较高。
|
||||
2. **接入第三方大模型 API**(由许成购买的公司提供)
|
||||
- 数据范围:新闻公告、研究报告、电话会议纪要等。
|
||||
- 具体接入方式尚未确定,需进一步讨论与评估。
|
||||
3. **升级分析框架与规则**
|
||||
- 不再仅依赖提示词;需融合财务数据、股价、新闻与研报等多源信息。
|
||||
- 按模块/主题进行特征组合与权重设计;输出结构化因子与可解释结论。
|
||||
- 具体实现细节需与许成沟通确认。
|
||||
|
||||
### 待定与沟通事项
|
||||
- 本机数据库配置方案与 LV 调整细节(端口、权限、备份策略)。
|
||||
- 第三方大模型 API 的调用配额、上下文长度、费用与合规约束。
|
||||
- 多数据源融合后的字段映射、口径优先级与冲突解决策略。
|
||||
- 前端报告展示结构(股价/财务/大模型结论)的最终版式与交互细节。
|
||||
|
||||
### 参考
|
||||
- 数据口径与字段说明:[财务数据字典](./financial_data_dictionary.md)
|
||||
- 启动/运行脚本:`scripts/dev.sh`、`scripts/run.sh`
|
||||
@ -1,99 +0,0 @@
|
||||
## 2025-11-03 开发日志
|
||||
|
||||
**比较基线**
|
||||
- 上一次提交: b982cd5(2025-10-31 22:14 +0800,“更新前端配置、文档和脚本”)
|
||||
|
||||
**今日概览**
|
||||
- 共修改 20 个文件:新增约 1047 行,删除约 616 行
|
||||
- 关键主题:
|
||||
- 后端数据源抽象与路由重构(引入 DataManager 与多 Provider)
|
||||
- AI 分析模块的编排、依赖解析与流式输出接口
|
||||
- 前端接入 Prisma(PostgreSQL)并新增报告存储 API 与页面
|
||||
- 移除旧 Tushare 客户端实现,统一到新 Provider 架构
|
||||
- 配置、依赖与开发脚本同步更新
|
||||
|
||||
---
|
||||
|
||||
### 后端(FastAPI)
|
||||
|
||||
**数据源抽象与管理**
|
||||
- 新增 `backend/app/data_manager.py`:集中加载 `config/data_sources.yaml`,根据市场(CN/US/HK/JP)优先级选择 Provider;提供统一的 `get_stock_basic`、`get_financial_statements`、`get_daily_price` 等方法。
|
||||
- 新增 Provider 抽象与实现:
|
||||
- 抽象基类:`backend/app/data_providers/base.py`
|
||||
- 实现:`backend/app/data_providers/tushare.py`、`.../yfinance.py`、`.../finnhub.py`、`.../ifind.py`
|
||||
- Token 优先从环境变量读取,次选 `config/config.json`。
|
||||
- 新增配置 `config/data_sources.yaml`:定义各数据源的 `api_key_env` 和各市场的优先顺序。
|
||||
- 移除旧实现 `backend/app/services/tushare_client.py`。
|
||||
|
||||
**路由与业务逻辑重构**
|
||||
- `backend/app/routers/financial.py` 大幅重写:
|
||||
- `GET /data-sources`:返回需要密钥的数据源列表(用于前端指引)。
|
||||
- 分析编排接口:
|
||||
- `POST /china/{ts_code}/analysis`:读取分析模块配置,拓扑排序按依赖顺序执行,汇总结果。
|
||||
- `GET /china/{ts_code}/analysis/{analysis_type}`:单模块分析,自动解析依赖并注入上下文。
|
||||
- `GET /china/{ts_code}/analysis/{analysis_type}/stream`:流式输出纯文本分析内容。
|
||||
- `GET /analysis-config` 与 `PUT /analysis-config`:读取/更新 `config/analysis-config.json`。
|
||||
- `GET /china/{ts_code}`:通过 `DataManager` 批量拉取多年度报表,聚合为前端所需的 `series` 结构。
|
||||
- `GET /china/{ts_code}/company-profile`:使用 LLM 生成公司画像(非流式)。
|
||||
|
||||
**分析客户端**
|
||||
- `backend/app/services/analysis_client.py`:统一的 OpenAI 兼容客户端,支持:
|
||||
- 非流式生成:`generate_analysis(...)`
|
||||
- 流式生成:`generate_analysis_stream(...)`
|
||||
- 安全模板占位符替换与上下文合并;读写 `config/analysis-config.json`。
|
||||
|
||||
**应用入口与依赖**
|
||||
- `backend/app/main.py`:
|
||||
- 增强日志输出(自定义 Handler,开发期强制输出到 stdout)。
|
||||
- 保持 CORS 全开与路由注册(`/api/v1/config/*`、`/api/v1/financials/*`)。
|
||||
- `backend/requirements.txt`:补充 `yfinance`、`finnhub-python`、`pandas`、`PyYAML`、`asyncpg`、`greenlet` 等依赖。
|
||||
|
||||
---
|
||||
|
||||
### 配置与文档
|
||||
- `config/analysis-config.json`:更新分析模块配置(依赖、模型、模板)。
|
||||
- `docs/user-guide.md`:小幅补充。
|
||||
|
||||
---
|
||||
|
||||
### 前端(Next.js 15 / React 19)
|
||||
|
||||
**Prisma 集成与报告 API**
|
||||
- 新增 Prisma:
|
||||
- `frontend/prisma/schema.prisma` 定义 `Report { id, symbol, content(Json), createdAt }`。
|
||||
- `frontend/src/lib/prisma.ts` 提供 PrismaClient 单例。
|
||||
- 依赖更新:`@prisma/client`、`prisma` 等;`package-lock.json` 同步。
|
||||
- 新增 API:
|
||||
- `GET/POST /api/reports`:分页查询与创建报告(基本校验)。
|
||||
- `GET /api/reports/[id]`:按 ID 获取报告。
|
||||
|
||||
**页面与其他改动**
|
||||
- `frontend/src/app/reports/page.tsx`:渲染报告列表并跳转至详情页 `reports/[id]`。
|
||||
- 新增 `frontend/src/app/reports/[id]/page.tsx`:报告详情页。
|
||||
- `frontend/src/app/report/[symbol]/page.tsx`:较大调整(生成/展示逻辑整合)。
|
||||
- 其他微调:`layout.tsx`、`api/financials/[...slug]/route.ts`、`.gitignore`、`next.config.mjs`。
|
||||
- 运行脚本端口:`npm run dev` 默认 3001。
|
||||
|
||||
---
|
||||
|
||||
### 脚本
|
||||
- `scripts/dev.sh`:增强启动/开发流程。
|
||||
- 若干测试脚本小改:`scripts/test-*.py`。
|
||||
- 新增过渡脚本 `scripts/tushare_legacy_client.py`(旧 Tushare 逻辑备份/兼容)。
|
||||
|
||||
---
|
||||
|
||||
### 删除/新增(关键)
|
||||
- 删除:`backend/app/services/tushare_client.py`。
|
||||
- 新增(尚未提交的工作副本):
|
||||
- 后端:`backend/app/data_manager.py`、`backend/app/data_providers/*`
|
||||
- 配置:`config/data_sources.yaml`
|
||||
- 前端:`frontend/prisma/schema.prisma`、`frontend/src/lib/prisma.ts`、`frontend/src/app/api/reports/*`、`frontend/src/app/reports/[id]/page.tsx`
|
||||
|
||||
---
|
||||
|
||||
### 备注
|
||||
- 需确保各数据源的密钥通过环境变量或 `config/config.json` 正确配置。
|
||||
- 分析模块配置(依赖与模板)变更需同步前后端约定。
|
||||
|
||||
|
||||
@ -1,74 +0,0 @@
|
||||
## 2025-11-04 开发日志
|
||||
|
||||
**今日概览**
|
||||
- 关键主题:
|
||||
- 新增财务指标支持:在 Tushare 数据源中实现并集成了员工人数、股东户数、研发人员、所得税与利润总额关系等关键指标。
|
||||
- 全栈功能贯通:完成了从后端数据获取、API 暴露到前端报告页展示的完整开发链路。
|
||||
- 技术债清理:移除了多个陈旧的、功能单一的测试脚本,整合测试逻辑。
|
||||
- 文档同步:更新了用户手册,以反映新增功能。
|
||||
|
||||
---
|
||||
|
||||
### 后端(FastAPI)
|
||||
|
||||
**数据源 (Tushare Provider)**
|
||||
- `backend/app/data_providers/tushare.py`:
|
||||
- 新增 `get_employee_number` 方法,用于获取上市公司员工人数及构成(技术、生产、销售、行政)。
|
||||
- 新增 `get_holder_number` 方法,用于获取股东户数及变化。
|
||||
- 新增 `get_tax_to_ebt` 方法,用于计算所得税与利润总额的比例,以分析税负情况。
|
||||
- 可能对现有财务报表获取逻辑进行了优化,以支持新指标的整合。
|
||||
|
||||
**API 路由与模型 (Financial Router & Schemas)**
|
||||
- `backend/app/routers/financial.py`:
|
||||
- 在 `GET /china/{ts_code}` 聚合接口中,新增了对员工人数、股东户数、税收数据的调用和组装逻辑。
|
||||
- 确保新指标能够正确地合并到返回给前端的 `series` 数据结构中。
|
||||
- `backend/app/schemas/financial.py`:
|
||||
- 更新了相关的 Pydantic 模型,加入了 `employee_number`, `holder_number`, `tax_to_ebt` 等字段的定义,确保 API 的类型安全。
|
||||
|
||||
**数据管理器 (Data Manager)**
|
||||
- `backend/app/data_manager.py`:
|
||||
- 对 `DataManager` 进行了相应修改,使其能够统一调度 Tushare Provider 提供的新数据接口。
|
||||
|
||||
---
|
||||
|
||||
### 前端(Next.js)
|
||||
|
||||
**财务报告页面**
|
||||
- `frontend/src/app/report/[symbol]/page.tsx`:
|
||||
- 在报告页中新增了图表或表格,用于可视化展示员工人数变化、股东户数趋势以及所得税与利润总额的关系。
|
||||
- 调整了页面布局和组件,以容纳新的数据模块。
|
||||
- `frontend/src/app/reports/[id]/page.tsx`:
|
||||
- 对已保存的报告页面进行了适配,确保在加载旧报告或包含新指标的报告时能够正确渲染。
|
||||
|
||||
**工具函数与类型**
|
||||
- `frontend/src/lib/financial-utils.ts`:
|
||||
- 添加了处理新财务指标(如格式化员工数据、计算股东户数环比变化等)的辅助函数。
|
||||
- `frontend/src/types/index.ts`:
|
||||
- 更新了 TypeScript 类型定义,增加了与新后端模型对应的接口。
|
||||
|
||||
**其他**
|
||||
- `frontend/package.json`: 可能更新了某些依赖库以支持新的图表或功能。
|
||||
- `frontend/src/lib/prisma.ts`: 可能调整了 Prisma 客户端的配置或扩展。
|
||||
|
||||
---
|
||||
|
||||
### 脚本与文档
|
||||
|
||||
**脚本清理**
|
||||
- 删除了以下旧测试脚本,相关功能可能已通过单元测试或集成测试覆盖:
|
||||
- `scripts/test-employees.py`
|
||||
- `scripts/test-holder-number.py`
|
||||
- `scripts/test-holder-processing.py`
|
||||
- `scripts/test-tax-to-ebt.py`
|
||||
- `scripts/test-api-tax-to-ebt.py`
|
||||
- `scripts/test-config.py`
|
||||
- 删除了 `scripts/tushare_legacy_client.py`,完成了向新 Provider 架构的迁移。
|
||||
|
||||
**文档**
|
||||
- `docs/user-guide.md`: 更新了用户指南,加入了关于如何解读新增财务指标(员工、股东、税收)的说明。
|
||||
|
||||
---
|
||||
|
||||
### 备注
|
||||
- 本次更新丰富了公司的非财报基本面数据,为分析提供了更多维度。
|
||||
- 前端报告页的性能在增加了新图表后需要进一步观察。
|
||||
@ -1,86 +0,0 @@
|
||||
## 2025-11-06 开发日志
|
||||
|
||||
**今日概览**
|
||||
- 多市场“昨日快照”接口落地(CN/US/HK/JP)并在报告页新增快照卡片
|
||||
- 美股数据链路增强:Finnhub 支持 SDK+HTTP 兜底与标准化映射,YFinance 兼容CN代码规则
|
||||
- 财务报表聚合与 period 统一(YYYYMMDD),去重并按年限裁剪;日度行情/估值按报告期对齐
|
||||
- 分析执行体验优化:顺序流式、可停止/继续、单模块重试、耗时/进度统计
|
||||
- 配置与文档:Prisma 读取 `config/config.json` 数据库URL;补充“财务数据字典”和项目状态说明
|
||||
|
||||
---
|
||||
|
||||
### 后端(FastAPI)
|
||||
|
||||
**DataManager 与数据源策略**
|
||||
- `backend/app/data_manager.py`
|
||||
- 只从 `config/config.json` 读取各 Provider 的 `api_key`(不再读取环境变量),初始化受控更清晰
|
||||
- 依据 `config/data_sources.yaml` 的市场优先级,按序尝试 `tushare`、`yfinance`、`finnhub` 等 Provider
|
||||
- 统一 `get_data` 成功判定与异常兜底,支持多返回类型(list/dict/标量)
|
||||
- `get_financial_statements`:将扁平报表数据规范化为 `series` 结构,确保数值可序列化
|
||||
|
||||
**Finnhub Provider(美股重点)**
|
||||
- `backend/app/data_providers/finnhub.py`
|
||||
- 初始化增加 Token 掩码日志;SDK 失败时自动回退到 HTTP 接口(`profile2`、`financials-reported`)
|
||||
- `get_stock_basic` 标准化公司基本信息;`get_daily_price` 输出 `{trade_date, open, high, low, close, vol}`
|
||||
- `get_financial_statements` 将 `financials-reported` 年度数据映射为内部字段,计算派生比率:`grossprofit_margin`、`netprofit_margin`、`roa`、`roe`,并直接输出 `series`
|
||||
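For reference, the derived ratios mentioned above follow the standard definitions below; treating these as the provider's exact formulas is an assumption, since the mapping code itself is not shown here.

```latex
\begin{aligned}
\text{grossprofit\_margin} &= \frac{\text{gross profit}}{\text{revenue}}, &
\text{netprofit\_margin} &= \frac{\text{net income}}{\text{revenue}}, \\
\text{roa} &= \frac{\text{net income}}{\text{total assets}}, &
\text{roe} &= \frac{\text{net income}}{\text{shareholders' equity}}.
\end{aligned}
```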
|
||||
**YFinance Provider(兼容与映射)**
|
||||
- `backend/app/data_providers/yfinance.py`
|
||||
- 适配中国代码:`.SH -> .SS`,`.SZ` 优先尝试无后缀
|
||||
- 规范化 `stock_basic`、日线行情与年度财务(合并利润表/资产负债表/现金流),提供基础字段重命名
|
||||
|
||||
**财务与分析路由**
|
||||
- `backend/app/routers/financial.py`
|
||||
- 新增“昨日快照”接口:
|
||||
- `GET /api/financials/china/{ts_code}/snapshot`:优先 `daily_basic`,兜底 `daily`
|
||||
- `GET /api/financials/{market}/{stock_code}/snapshot`:CN 复用上式;其他市场用日线在近10日内回看最近交易日
|
||||
- `GET /api/financials/{market}/{stock_code}`:
|
||||
- 一次性拉取并聚合年度财报 `series`;识别当年最新报告期,将估值/股价按报告期映射
|
||||
- 统一 `period`(优先 YYYYMMDD,缺失则由 `year` 映射为 `YYYY1231`),去重、排序并按 `years` 裁剪
|
||||
- 保持分析编排/单模块/流式接口与分析配置读写接口,便于前端顺序流式展示
|
||||
|
||||
---
|
||||
|
||||
### 前端(Next.js 15 / React 19)
|
||||
|
||||
**报告页体验**
|
||||
- `frontend/src/app/report/[symbol]/page.tsx`
|
||||
- 新增“昨日快照”卡片:日期、股价、PE、PB、股息率、总市值(亿元)
|
||||
- 分析执行:顺序流式、可停止/继续、总进度与耗时、单模块“重新生成分析”
|
||||
- 财务表格:统一 `period`,扩充指标(人均效率、费用率、资产占比、周转能力、市场表现等)并突出关键阈值
|
||||
|
||||
**数据获取 Hooks**
|
||||
- `frontend/src/hooks/useApi.ts`
|
||||
- 新增 `useChinaSnapshot`、`useSnapshot`;统一市场参数(china/us/hk/jp)与 SWR 策略
|
||||
- `fetcher` 强化:兼容非标准 JSON 的错误返回,统一抛出可读错误
|
||||
|
||||
**Prisma 适配**
|
||||
- `frontend/src/lib/prisma.ts`
|
||||
- 从 `config/config.json` 动态解析数据库URL;将 `postgresql+asyncpg://` 转换为 Prisma 需要的 `postgresql://`,默认追加 `schema=public`
|
||||
- 开发环境下复用单例,减少连接开销
|
||||
|
||||
---
|
||||
|
||||
### 文档
|
||||
- 新增:`docs/financial_data_dictionary.md`(统一字段口径与来源映射)
|
||||
- 更新:`docs/project-status.md`(现状、限制与后续计划);`docs/user-guide.md`(报告页、快照与分析流程)
|
||||
- 清理:删除 `docs/tasks.md`
|
||||
|
||||
---
|
||||
|
||||
### 风险与注意事项
|
||||
- Provider 密钥现仅从 `config/config.json` 读取;未配置将跳过需密钥的数据源(日志有警告)
|
||||
- 美股字段映射存在口径差异,个别指标为近似计算,需结合“财务数据字典”持续校验
|
||||
- 单模块分析尝试附带最近年度财报上下文,边界与稳定性需结合真实数据回归
|
||||
|
||||
---
|
||||
|
||||
### 验收建议
|
||||
- 快照:
|
||||
- CN: `GET /api/financials/china/600519.SH/snapshot`
|
||||
- US: `GET /api/financials/us/AAPL/snapshot`
|
||||
- 报告页:访问 `/report/600519?market=china` 验证快照卡片、顺序流式与保存按钮
|
||||
- 多源融合:调整 `config/data_sources.yaml` 优先级,观察回退与成功率日志
|
||||
- 数据库:在无 `.env` 的场景下,确认 Prisma 能从 `config/config.json` 正确加载连接串
|
||||
|
||||
|
||||
@ -1,198 +0,0 @@
|
||||
# Rust 数据持久化服务设计 (`rust_data_service_design.md`)
|
||||
|
||||
## 1. 服务定位与核心职责
|
||||
|
||||
- **服务名称**: `data-persistence-service`
|
||||
- **核心定位**: 本服务是整个微服务架构中**唯一的数据持久化层**。它是数据库的**独占所有者 (Sole Owner)**,负责管理所有与数据库的交互。
|
||||
|
||||
### 1.1. 职责边界:核心实体服务
|
||||
|
||||
本服务被设计为**核心实体数据服务**,而非一个包罗万象的、管理所有数据的“上帝服务”。它的职责被严格限定在管理那些**跨多个业务领域共享的核心数据实体**上。
|
||||
|
||||
这种设计遵循了一种务实的**混合微服务数据模式**:
|
||||
- **核心数据集中管理**: 保证了通用数据的唯一性和一致性。我们定义的核心实体包括:
|
||||
- 公司基本信息 (`company_profiles`)
|
||||
- 标准化财务数据 (`time_series_financials`)
|
||||
- 标准化市场数据 (`daily_market_data`)
|
||||
- AI分析结果 (`analysis_results`) - 作为一种可被多方消费的核心产出物。
|
||||
- **业务数据独立持久化**: 未来新增的、具有独立业务领域的微服务(例如“量化回测服务”)将被**允许并鼓励拥有和管理自己的数据库 Schema 或表**。这些新服务在需要核心实体数据时,应通过调用本服务提供的 API 来获取,而不是直接连接数据库。
|
||||
|
||||
这一策略确保了核心数据的一致性,同时为新服务的独立开发和快速迭代提供了最大的灵活性。
|
||||
|
||||
## 2. 技术选型与开发范式
|
||||
|
||||
### 2.1. 核心技术栈
|
||||
- **语言**: **Rust**
|
||||
- **开发套件**: **`service_kit`** (项目内置的一站式微服务开发套件)
|
||||
- **Web 框架**: **`axum`**
|
||||
- **数据库交互**: **`sqlx`**
|
||||
- **序列化/反序列化**: **`serde`** (由 `service_kit` 自动集成)
|
||||
|
||||
### 2.2. 开发范式:API 规范驱动
|
||||
|
||||
我们将采纳 `service_kit` 提供的、以 **OpenAPI** 规范为核心的开发范式。
|
||||
- **数据契约**: 所有的数据传输对象 (DTOs) 都将使用 `service_kit` 提供的 `#[api_dto]` 宏进行标注。此宏会自动派生 `serde` 和 `utoipa::ToSchema`,确保我们的 Rust 代码即是 API 规范的“唯一事实源”。
|
||||
- **前后端协同**: 我们将使用 `cargo forge generate-types` 命令,从服务自动生成的 OpenAPI 规范中,为前端项目生成 TypeScript 类型定义,实现端到端的类型安全。
|
||||
- **数据交换格式**: 服务间的数据交换格式依然是 **JSON**。
|
||||
|
||||
## 3. API 端点设计 (API Endpoint Design)
|
||||
|
||||
API 的设计严格服务于对核心实体的通用读写操作。
|
||||
|
||||
---
|
||||
|
||||
### 3.1. 公司信息 (`/companies`)
|
||||
|
||||
- **对应表**: `company_profiles`
|
||||
|
||||
| Method | Endpoint | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `PUT` | `/api/v1/companies` | 创建或更新(Upsert)一个公司的基本信息 |
|
||||
| `GET` | `/api/v1/companies/{symbol}` | 获取指定公司的基本信息 |
|
||||
|
||||
---
|
||||
|
||||
### 3.2. 市场与财务数据 (`/market-data`)
|
||||
|
||||
- **对应表**: `time_series_financials`, `daily_market_data`
|
||||
|
||||
| Method | Endpoint | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `POST` | `/api/v1/market-data/financials/batch` | 批量写入多条时间序列财务指标 |
|
||||
| `GET` | `/api/v1/market-data/financials/{symbol}` | 查询某公司的财务指标 (支持按 `metrics`, `start_date`, `end_date` 过滤) |
|
||||
| `POST` | `/api/v1/market-data/daily/batch` | 批量写入多条每日市场行情数据 |
|
||||
| `GET` | `/api/v1/market-data/daily/{symbol}` | 查询某公司的每日行情 (支持按 `start_date`, `end_date` 过滤) |
|
||||
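A minimal sketch of a provider service calling the batch write endpoint above with `reqwest` (built with its `json` feature) and `chrono` (with its `serde` feature). The DTO mirrors the `TimeSeriesFinancialDto` sketched in section 4; the sample row, the environment-variable fallback, and the simplified error handling are illustrative assumptions.

```rust
use serde::Serialize;

// Illustrative copy of the TimeSeriesFinancialDto from section 4.
#[derive(Serialize)]
struct TimeSeriesFinancialDto {
    symbol: String,
    metric_name: String,
    period_date: chrono::NaiveDate,
    value: f64,
    source: Option<String>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // DATA_PERSISTENCE_SERVICE_URL mirrors the docker-compose configuration.
    let base = std::env::var("DATA_PERSISTENCE_SERVICE_URL")
        .unwrap_or_else(|_| "http://localhost:3000".into());

    let rows = vec![TimeSeriesFinancialDto {
        symbol: "600519.SH".into(),
        metric_name: "roe".into(),
        period_date: chrono::NaiveDate::from_ymd_opt(2023, 12, 31).unwrap(),
        value: 0.25,
        source: Some("tushare".into()),
    }];

    // POST the batch to /api/v1/market-data/financials/batch and fail on non-2xx.
    let resp = reqwest::Client::new()
        .post(format!("{base}/api/v1/market-data/financials/batch"))
        .json(&rows)
        .send()
        .await?
        .error_for_status()?;

    println!("batch write accepted: {}", resp.status());
    Ok(())
}
```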
|
||||
---
|
||||
|
||||
### 3.3. AI 分析结果 (`/analysis-results`)
|
||||
|
||||
- **对应表**: `analysis_results`
|
||||
|
||||
| Method | Endpoint | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `POST` | `/api/v1/analysis-results` | 保存一条新的 AI 分析结果 |
|
||||
| `GET` | `/api/v1/analysis-results` | 查询分析结果列表 (支持按 `symbol`, `module_id` 过滤) |
|
||||
| `GET` | `/api/v1/analysis-results/{id}` | 获取单条分析结果的详情 |
|
||||
|
||||
---
|
||||
|
||||
### 3.4. 系统配置 (`/system-config`)
|
||||
|
||||
- **对应表**: `system_config`
|
||||
|
||||
| Method | Endpoint | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `PUT` | `/api/v1/system-config/{key}` | 创建或更新一条键值对配置 |
|
||||
| `GET` | `/api/v1/system-config/{key}` | 获取一条键值对配置 |
|
||||
|
||||
|
||||
## 4. 数据传输对象 (DTOs)
|
||||
|
||||
所有 API 的请求体和响应体都将使用 `service_kit` 的 `#[api_dto]` 宏进行定义,以自动获得序列化、API Schema 生成和调试能力。
|
||||
|
||||
```rust
|
||||
use service_kit::macros::api_dto;
|
||||
|
||||
// 示例:用于批量写入财务数据的 DTO
|
||||
#[api_dto]
|
||||
pub struct TimeSeriesFinancialDto {
|
||||
pub symbol: String,
|
||||
pub metric_name: String,
|
||||
pub period_date: chrono::NaiveDate,
|
||||
pub value: f64,
|
||||
pub source: Option<String>,
|
||||
}
|
||||
|
||||
// 示例:用于创建 AI 分析结果的 DTO
|
||||
#[api_dto]
|
||||
pub struct NewAnalysisResultDto {
|
||||
pub symbol: String,
|
||||
pub module_id: String,
|
||||
pub model_name: Option<String>,
|
||||
pub content: String,
|
||||
pub meta_data: Option<serde_json::Value>,
|
||||
}
|
||||
```
|
||||
|
||||
## 5. 开发流程与工具链
|
||||
|
||||
本服务将完全遵循 `service_kit` 提供的标准化开发流程。
|
||||
- **项目初始化**: 使用 `cargo generate --git <repo_url> service-template` 创建服务骨架。
|
||||
- **质量保障**:
|
||||
- 代码风格检查: `cargo forge lint`
|
||||
- 单元与集成测试: `cargo forge test`
|
||||
- **API 调试与交互**: 使用 `forge-cli` 工具,通过 `cargo forge <command>` 与正在运行的服务进行交互式 API 调用和调试。
|
||||
- **前端协同**: 在 CI/CD 流程或本地开发中,通过 `cargo forge generate-types` 命令,自动将本服务的 API 类型同步到前端项目。
|
||||
|
||||
|
||||
## 6. 项目结构(建议)
|
||||
|
||||
```
|
||||
/data-persistence-service
|
||||
├── Cargo.toml
|
||||
└── src/
|
||||
├── main.rs # 应用入口, 初始化数据库连接池, 定义路由
|
||||
├── error.rs # 统一的错误处理类型
|
||||
├── db.rs # 数据库交互逻辑 (使用 sqlx)
|
||||
├── models.rs # 数据库表对应的结构体
|
||||
├── dtos.rs # API 请求/响应对应的结构体
|
||||
└── api/
|
||||
├── mod.rs
|
||||
├── companies.rs
|
||||
├── market_data.rs
|
||||
└── analysis.rs
|
||||
```
|
||||
|
||||
## 7. 实施计划 (Implementation Plan & To-Do List)
|
||||
|
||||
本部分将开发 `data-persistence-service` 的过程分解为一系列可执行、可追踪的任务。
|
||||
|
||||
### Phase 1: 项目初始化与基础设置
|
||||
|
||||
- [x] **T1.1**: 使用 `cargo generate` 和 `service-template` 在 `services/data-persistence-service` 目录下初始化新项目。
|
||||
- [x] **T1.2**: 清理模板中的示例代码(如 `hello` 模块)。
|
||||
- [x] **T1.3**: 配置 `Cargo.toml`,添加 `sqlx` (with `postgres`, `runtime-tokio-rustls`, `chrono`, `uuid`, `json`), `axum`, `tokio`, `serde` 等核心依赖。
|
||||
- [x] **T1.4**: 设置 `.env` 文件,用于管理 `DATABASE_URL` 等环境变量。
|
||||
- [x] **T1.5**: 在 `main.rs` 中建立与 PostgreSQL 的数据库连接池 (`sqlx::PgPool`)。
|
||||
|
||||
### Phase 2: 数据库集成与迁移
|
||||
|
||||
- [x] **T2.1**: 安装 `sqlx-cli` (`cargo install sqlx-cli`)。
|
||||
- [x] **T2.2**: 使用 `sqlx-cli` 初始化迁移目录 (`sqlx migrate add create_initial_tables`)。
|
||||
- [x] **T2.3**: 在生成的迁移 SQL 文件中,编写 `CREATE TABLE` 语句,创建 `docs/database_schema_design.md` 中定义的所有表 (`company_profiles`, `time_series_financials` 等)。
|
||||
- [x] **T2.4**: 在迁移 SQL 文件中,为时序表 (`time_series_financials`, `daily_market_data`) 添加 `create_hypertable` 命令。
|
||||
- [x] **T2.5**: 运行 `sqlx migrate run` 应用迁移,并在数据库中验证表结构是否正确创建。
|
||||
- [x] **T2.6**: 在 `src/models.rs` 中,根据数据库表结构,编写对应的 Rust 结构体。
|
||||
|
||||
### Phase 3: 核心 API 实现
|
||||
|
||||
- [x] **T3.1**: **Companies API**:
|
||||
- [x] 在 `src/dtos.rs` 中创建 `CompanyProfileDto`。
|
||||
- [x] 在 `src/db.rs` 中实现 `upsert_company` 和 `get_company_by_symbol` 数据库操作函数。
|
||||
- [x] 在 `src/api/companies.rs` 中创建 `PUT /api/v1/companies` 和 `GET /api/v1/companies/{symbol}` 的 `axum` handler,并连接到 `db` 函数。
|
||||
- [x] **T3.2**: **Market Data API**:
|
||||
- [x] 在 `src/dtos.rs` 中创建 `TimeSeriesFinancialDto` 和 `DailyMarketDataDto`。
|
||||
- [x] 在 `src/db.rs` 中实现 `batch_insert_financials` 和 `get_financials_by_symbol` 函数。
|
||||
- [x] 在 `src/db.rs` 中实现 `batch_insert_daily_data` 和 `get_daily_data_by_symbol` 函数。
|
||||
- [x] 在 `src/api/market_data.rs` 中创建对应的 `axum` handlers 和路由。
|
||||
- [x] **T3.3**: **Analysis Results API**:
|
||||
- [x] 在 `src/dtos.rs` 中创建 `NewAnalysisResultDto` 和 `AnalysisResultDto`。
|
||||
- [x] 在 `src/db.rs` 中实现 `create_analysis_result` 和 `get_analysis_results` 函数。
|
||||
- [x] 在 `src/api/analysis.rs` 中创建对应的 `axum` handlers 和路由。
|
||||
- [x] **T3.4**: 在 `main.rs` 中,将所有 API 路由组合起来。
|
||||
|
||||
### Phase 4: 容器化与集成
|
||||
|
||||
- [x] **T4.1**: 编写多阶段 `Dockerfile`,优化镜像大小和构建速度。
|
||||
- [x] **T4.2**: 在根目录的 `docker-compose.yml` 中,添加 `data-persistence-service` 的定义,并配置其依赖 `postgres-db`。
|
||||
- [x] **T4.3**: 修改 `Tiltfile` 以包含新的 Rust 服务,确保 `tilt up` 可以成功构建并运行该服务。
|
||||
- [x] **T4.4**: **(集成点)** 修改现有的 Python `backend` 服务,使其不再直接连接数据库,而是通过 HTTP 请求调用 `data-persistence-service` 的 API 来读写数据。
|
||||
|
||||
### Phase 5: 测试与文档
|
||||
|
||||
- [x] **T5.1**: 为 `db.rs` 中的每个数据库操作函数编写单元测试(需要 `sqlx` 的 test-macros 特性)。
|
||||
- [x] **T5.2**: 为每个 API 端点编写集成测试。
|
||||
- [ ] **T5.3**: 使用 `#[api_dto]` 宏确保所有 DTO 都已正确集成到 OpenAPI 规范中。
|
||||
- [ ] **T5.4**: 运行 `cargo forge generate-types`,验证能否成功生成 TypeScript 类型文件。
|
||||
- [ ] **T5.5**: 编写 `README.md`,说明如何本地启动、配置和测试该服务。
|
||||
@ -1,166 +0,0 @@
|
||||
# 微服务架构重构计划
|
||||
|
||||
## 1. 引言
|
||||
|
||||
### 1.1. 文档目的
|
||||
|
||||
本文档旨在为“基本面选股系统”从单体架构向微服务架构的演进提供一个全面的设计蓝图和分阶段的实施路线图。它将详细阐述目标架构、服务划分、技术栈选型以及具体的迁移步骤,作为后续开发工作的核心指导文件。
|
||||
|
||||
### 1.2. 重构目标与收益
|
||||
|
||||
当前系统采用的是经典的前后端分离的单体架构。为了应对未来更复杂的需求、提升系统的可维护性、可扩展性并实现关键模块的独立部署与扩缩容,我们决定将其重构为微服务架构。
|
||||
|
||||
主要收益包括:
|
||||
- **高内聚、低耦合**: 每个服务只关注单一业务职责,易于理解和维护。
|
||||
- **独立部署与交付**: 可以对单个服务进行修改、测试和部署,而不影响整个系统,加快迭代速度。
|
||||
- **技术异构性**: 未来可以为不同的服务选择最适合的技术栈。
|
||||
- **弹性伸缩**: 可以根据负载情况,对高负荷的服务(如AI分析服务)进行独立扩容。
|
||||
- **故障隔离**: 单个服务的故障不会导致整个系统崩溃。
|
||||
|
||||
## 2. 目标架构设计
|
||||
|
||||
我们将采用以 `API网关` 为核心的微服务架构模式。前端应用将通过网关与后端一系列独立的微服务进行通信。
|
||||
|
||||

|
||||
|
||||
### 2.1. 服务划分 (Service Breakdown)
|
||||
|
||||
现有的后端应用将被拆分为以下几个核心微服务:
|
||||
|
||||
| 服务名称 | 容器名 (`docker-compose`) | 核心职责 |
|
||||
| :--- | :--- | :--- |
|
||||
| **前端应用** | `frontend-web` | **(保持不变)** Next.js UI,负责用户交互。 |
|
||||
| **API网关** | `api-gateway` | **(新增)** 系统统一入口。负责请求路由、认证、限流、日志聚合。将前端请求转发到正确的内部服务。 |
|
||||
| **报告编排器** | `report-orchestrator` | **(由原后端演变)** 负责报告生成的业务工作流。接收请求,调用数据、分析等服务,编排整个流程。 |
|
||||
| **数据聚合服务**| `data-aggregator` | **(新增)** 封装所有对第三方数据源(Tushare, Finnhub等)的API调用,并提供统一的数据接口,内置缓存逻辑。 |
|
||||
| **AI分析服务** | `analysis-service` | **(新增)** 专门负责与大语言模型(Gemini)交互。将其独立出来便于未来单独扩容或部署到GPU服务器。 |
|
||||
| **配置服务** | `config-service` | **(新增)** 集中管理并提供所有动态配置(API密钥、Prompt模板等),实现配置的动态更新与统一管理。 |
|
||||
| **数据库** | `postgres-db` | **(保持不变)** 独立的PostgreSQL数据库容器,为所有服务提供持久化存储。 |
|
||||
|
||||
### 2.2. 技术栈与开发环境
|
||||
|
||||
- **容器化**: `Docker`
|
||||
- **服务编排**: `Docker Compose`
|
||||
- **开发环境管理**: `Tilt`
|
||||
- **服务间通信**: 同步通信采用轻量级的 `RESTful API (HTTP)`。对于长任务,未来可引入 `RabbitMQ` 或 `Redis Stream` 等消息队列实现异步通信。
|
||||
|
||||
### 2.3. 项目根目录清洁化 (Root Directory Cleanup)
|
||||
|
||||
根据约定,项目根目录应保持整洁,只存放与整个项目和微服务编排直接相关的顶级文件和目录。所有业务代码、独立应用的配置和脚本工具都应被归纳到合适的子目录中。
|
||||
|
||||
- **`services/` 目录**: 所有微服务(包括 `frontend` 和 `backend`)的代码都将迁移至此目录下。
|
||||
- **`deployment/` 目录**: 用于存放与生产环境部署相关的配置文件(例如,`pm2.config.js`)。
|
||||
- **`scripts/` 目录**: 用于存放各类开发、构建、工具类脚本(例如,`dev.py`, 根目录的 `package.json` 等)。
|
||||
- **`.gitignore`**: 应添加规则以忽略开发者个人工具和二进制文件(例如,`portwardenc-amd64`)。
|
||||
|
||||
## 3. 分阶段实施计划
|
||||
|
||||
我们将采用增量、迭代的方式进行重构,确保每一步都是可验证、低风险的。
|
||||
|
||||
### 阶段 0: 容器化现有单体应用
|
||||
|
||||
**目标**: 在不修改任何业务代码的前提下,将现有的 `frontend` 和 `backend` 应用容器化,并使用 `docker-compose` 和 `Tilt` 运行起来。这是验证容器化环境和开发流程的第一步。
|
||||
|
||||
**任务**:
|
||||
1. 在项目根目录创建 `docker-compose.yml` 文件,定义 `frontend`, `backend`, `postgres-db` 三个服务。
|
||||
2. 分别为 `frontend` 和 `backend` 目录创建 `Dockerfile`。
|
||||
3. 在项目根目录创建 `Tiltfile`,并配置其加载 `docker-compose.yml`。
|
||||
4. 调整配置文件(如 `NEXT_PUBLIC_BACKEND_URL` 和 `DATABASE_URL` 环境变量),使其适应Docker内部网络。
|
||||
5. **验证**: 运行 `tilt up`,整个应用能够像在本地一样正常启动和访问。
|
||||
|
||||
---
|
||||
|
||||
### 阶段 1: 拆分配置服务 (`config-service`)
|
||||
|
||||
**目标**: 将配置管理逻辑从主后端中剥离,创建第一个真正的微服务。这是一个理想的起点,因为它依赖项少,风险低。
|
||||
|
||||
**任务**:
|
||||
1. 创建新目录 `services/config-service`。
|
||||
2. 在该目录中初始化一个新的、轻量级的FastAPI应用。
|
||||
3. 将原 `backend` 中所有读取 `config/` 目录文件的逻辑(如 `ConfigManager`) 迁移至 `config-service`。
|
||||
4. 在 `config-service` 中暴露API端点,例如 `GET /api/v1/system`, `GET /api/v1/analysis-modules`。
|
||||
5. 在 `docker-compose.yml` 中新增 `config-service` 的定义。
|
||||
6. 修改原 `backend` 应用,移除本地文件读取逻辑,改为通过HTTP请求从 `config-service` 获取配置。
|
||||
|
||||
---
|
||||
|
||||
### 阶段 2: 拆分数据聚合服务 (`data-aggregator`)
|
||||
|
||||
**目标**: 将所有与外部数据源的交互逻辑隔离出来。
|
||||
|
||||
**任务**:
|
||||
1. 创建新目录 `services/data-aggregator`。
|
||||
2. 将原 `backend/app/data_providers` 目录及相关的数据获取和处理逻辑整体迁移到新服务中。
|
||||
3. 为新服务定义清晰的API,例如 `GET /api/v1/financials/{symbol}`。
|
||||
4. 在 `docker-compose.yml` 中新增 `data-aggregator` 服务的定义。
|
||||
5. 修改原 `backend` 应用,将调用本地数据模块改为通过HTTP请求调用 `data-aggregator` 服务。
|
||||
|
||||
---
|
||||
|
||||
### 阶段 3: 拆分AI分析服务 (`analysis-service`)
|
||||
|
||||
**目标**: 隔离计算密集型且可能需要特殊硬件资源的AI调用逻辑。
|
||||
|
||||
**任务**:
|
||||
1. 创建新目录 `services/analysis-service`。
|
||||
2. 将原 `backend/app/services/analysis_client.py` 及相关的Gemini API调用逻辑迁移到新服务中。
|
||||
3. 定义API,例如 `POST /api/v1/analyze`,接收上下文数据和prompt,返回分析结果。
|
||||
4. 在 `docker-compose.yml` 中新增 `analysis-service` 的定义。
|
||||
5. 修改原 `backend` 应用,将直接调用SDK改为通过HTTP请求调用 `analysis-service`。
|
||||
|
||||
---
|
||||
|
||||
### 阶段 4: 引入API网关 (`api-gateway`) 并重塑编排器
|
||||
|
||||
**目标**: 建立统一的外部入口,并正式将原 `backend` 的角色明确为 `report-orchestrator`。
|
||||
|
||||
**任务**:
|
||||
1. 创建新目录 `services/api-gateway`,并初始化一个FastAPI应用。
|
||||
2. 在 `api-gateway` 中配置路由规则,将来自前端的请求(如 `/api/config/*`, `/api/financials/*`)代理到对应的内部微服务 (`config-service`, `report-orchestrator` 等)。
|
||||
3. 更新 `docker-compose.yml`,将前端端口暴露给主机,而其他后端服务仅在内部网络可达。
|
||||
4. 修改 `frontend` 的 `NEXT_PUBLIC_BACKEND_URL` 指向 `api-gateway`。
|
||||
5. 此时,原 `backend` 的代码已经精简,主要剩下编排逻辑。我们可以考虑将其目录重命名为 `services/report-orchestrator`,以准确反映其职责。
|
||||
|
||||
## 4. 最终项目目录结构(设想)
|
||||
|
||||
重构完成后,项目目录结构将演变为:
|
||||
|
||||
```
|
||||
/home/lv/nvm/works/Fundamental_Analysis/
|
||||
├── docker-compose.yml
|
||||
├── Tiltfile
|
||||
├── README.md
|
||||
├── .gitignore
|
||||
├── services/
|
||||
│ ├── frontend/
|
||||
│ │ └── Dockerfile
|
||||
│ ├── api-gateway/
|
||||
│ │ ├── app/
|
||||
│ │ └── Dockerfile
|
||||
│ ├── config-service/
|
||||
│ │ ├── app/
|
||||
│ │ └── Dockerfile
|
||||
│ ├── data-aggregator/
|
||||
│ │ ├── app/
|
||||
│ │ └── Dockerfile
|
||||
│ ├── analysis-service/
|
||||
│ │ ├── app/
|
||||
│ │ └── Dockerfile
|
||||
│ └── report-orchestrator/ # 由原 backend 演变而来
|
||||
│ ├── app/
|
||||
│ └── Dockerfile
|
||||
├── deployment/
|
||||
│ └── pm2.config.js
|
||||
├── scripts/
|
||||
│ ├── dev.py
|
||||
│ └── package.json # 原根目录的 package.json
|
||||
├── config/ # 静态配置文件,由 config-service 读取
|
||||
└── docs/
|
||||
└── microservice_refactoring_plan.md
|
||||
```
|
||||
|
||||
## 5. 结论
|
||||
|
||||
本计划提供了一个从单体到微服务的清晰、可行的演进路径。通过分阶段、增量式的重构,我们可以平稳地完成架构升级,同时确保在每个阶段结束后,系统都处于可工作、可验证的状态。
|
||||
|
||||
请您审阅此计划。如有任何疑问或建议,我们可以随时讨论和调整。
|
||||
@ -1,67 +0,0 @@
|
||||
# 美国市场数据集成任务清单
|
||||
|
||||
本文档用于跟踪和管理为项目集成美国市场数据(使用 Finnhub 作为数据源)所需的各项开发任务。
|
||||
|
||||
## 任务列表
|
||||
|
||||
- [x] **后端:实现 FinnhubProvider 数据映射**
|
||||
- **目标**:根据 `docs/financial_data_dictionary.md` 中的定义,在 `backend/app/data_providers/finnhub.py` 文件中,完成从 Finnhub API 原始数据到系统标准字段的完整映射。
|
||||
- **关键点**:
|
||||
- [x] 处理直接映射的字段。
|
||||
- [x] 实现所有需要通过计算得出的衍生指标。
|
||||
- [x] 确保处理 `null` 或空值,避免计算错误。
|
||||
- [x] 验证返回的数据结构符合 `DataManager` 的预期。
|
||||
|
||||
- [x] **后端:按市场分段的 API 路由**
|
||||
- **目标**:在 `backend/app/routers/financial.py` 中,将现有的 `/api/v1/financials/china/{ts_code}` 改为按市场分段:`/api/v1/financials/{market}/{stock_code}`(示例:`/api/v1/financials/us/AAPL`,`/api/v1/financials/cn/600519.SH`)。
|
||||
- **关键点**:
|
||||
- [x] 去除硬编码的 `china`,新增路径参数 `market`,并对取值做校验(`cn/us/hk/jp`)。
|
||||
- [x] 使用单一处理函数,根据 `market` 分派到相应的数据提供方与代码格式规范。
|
||||
|
||||
- [x] **前端:更新 API 调用**
|
||||
- **目标**:修改前端调用,基于用户选择的市场与股票代码,请求新的按市场分段路由。
|
||||
- **关键点**:
|
||||
- [x] 替换 `useChinaFinancials`,新增通用 `useFinancials(market, stockCode, years)`。
|
||||
- [x] 将请求路径改为 `/api/financials/{market}/{stock_code}?years=...`(代理到后端对应的 `/api/v1/financials/{market}/{stock_code}`)。
|
||||
- [ ] 确保展示与错误处理兼容美国、香港、日本等市场。
|
||||
|
||||
- [ ] **测试与验证**
|
||||
- **目标**:对整个流程进行端到端测试,确保两个市场的功能都稳定可靠。
|
||||
- **关键点**:
|
||||
- [ ] **中国市场回归测试**:使用多个中国 A 股代码测试,确保原有功能不受影响。
|
||||
- [ ] **美国市场功能测试**:使用多个美国股票代码(如 `AAPL`, `MSFT`)测试,验证报告能否成功生成。
|
||||
- [ ] **数据一致性验证**:抽样对比 Finnhub 返回的数据和前端展示的数据,确保映射和计算的准确性。
|
||||
- [ ] **错误处理测试**:测试无效的股票代码,检查系统是否能给出清晰的错误提示。
|
||||
|
||||
- **前置条件**:
|
||||
- [ ] 在 `config/config.json` 或环境变量中配置 `FINNHUB_API_KEY`。
|
||||
- [ ] 后端已启动(默认 `http://127.0.0.1:8000/api`),前端已启动(默认 `http://127.0.0.1:3000`)。
|
||||
|
||||
- **接口用例(后端)**:
|
||||
- [ ] GET `/api/v1/financials/cn/600519.SH?years=10`
|
||||
- 期望:`200`;返回 `ts_code`、`name`、`series`(含 `revenue`、`n_income` 等关键指标,period/年序列齐全)。
|
||||
- [ ] GET `/api/v1/financials/cn/000001.SZ?years=5`
|
||||
- 期望:`200`;返回与上同,近 5 年序列。
|
||||
- [ ] GET `/api/v1/financials/us/AAPL?years=10`
|
||||
- 期望:`200`;`series` 至少包含:`revenue`、`n_income`、`total_assets`、`total_hldr_eqy_exc_min_int`、`__free_cash_flow`、`grossprofit_margin`、`netprofit_margin`、`roe`、`roa`。
|
||||
- [ ] GET `/api/v1/financials/us/MSFT?years=10`
|
||||
- 期望:`200`;字段与口径同 AAPL。
|
||||
- [ ] GET `/api/v1/financials/us/INVALID?years=10`
|
||||
- 期望:`4xx/5xx`;`detail.message` 含可读错误。
|
||||
|
||||
- **页面用例(前端)**:
|
||||
- [ ] 打开 `/report/600519.SH?market=cn`
|
||||
- 期望:基本信息与“昨日快照”显示;“财务数据(来自 Tushare)”表格展示 10 期内主要指标。
|
||||
- [ ] 打开 `/report/000001.SZ?market=cn`
|
||||
- 期望:与上同;代码规范化逻辑(无后缀时自动补 `.SZ/.SH`)正常。
|
||||
- [ ] 打开 `/report/AAPL?market=us`
|
||||
- 期望:“股价图表”正常;“财务数据”表格展示主要指标(含自由现金流、毛利率、净利率、ROA、ROE)。
|
||||
- [ ] 打开 `/report/MSFT?market=us`
|
||||
- 期望:与上同。
|
||||
- [ ] 打开 `/report/INVALID?market=us`
|
||||
- 期望:顶部状态为“读取失败”并有错误提示文案。
|
||||
|
||||
- **验收标准**:
|
||||
- [ ] 中国市场功能无回归;美国市场关键指标齐全、值域合理(百分比类 ∈ [-1000%, 1000%],金额类为有限数)。
|
||||
- [ ] 报错信息清晰可读;网络/密钥缺失时提示明确。
|
||||
- [ ] 页内主要表格不出现 `NaN/Infinity`;空值以 `-` 展示。
|
||||
@ -1,154 +0,0 @@
|
||||
---
|
||||
status: 'Pending'
|
||||
created: '2025-11-16'
|
||||
owner: '@lv'
|
||||
---
|
||||
|
||||
# 任务:重构LLM Provider架构 (V2 - 数据库中心化)
|
||||
|
||||
## 1. 任务目标
|
||||
|
||||
为解决当前系统大语言模型(LLM)配置的僵化问题,本次任务旨在重构LLM的配置和调用工作流。我们将实现一个以数据库为中心的、支持多供应商的、结构化的配置体系。该体系将允许每个分析模块都能按需选择其所需的LLM供应商和具体模型,同时保证整个系统的类型安全和数据一致性。
|
||||
|
||||
## 2. 新架构设计:配置即数据
|
||||
|
||||
我们将废弃所有基于本地文件的配置方案 (`analysis-config.json`, `llm-providers.json`),并将所有配置信息作为结构化数据存入数据库。
|
||||
|
||||
### 2.1. 核心原则:Schema-in-Code
|
||||
|
||||
- **不新增数据表**: 我们将利用现有的 `system_config` 表及其 `JSONB` 字段来存储所有配置,无需修改数据库Schema。
|
||||
- **强类型约束**: 所有配置的JSON结构,其“单一事实源”都将是在 **`common-contracts`** crate中定义的Rust Structs。所有服务都必须依赖这些共享的Structs来序列化和反序列化配置数据,从而在应用层面实现强类型约束。
|
||||
|
||||
### 2.2. `common-contracts`中的数据结构定义
|
||||
|
||||
将在`common-contracts`中创建一个新模块(例如 `config_models.rs`),定义如下结构:
|
||||
|
||||
```rust
|
||||
// In: common-contracts/src/config_models.rs
|
||||
|
||||
use serde::{Serialize, Deserialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// 单个启用的模型
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct LlmModel {
|
||||
pub model_id: String, // e.g., "gpt-4o"
|
||||
pub name: Option<String>, // 别名,用于UI显示
|
||||
pub is_active: bool, // 是否在UI中可选
|
||||
}
|
||||
|
||||
// 单个LLM供应商的完整配置
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct LlmProvider {
|
||||
pub name: String, // "OpenAI 官方"
|
||||
pub api_base_url: String,
|
||||
pub api_key: String, // 直接明文存储
|
||||
pub models: Vec<LlmModel>, // 该供应商下我们启用的模型列表
|
||||
}
|
||||
|
||||
// 整个LLM Provider注册中心的数据结构
|
||||
pub type LlmProvidersConfig = HashMap<String, LlmProvider>; // Key: provider_id, e.g., "openai_official"
|
||||
|
||||
// 单个分析模块的配置
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct AnalysisModuleConfig {
|
||||
pub name: String, // "看涨分析"
|
||||
pub provider_id: String, // 引用 LlmProvidersConfig 的 Key
|
||||
pub model_id: String, // 引用 LlmModel 中的 model_id
|
||||
pub prompt_template: String,
|
||||
pub dependencies: Vec<String>,
|
||||
}
|
||||
|
||||
// 整个分析模块配置集合的数据结构
|
||||
pub type AnalysisModulesConfig = HashMap<String, AnalysisModuleConfig>; // Key: module_id, e.g., "bull_case"
|
||||
```
|
||||
|
||||
### 2.3. `system_config` 表中的数据存储
|
||||
|
||||
我们将使用两个`config_key`来存储这些结构序列化后的JSON:
|
||||
1. **Key: `"llm_providers"`**: 其`config_value`是一个序列化后的`LlmProvidersConfig`。
|
||||
2. **Key: `"analysis_modules"`**: 其`config_value`是一个序列化后的`AnalysisModulesConfig`。
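读取并校验这两条配置记录的过程大致如下,仅为草图(SQL 与错误处理方式为假设,反序列化目标来自 `common-contracts`,需启用 `sqlx` 的 `json` 特性):

```rust
use common_contracts::config_models::LlmProvidersConfig;
use sqlx::PgPool;

// 从 system_config 表读取 key 为 "llm_providers" 的 JSONB,并反序列化为强类型配置
async fn load_llm_providers(pool: &PgPool) -> anyhow::Result<LlmProvidersConfig> {
    let value: serde_json::Value = sqlx::query_scalar(
        "SELECT config_value FROM system_config WHERE config_key = $1",
    )
    .bind("llm_providers")
    .fetch_one(pool)
    .await?;

    // 反序列化失败即说明数据库中的数据不符合 common-contracts 中定义的契约
    Ok(serde_json::from_value(value)?)
}
```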
|
||||
|
||||
## 3. 实施步骤
|
||||
|
||||
### 步骤 1: 更新 `common-contracts` (地基)
|
||||
|
||||
1. 在`common-contracts/src/`下创建`config_models.rs`文件。
|
||||
2. 将上述所有Rust Structs定义添加到该文件中,并确保它们在`lib.rs`中被正确导出。
|
||||
|
||||
### 步骤 2: 重构 `data-persistence-service` (配置守门人)
|
||||
|
||||
1. **移除 `config-service-rs`**: 该服务的功能将被`data-persistence-service`完全吸收和取代,可以准备将其从`docker-compose.yml`中移除。
|
||||
2. **实现新的CRUD API**:
|
||||
- `GET /api/v1/configs/llm_providers`: 读取并返回`system_config`中key为`llm_providers`的JSON文档。
|
||||
- `PUT /api/v1/configs/llm_providers`: 接收一个`LlmProvidersConfig`的JSON payload,**使用`common-contracts`中的Structs进行反序列化验证**,验证通过后,将其存入数据库。
|
||||
- `GET /api/v1/configs/analysis_modules`: 读取并返回key为`analysis_modules`的JSON文档。
|
||||
- `PUT /api/v1/configs/analysis_modules`: 接收一个`AnalysisModulesConfig`的JSON payload,进行验证后存入数据库。
|
||||
|
||||
### 步骤 3: 重构 `frontend` (管理UI)
|
||||
|
||||
1. **创建LLM Provider管理页面**:
|
||||
- 提供一个表单,用于新增/编辑`LlmProvider`(对应`llm_providers`JSON中的一个顶级条目)。
|
||||
- 在每个Provider下,提供一个子表单来管理其`models`列表(增、删、改、切换`is_active`状态)。
|
||||
- 实现“自动发现模型”功能,调用`api-gateway`的模型发现端点,让用户可以从中选择模型加入列表。
|
||||
2. **更新分析模块配置页面**:
|
||||
- 为每个分析模块提供两个级联下拉框:
|
||||
1. 第一个下拉框选择`Provider` (数据来自`GET /api/v1/configs/llm_providers`)。
|
||||
2. 第二个下拉框根据第一个的选择,动态加载该Provider下所有`is_active: true`的`Model`。
|
||||
- 更新保存逻辑,以调用`PUT /api/v1/configs/analysis_modules`。
|
||||
|
||||
### 步骤 4: 更新 `api-gateway`
|
||||
|
||||
1. **移除对`config-service-rs`的代理**。
|
||||
2. **代理新的配置API**: 将所有`/api/v1/configs/*`的请求正确地代理到`data-persistence-service`。
|
||||
3. **实现模型发现端点**:
|
||||
- 创建`GET /api/v1/discover-models/{provider_id}`。
|
||||
- 该端点会先调用`data-persistence-service`获取指定provider的`api_base_url`和`api_key`。
|
||||
- 然后使用这些信息向LLM供应商的官方`/models`接口发起请求,并将结果返回给前端。
|
||||
|
||||
### 步骤 5: 重构 `report-generator-service` (最终消费者)
|
||||
|
||||
1. **移除旧配置**:
|
||||
- 修改`docker-compose.yml`,移除所有旧的`LLM_*`环境变量。
|
||||
2. **重构工作流**:
|
||||
- 当收到任务时(例如`bull_case`),它将:
|
||||
a. 并行调用`data-persistence-service`的`GET /api/v1/configs/llm_providers`和`GET /api/v1/configs/analysis_modules`接口,获取完整的配置。
|
||||
b. **使用`common-contracts`中的Structs反序列化**这两个JSON响应,得到类型安全的`LlmProvidersConfig`和`AnalysisModulesConfig`对象。
|
||||
c. 通过`analysis_config["bull_case"]`找到`provider_id`和`model_id`。
|
||||
d. 通过`providers_config[provider_id]`找到对应的`api_base_url`和`api_key`。
|
||||
e. 动态创建`LlmClient`实例,并执行任务。
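上述 c–e 步骤中的查找与装配过程,可以用如下 Rust 草图表示(`AnalysisModulesConfig`、`LlmProvidersConfig` 来自 `common-contracts`;`LlmClient` 的具体构造签名此处不假设,留给调用方):

```rust
use common_contracts::config_models::{AnalysisModulesConfig, LlmProvidersConfig};

// 返回指定分析模块所需的 (api_base_url, api_key, model_id),供后续创建 LlmClient 使用
fn resolve_llm_target(
    module_id: &str,
    modules: &AnalysisModulesConfig,
    providers: &LlmProvidersConfig,
) -> anyhow::Result<(String, String, String)> {
    let module = modules
        .get(module_id)
        .ok_or_else(|| anyhow::anyhow!("未知分析模块: {module_id}"))?;
    let provider = providers
        .get(&module.provider_id)
        .ok_or_else(|| anyhow::anyhow!("未知 Provider: {}", module.provider_id))?;
    Ok((
        provider.api_base_url.clone(),
        provider.api_key.clone(),
        module.model_id.clone(),
    ))
}
```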
|
||||
|
||||
## 4. 验收标准
|
||||
|
||||
- ✅ `common-contracts` crate中包含了所有新定义的配置Structs。
|
||||
- ✅ `data-persistence-service`提供了稳定、类型安全的API来管理存储在`system_config`表中的配置。
|
||||
- ✅ `config-service-rs`服务已安全移除。
|
||||
- ✅ 前端提供了一个功能完善的UI,用于管理LLM Providers、Models,并能将它们正确地指派给各个分析模块。
|
||||
- ✅ `report-generator-service`能够正确地、动态地使用数据库中的配置,为不同的分析模块调用不同的LLM Provider和模型。
|
||||
|
||||
## 5. 任务实施清单 (TODO List)
|
||||
|
||||
### 阶段一:定义数据契约 (`common-contracts`)
|
||||
- [x] 在 `src` 目录下创建 `config_models.rs` 文件。
|
||||
- [x] 在 `config_models.rs` 中定义 `LlmModel`, `LlmProvider`, `LlmProvidersConfig`, `AnalysisModuleConfig`, `AnalysisModulesConfig` 等所有Structs。
|
||||
- [x] 在 `lib.rs` 中正确导出 `config_models` 模块,使其对其他服务可见。
|
||||
|
||||
### 阶段二:实现配置的持久化与服务 (`data-persistence-service`)
|
||||
- [x] **[API]** 实现 `GET /api/v1/configs/llm_providers` 端点。
|
||||
- [x] **[API]** 实现 `PUT /api/v1/configs/llm_providers` 端点,并确保使用 `common-contracts` 中的Structs进行反序列化验证。
|
||||
- [x] **[API]** 实现 `GET /api/v1/configs/analysis_modules` 端点。
|
||||
- [x] **[API]** 实现 `PUT /api/v1/configs/analysis_modules` 端点,并进行相应的验证。
|
||||
- [x] **[系统]** 从 `docker-compose.yml` 中安全移除 `config-service-rs` 服务,因其功能已被本服务吸收。
|
||||
|
||||
### 阶段三:更新API网关与前端 (`api-gateway` & `frontend`)
|
||||
- [x] **[api-gateway]** 更新路由配置,将所有 `/api/v1/configs/*` 的请求代理到 `data-persistence-service`。
|
||||
- [x] **[api-gateway]** 实现 `GET /api/v1/discover-models/{provider_id}` 模型发现代理端点。
|
||||
- [x] **[frontend]** 创建全新的“LLM Provider管理”页面UI骨架。
|
||||
- [x] **[frontend]** 实现调用新配置API对LLM Providers和Models进行增、删、改、查的完整逻辑。
|
||||
- [x] **[frontend]** 在Provider管理页面上,实现“自动发现模型”的功能按钮及其后续的UI交互。
|
||||
- [x] **[frontend]** 重构“分析模块配置”页面,使用级联下拉框来选择Provider和Model。
|
||||
|
||||
### 阶段四:重构报告生成服务 (`report-generator-service`)
|
||||
- [x] **[配置]** 从 `docker-compose.yml` 中移除所有旧的、全局的 `LLM_*` 环境变量。
|
||||
- [x] **[核心逻辑]** 重构服务的工作流,实现从 `data-persistence-service` 动态获取`LlmProvidersConfig`和`AnalysisModulesConfig`。
|
||||
- [x] **[核心逻辑]** 实现动态创建 `LlmClient` 实例的逻辑,使其能够根据任务需求使用不同的Provider配置。
|
||||
@ -1,542 +0,0 @@
|
||||
# 设计文档: 面向Rust的事件驱动数据微服务架构
|
||||
|
||||
## 1. 引言
|
||||
|
||||
### 1.1. 文档目的
|
||||
|
||||
本文档旨在为“基本面选股系统”设计一个**完全基于Rust的、事件驱动的、去中心化的微服务架构**。此设计将作为彻底替换现有Python组件、并构建下一代数据处理生态系统的核心技术蓝图。
|
||||
|
||||
新的架构目标是:
|
||||
1. **服务独立化**:将每个外部数据源(Tushare, Finnhub等)封装成独立的、可独立部署和运行的微服务。
|
||||
2. **事件驱动**:引入消息总线(Message Bus)作为服务间通信的主干,实现服务的高度解耦和异步协作。
|
||||
3. **数据中心化**:所有微服务将标准化的数据写入一个由`data-persistence-service`独占管理的中央数据库,实现“数据写入即共享”。
|
||||
4. **纯Rust生态**:从前端网关到最末端的数据提供商,整个后端生态系统将100%使用Rust构建,确保端到端的类型安全、高性能和健壮性。
|
||||
|
||||
### 1.2. 核心架构理念
|
||||
|
||||
- **独立单元 (Independent Units)**: 每个服务都是一个完整的、自包含的应用程序,拥有自己的配置、逻辑和生命周期。
|
||||
- **异步协作 (Asynchronous Collaboration)**: 服务之间通过发布/订阅消息进行通信,而非紧耦合的直接API调用。
|
||||
- **单一事实源 (Single Source of Truth)**: 数据库是所有结构化数据的唯一事实源。服务通过向数据库写入数据来“广播”其工作成果。
|
||||
|
||||
## 2. 目标架构 (Target Architecture)
|
||||
|
||||
### 2.1. 架构图
|
||||
|
||||
```
|
||||
+-------------+ +------------------+ +---------------------------+
|
||||
| | HTTP | | | |
|
||||
| Frontend |----->| API Gateway |----->| Message Bus |
|
||||
| (Next.js) | | (Rust) | | (e.g., RabbitMQ, NATS) |
|
||||
| | | | | |
|
||||
+-------------+ +-------+----------+ +-------------+-------------+
|
||||
| |
|
||||
(Read operations) | | (Pub/Sub Commands & Events)
|
||||
| |
|
||||
+-----------------v------------------+ +------v------+ +----------------+ +----------------+
|
||||
| | | Tushare | | Finnhub | | iFind |
|
||||
| Data Persistence Service (Rust) |<---->| Provider | | Provider | | Provider |
|
||||
| | | Service | | Service | | Service |
|
||||
+-----------------+------------------+ | (Rust) | | (Rust) | | (Rust) |
|
||||
| +-------------+ +----------------+ +----------------+
|
||||
v
|
||||
+-----------------------------------------------------+
|
||||
| |
|
||||
| PostgreSQL Database |
|
||||
| |
|
||||
+-----------------------------------------------------+
|
||||
```
|
||||
|
||||
### 2.2. 服务职责划分
|
||||
|
||||
- **API Gateway (Rust)**:
|
||||
- 面向前端的唯一入口 (BFF - Backend for Frontend)。
|
||||
- 负责处理用户请求、认证鉴权。
|
||||
- 将前端的查询请求转化为对`Data Persistence Service`的数据读取调用。
|
||||
- 将前端的操作请求(如“生成新报告”)转化为命令(Command)并发布到**Message Bus**。
|
||||
|
||||
- **`*_provider-service` (Rust)**:
|
||||
- **一组**独立的微服务,每个服务对应一个外部数据API(如`tushare-provider-service`)。
|
||||
- 订阅Message Bus上的相关命令(如`FetchFinancialsRequest`)。
|
||||
- 独立调用外部API,对返回数据进行清洗、标准化。
|
||||
- 调用`Data Persistence Service`的接口,将标准化后的数据写入数据库。
|
||||
- 操作完成后,可以向Message Bus发布事件(Event),如`FinancialsDataReady`。
|
||||
|
||||
- **Data Persistence Service (Rust)**:
|
||||
- **(职责不变)** 数据库的唯一守门人。
|
||||
- 为所有其他内部微服务提供稳定、统一的数据库读写gRPC/HTTP接口。
|
||||
|
||||
- **Message Bus (e.g., RabbitMQ, NATS)**:
|
||||
- 整个系统的神经中枢,负责所有服务间的异步通信。
|
||||
- 传递命令(“做什么”)和事件(“发生了什么”)。
|
||||
|
||||
## 3. 核心抽象与数据契约
|
||||
|
||||
### 3.1. `DataProvider` Trait (内部实现蓝图)
|
||||
|
||||
此Trait依然是构建**每个独立Provider微服务内部逻辑**的核心蓝图。它定义了一个Provider应该具备的核心能力。
|
||||
|
||||
```rust
|
||||
// This trait defines the internal logic blueprint for each provider microservice.
|
||||
#[async_trait]
|
||||
pub trait DataProvider: Send + Sync {
|
||||
// ... (trait definition remains the same as previous version) ...
|
||||
fn get_id(&self) -> &'static str;
|
||||
async fn get_company_profile(&self, symbol: &str) -> Result<CompanyProfile, DataProviderError>;
|
||||
async fn get_historical_financials(&self, symbol: &str, years: &[u16]) -> Result<Vec<FinancialStatement>, DataProviderError>;
|
||||
// ... etc ...
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2. 标准化数据模型 (共享的数据契约)
|
||||
|
||||
这些模型是服务间共享的“通用语言”,也是存入数据库的最终形态,其重要性在新架构下更高。
|
||||
|
||||
```rust
|
||||
// These structs are the shared "Data Contracts" across all services.
|
||||
// Their definitions remain the same as the previous version.
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)] // Add Serialize/Deserialize for messaging
|
||||
pub struct CompanyProfile { ... }
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FinancialStatement { ... }
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MarketDataPoint { ... }
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RealtimeQuote { ... }
|
||||
```
|
||||
|
||||
### 3.3. 消息/事件定义 (Message/Event Contracts)
|
||||
|
||||
这是新架构的核心,定义了在Message Bus上传递的消息格式。
|
||||
|
||||
```rust
|
||||
use uuid::Uuid;
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
// --- Commands (Instructions to do something) ---
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct FetchCompanyDataCommand {
|
||||
pub request_id: Uuid,
|
||||
pub symbol: String,
|
||||
pub market: String, // To help providers route to the correct API endpoint
|
||||
}
|
||||
|
||||
// --- Events (Notifications that something has happened) ---
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct CompanyProfilePersistedEvent {
|
||||
pub request_id: Uuid,
|
||||
pub symbol: String,
|
||||
// We don't need to carry the full data, as it's now in the database.
|
||||
// Interested services can query it.
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct FinancialsPersistedEvent {
|
||||
pub request_id: Uuid,
|
||||
pub symbol: String,
|
||||
pub years_updated: Vec<u16>,
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 数据工作流示例 (Example Data Workflow)
|
||||
|
||||
1. **请求发起**: 用户在前端请求`AAPL`的分析报告。请求到达`API Gateway`。
|
||||
2. **命令发布**: `API Gateway`生成一个唯一的`request_id`,然后向Message Bus发布一个`FetchCompanyDataCommand`命令。
|
||||
3. **命令消费**: `tushare-provider`、`finnhub-provider`等所有订阅了此命令的服务都会收到消息。
|
||||
4. **独立执行**:
|
||||
- `finnhub-provider`根据`market`和`symbol`,调用Finnhub API获取公司简介、财务、行情数据。
|
||||
- 数据获取成功后,它将数据转换为标准化的`CompanyProfile`, `Vec<FinancialStatement>`等模型。
|
||||
- 它调用`Data Persistence Service`的接口,将这些标准化的数据写入数据库。
|
||||
- 写入成功后,它向Message Bus发布`CompanyProfilePersistedEvent`和`FinancialsPersistedEvent`等事件。
|
||||
- `tushare-provider`收到命令后,可能因为市场不匹配而直接忽略该消息。
|
||||
5. **下游响应**: 一个潜在的`report-generator-service`(图中未画出,属于业务层)可以订阅`...PersistedEvent`。当它收到了生成一份完整报告所需的所有数据事件后,便开始从数据库中拉取这些数据,进行AI分析,并将最终报告存回数据库。
|
||||
6. **前端轮询/通知**: `API Gateway`可以通过WebSocket或长轮询等方式,将最终报告的完成状态通知给前端。
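步骤 2 中“命令发布”的动作,用 `async-nats` 大致可以写成如下草图(主题名 `data_fetch_commands` 取自后文路线图,连接地址等细节为示意):

```rust
use common_contracts::messages::FetchCompanyDataCommand;
use uuid::Uuid;

// 生成 request_id 并把抓取命令发布到消息总线,返回 request_id 供前端追踪
async fn publish_fetch_command(symbol: &str, market: &str) -> anyhow::Result<Uuid> {
    let request_id = Uuid::new_v4();
    let command = FetchCompanyDataCommand {
        request_id,
        symbol: symbol.to_string(),
        market: market.to_string(),
    };

    // 连接地址与主题名应来自服务配置,这里仅为示意
    let client = async_nats::connect("nats://nats:4222").await?;
    client
        .publish(
            "data_fetch_commands".to_string(),
            serde_json::to_vec(&command)?.into(),
        )
        .await?;
    Ok(request_id)
}
```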
|
||||
|
||||
## 5. 实施路线图 (Roadmap) - 更新于 2025-11-15
|
||||
|
||||
**基于对项目现状的调研,本路线图已更新,明确标识了已完成的工作和接下来的行动计划。**
|
||||
|
||||
---
|
||||
|
||||
### **✔ 阶段-1:容器化与初步服务拆分 (已完成)**
|
||||
- [x] **核心服务已容器化**: `data-persistence-service` 已完全开发、容器化,并通过Docker Compose与数据库和现有Python后端集成。
|
||||
- [x] **数据库已初始化**: Rust服务的 `migrations` 目录证实了数据库表结构已通过 `sqlx-cli` 创建和管理。
|
||||
- [x] **Python后端部分重构**: Python `backend` 服务已经作为客户端,通过HTTP API调用`data-persistence-service`来读写数据。
|
||||
- [x] **配置服务已拆分**: `config-service` 作为一个独立的Python微服务也已存在并运行。
|
||||
- [x] **开发环境已建立**: 整个系统可以通过`Docker Compose`和`Tilt`一键启动。
|
||||
|
||||
---
|
||||
|
||||
### **阶段〇:奠定新架构的基石 (Laying the New Foundation)**
|
||||
- [x] **1. 部署消息总线**: 在`docker-compose.yml`中添加一个消息总线服务 (`NATS`)。这是实现事件驱动架构的**先决条件**。
|
||||
- [x] **2. 创建共享契约库 (`common-contracts`)**: 在`services/`下创建一个新的Rust `common-contracts` crate。
|
||||
- 将`data-persistence-service/src/dtos.rs` 和 `models.rs`中的核心数据结构(如`CompanyProfile`, `FinancialStatement`等)迁移至此。
|
||||
- 添加`architecture_module_specification.md`中定义的消息契约 (`FetchCompanyDataCommand`等) 和可观测性结构 (`HealthStatus`, `TaskProgress`)。
|
||||
- [x] **3. 升级 `data-persistence-service`**:
|
||||
- 使其依赖新的`common-contracts` crate,替换掉本地的数据模型定义。
|
||||
- 为其实现`SystemModule`规范,即添加`/health`和`/tasks`端点。
|
||||
|
||||
---
|
||||
|
||||
### **阶段一:开发 `alphavantage-provider-service` (精确实现蓝图)**
|
||||
|
||||
**目标**: 创建并实现 `alphavantage-provider-service`,使其成为我们新架构下的第一个功能完备、可独立运行的数据提供商微服务。
|
||||
|
||||
- [x] **1. 项目初始化与依赖配置**
|
||||
- [x] **任务**: 基于我们的微服务模板,创建新的Rust项目 `services/alphavantage-provider-service`。
|
||||
- [x] **任务**: 在其`Cargo.toml`中添加核心依赖。
|
||||
```toml
|
||||
# Cargo.toml
|
||||
[dependencies]
|
||||
# ... other dependencies like axum, tokio, etc.
|
||||
common-contracts = { path = "../common-contracts" }
|
||||
|
||||
# Generic MCP Client
|
||||
rmcp = "0.8.5"
|
||||
|
||||
# Message Queue (NATS)
|
||||
async-nats = "0.33"
|
||||
```
|
||||
- [x] **验收标准**: 项目可以成功编译 (`cargo check`)。
|
||||
|
||||
- [x] **2. 实现 `SystemModule` 规范**
|
||||
- [x] **任务**: 在`main.rs`中启动一个Axum HTTP服务器。
|
||||
- [x] **任务**: 实现强制的`/health`端点,返回当前服务的健康状态。
|
||||
- [x] **任务**: 实现强制的`/tasks`端点。此端点需要从一个线程安全的内存存储(例如 `Arc<DashMap<Uuid, TaskProgress>>`)中读取并返回所有正在进行的任务。
|
||||
- [x] **验收标准**: 启动服务后,可以通过`curl`或浏览器访问`http://localhost:port/health`和`http://localhost:port/tasks`并得到正确的JSON响应。
|
||||
|
||||
- [x] **3. 实现核心业务逻辑:事件驱动的数据处理**
|
||||
- [x] **任务**: 实现连接到Message Bus并订阅`FetchCompanyDataCommand`命令的逻辑。
|
||||
- [x] **任务**: 当收到`FetchCompanyDataCommand`命令时,执行以下异步工作流:
|
||||
1. 在任务存储中创建并插入一个新的`TaskProgress`记录。
|
||||
2. 从配置中读取`ALPHAVANTAGE_API_KEY`,并构建MCP端点URL。
|
||||
3. 初始化通用的`rmcp`客户端: `let client = rmcp::mcp::Client::new(mcp_endpoint_url);`
|
||||
4. 使用`tokio::try_join!`**并行**执行多个数据获取任务。**注意:函数名是字符串,返回的是`serde_json::Value`。**
|
||||
```rust
|
||||
// 伪代码示例
|
||||
let symbol = &command.symbol;
|
||||
|
||||
let overview_task = client.query("OVERVIEW", &[("symbol", symbol)]);
|
||||
let income_task = client.query("INCOME_STATEMENT", &[("symbol", symbol)]);
|
||||
// ... 其他任务
|
||||
|
||||
match tokio::try_join!(overview_task, income_task, /*...*/) {
|
||||
Ok((overview_json, income_json, /*...*/)) => {
|
||||
// overview_json and income_json are of type serde_json::Value
|
||||
// ... 进入步骤 4
|
||||
},
|
||||
Err(e) => { /* ... */ }
|
||||
}
|
||||
```
|
||||
5. 在`try_join!`前后,精确地更新内存中`TaskProgress`的状态。
|
||||
- [x] **验收标准**: 在Message Bus中发布命令后,服务的日志能正确打印出从Alpha Vantage获取到的原始JSON数据。
|
||||
|
||||
- [x] **4. 实现数据转换与持久化 (强类型映射)**
|
||||
- [x] **任务**: **(关键变更)** 实现 `TryFrom<serde_json::Value>` Trait,完成从动态JSON到我们`common-contracts`模型的**带错误处理的**转换。
|
||||
```rust
|
||||
// alphavantage-provider-service/src/mapping.rs
|
||||
use serde_json::Value;
use anyhow::anyhow;
|
||||
use common_contracts::models as our;
|
||||
|
||||
impl TryFrom<Value> for our::CompanyProfile {
|
||||
type Error = anyhow::Error; // Or a more specific parsing error
|
||||
|
||||
fn try_from(v: Value) -> Result<Self, Self::Error> {
|
||||
Ok(our::CompanyProfile {
|
||||
symbol: v["Symbol"].as_str().ok_or_else(|| anyhow!("Missing Symbol"))?.to_string(),
|
||||
name: v["Name"].as_str().ok_or_else(|| anyhow!("Missing Name"))?.to_string(),
|
||||
// ... 其他字段的安全解析和转换
|
||||
})
|
||||
}
|
||||
}
|
||||
```
|
||||
- [x] **任务**: 创建一个类型化的HTTP客户端 (Data Persistence Client),用于与`data-persistence-service`通信。
|
||||
- [x] **任务**: 在所有数据转换成功后,调用上述客户端进行持久化。
|
||||
- [x] **验收标准**: 数据库中查询到的数据,结构完全符合`common-contracts`定义。
|
||||
|
||||
- [x] **5. 实现事件发布与任务完成**
|
||||
- [x] **任务**: 在数据成功持久化到数据库后,向Message Bus发布相应的数据就绪事件,如`CompanyProfilePersistedEvent`和`FinancialsPersistedEvent`。
|
||||
- [x] **任务**: 在所有流程执行完毕(无论成功或失败)后,从内存存储中移除对应的`TaskProgress`对象(或将其标记为“已完成”并设置TTL)。
|
||||
- [x] **验收标准**: 能够在Message Bus中监听到本服务发布的事件。`/tasks`接口不再显示已完成的任务。
|
||||
|
||||
---
|
||||
|
||||
### **阶段二:重构API网关与请求流程 (精确实现蓝图)**
|
||||
|
||||
**目标**: 创建一个纯Rust的`api-gateway`服务,它将作为前端的唯一入口(BFF),负责发起数据获取任务、查询持久化数据以及追踪分布式任务进度。
|
||||
|
||||
- [x] **1. 项目初始化与 `SystemModule` 规范实现**
|
||||
- [x] **任务**: 基于我们的微服务模板,创建新的Rust项目 `services/api-gateway`。
|
||||
- [x] **任务**: 在其`Cargo.toml`中添加核心依赖: `axum`, `tokio`, `common-contracts`, `async-nats`, `reqwest`, `tracing`, `config`。
|
||||
- [x] **任务**: 实现强制的`/health`端点。
|
||||
- [x] **任务**: 实现强制的`/tasks`端点。由于网关本身是无状态的、不执行长任务,此端点当前可以简单地返回一个空数组`[]`。
|
||||
- [x] **验收标准**: `api-gateway`服务可以独立编译和运行,并且`/health`接口按预期工作。
|
||||
|
||||
- [x] **2. 实现数据触发流程 (发布命令)**
|
||||
- [x] **任务**: 在`api-gateway`中创建一个新的HTTP端点 `POST /v1/data-requests`,它应接收一个JSON体,例如: `{"symbol": "AAPL", "market": "US"}`。
|
||||
- [x] **任务**: 为此端点实现处理逻辑:
|
||||
1. 生成一个全局唯一的 `request_id` (UUID)。
|
||||
2. 创建一个`common_contracts::messages::FetchCompanyDataCommand`消息,填入请求参数和`request_id`。
|
||||
3. 连接到Message Bus,并将此命令发布到`data_fetch_commands`队列。
|
||||
4. 向前端立即返回 `202 Accepted` 状态码,响应体中包含 `{ "request_id": "..." }`,以便前端后续追踪。
|
||||
- [x] **验收标准**: 通过工具(如Postman)调用此端点后,能够在NATS的管理界面看到相应的消息被发布,同时`alphavantage-provider-service`的日志显示它已接收并开始处理该命令。
|
||||
|
||||
- [x] **3. 实现数据查询流程 (读取持久化数据)**
|
||||
- [x] **任务**: 在`api-gateway`中创建一个类型化的HTTP客户端 (Persistence Client),用于与`data-persistence-service`通信。
|
||||
- [x] **任务**: 实现 `GET /v1/companies/{symbol}/profile` 端点。该端点接收股票代码,通过Persistence Client调用`data-persistence-service`的相应接口,并将查询到的`CompanyProfile`数据返回给前端。
|
||||
- [x] **任务**: (可选) 根据需要,实现查询财务报表、行情数据等其他数据类型的端点。
|
||||
- [x] **验收标准**: 在`alphavantage-provider-service`成功写入数据后,通过浏览器或`curl`调用这些新端点,可以查询到预期的JSON数据。
|
||||
|
||||
- [x] **4. 实现分布式任务进度追踪**
|
||||
- [x] **任务**: 在`api-gateway`的配置中,增加一个`provider_services`字段,用于列出所有数据提供商服务的地址,例如: `["http://alphavantage-provider-service:8000"]`。
|
||||
- [x] **任务**: 实现 `GET /v1/tasks/{request_id}` 端点。
|
||||
- [x] **任务**: 该端点的处理逻辑需要:
|
||||
1. 读取配置中的`provider_services`列表。
|
||||
2. 使用`tokio::join!`或`futures::future::join_all`,**并行地**向所有provider服务的`/tasks`端点发起HTTP GET请求。
|
||||
3. 聚合所有服务的返回结果(一个`Vec<Vec<TaskProgress>>`),并从中线性搜索与URL路径中`request_id`匹配的`TaskProgress`对象。
|
||||
4. 如果找到匹配的任务,将其作为JSON返回。如果遍历完所有结果都未找到,则返回`404 Not Found`。
|
||||
- [x] **验收标准**: 当`alphavantage-provider-service`正在处理一个任务时,通过`api-gateway`的这个新端点(并传入正确的`request_id`),能够实时查询到该任务的进度详情。
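上面第 2–4 步的并行聚合与线性搜索,大致如下(`TaskProgress` 假定来自 `common-contracts` 且带有 `request_id` 字段,模块路径与 URL 拼接方式均为假设):

```rust
use common_contracts::observability::TaskProgress; // 模块路径为假设
use futures::future::join_all;
use uuid::Uuid;

// 并行向所有 provider 的 /tasks 端点发起请求,聚合后查找匹配 request_id 的任务
async fn find_task(
    http: &reqwest::Client,
    provider_services: &[String],
    request_id: Uuid,
) -> Option<TaskProgress> {
    let requests = provider_services.iter().map(|base| {
        let url = format!("{base}/tasks");
        async move {
            http.get(&url)
                .send()
                .await
                .ok()?
                .json::<Vec<TaskProgress>>()
                .await
                .ok()
        }
    });

    join_all(requests)
        .await
        .into_iter()
        .flatten() // 丢弃请求失败的 provider
        .flatten() // 展平每个 provider 返回的任务列表
        .find(|task| task.request_id == request_id)
}
```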
|
||||
|
||||
---
|
||||
|
||||
### **阶段三:逐步迁移与替换 (精确实现蓝图)**
|
||||
|
||||
**目标**: 将前端应用无缝对接到新的Rust `api-gateway`,并随着数据提供商的逐步完善,最终彻底移除旧的Python `backend`服务,完成整个系统的架构升级。
|
||||
|
||||
- [x] **1. 将 `api-gateway` 集成到开发环境**
|
||||
- [x] **任务**: 在根目录的 `docker-compose.yml` 文件中,为我们新创建的 `api-gateway` 服务添加入口定义。
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
# ... other services
|
||||
api-gateway:
|
||||
build:
|
||||
context: ./services/api-gateway
|
||||
dockerfile: Dockerfile
|
||||
container_name: api-gateway
|
||||
environment:
|
||||
# 注入所有必要的配置
|
||||
SERVER_PORT: 4000
|
||||
NATS_ADDR: nats://nats:4222
|
||||
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
|
||||
# 注意: provider_services需要包含所有provider的内部地址
|
||||
PROVIDER_SERVICES: '["http://alphavantage-provider-service:8000"]'
|
||||
ports:
|
||||
- "14000:4000"
|
||||
depends_on:
|
||||
- nats
|
||||
- data-persistence-service
|
||||
- alphavantage-provider-service
|
||||
```
|
||||
- [x] **任务**: (如果尚未完成) 在`docker-compose.yml`中添加`nats`服务。
|
||||
- [x] **验收标准**: 运行 `docker-compose up` (或 `tilt up`) 后,`api-gateway` 服务能够成功启动并连接到消息总线。
|
||||
|
||||
- [x] **2. 迁移前端应用的API调用逻辑**
|
||||
- [x] **任务**: 修改前端项目的环境变量,将API请求的目标从旧`backend`指向新`api-gateway`。
|
||||
```
|
||||
# frontend/.env.local (or in docker-compose.yml)
|
||||
NEXT_PUBLIC_BACKEND_URL=http://api-gateway:4000/v1
|
||||
```
|
||||
- [x] **任务**: 重构前端的数据获取Hooks(例如 `useApi.ts`)。
|
||||
- **旧逻辑**: 发起一个长轮询GET请求,等待完整数据返回。
|
||||
- **新逻辑**:
|
||||
1. **触发**: 发起一个 `POST` 请求到 `/data-requests`,并从响应中获取 `request_id`。
|
||||
2. **轮询**: 使用 `useSWR` 或 `react-query` 的轮询功能,每隔2-3秒调用一次 `GET /tasks/{request_id}` 端点来获取任务进度。
|
||||
3. **展示**: 根据任务进度更新UI(例如,显示加载条和状态信息)。
|
||||
4. **完成**: 当任务状态变为 "completed" (或类似状态),或 `GET /tasks/{request_id}` 返回 `404` 时,停止轮询,并调用 `GET /companies/{symbol}/profile` 等数据查询端点来获取最终数据并渲染。
|
||||
- [x] **验收标准**: 在前端页面输入股票代码并点击“生成报告”后,能够触发新的异步工作流,并在UI上看到实时进度的更新,最终成功展示由 `alphavantage-provider-service` 获取的数据。
|
||||
|
||||
---
|
||||
|
||||
### **阶段四:数据提供商生态系统扩展 (Data Provider Ecosystem Expansion)**
|
||||
|
||||
**目标**: 将现有Python `backend`中的核心`data_providers`,逐一重写为独立的Rust微服务,丰富我们的数据维度。
|
||||
|
||||
- [ ] **0. (前置任务) 完成所有Provider的适配性分析**
|
||||
- [ ] **任务**: 在开始大规模编码前,完成 **附录A** 中所有待迁移数据提供商的适配性分析,确保`common-contracts`模型的完备性,并明确每个Provider的实现关键点。
|
||||
|
||||
- [x] **1. 实现 `tushare-provider-service` (中国市场核心)**
|
||||
- [x] **任务**: 基于 `alphavantage-provider-service` 模板,创建并实现服务的基本框架。
|
||||
- [x] **任务**: 完成 Tushare 8个核心API的并行数据获取、聚合与报告期筛选逻辑。
|
||||
- [x] **任务**: 在 `mapping.rs` 中,精确复刻Python版本 `_calculate_derived_metrics` 方法中近20个派生财务指标的计算逻辑。
|
||||
- [x] **任务**: 在 `docker-compose.yml`中添加此服务,并将其地址加入到`api-gateway`的`PROVIDER_SERVICES`环境变量中。
|
||||
- [x] **验收标准**: 收到`market: "CN"`的`FetchCompanyDataCommand`命令时,该服务能被触发,并成功将与Python版本逻辑一致的A股数据(**包含所有派生指标**)写入数据库。
|
||||
|
||||
- [x] **2. (可选) 迁移其他数据提供商**
|
||||
- [x] **任务**: 基于各自的适配性分析,创建并实现`finnhub-provider-service`。
|
||||
- [x] **任务**: 基于各自的适配性分析,创建并实现`yfinance-provider-service`。
|
||||
- [ ] **任务**: 基于各自的适配性分析,创建并实现`ifind-provider-service`。
|
||||
|
||||
---
|
||||
|
||||
### **阶段五:业务逻辑迁移与最终替换 (Business Logic Migration & Final Replacement)**
|
||||
|
||||
**目标**: 将Python `backend`中剩余的AI分析和配置管理逻辑迁移到Rust生态,并最终彻底下线Python服务。
|
||||
|
||||
- [x] **1. 创建 `report-generator-service`**
|
||||
- [x] **任务**: 创建一个新的Rust微服务`report-generator-service`。
|
||||
- [x] **任务**: 实现对Message Bus事件(如`FinancialsPersistedEvent`)的订阅。
|
||||
- [x] **任务**: 将原Python `backend`中的`analysis_client.py`和`company_profile_client.py`的逻辑迁移至此服务。
|
||||
- [x] **验收标准**: 当所有数据提供商完成数据写入后,此服务能被自动触发,并成功生成AI分析报告。
|
||||
|
||||
- [x] **2. (可选) 创建 `config-service-rs`**
|
||||
- [x] **任务**: 用Rust重写现有的Python `config-service`。
|
||||
- [x] **验收标准**: 所有Rust微服务都能从新的配置服务中获取配置并正常启动。
|
||||
|
||||
- [x] **3. 光荣退役:下线所有Python服务**
|
||||
- [x] **前提条件**: 所有数据获取和AI分析功能均已由新的Rust微服务完全承载。
|
||||
- [x] **任务**: 在 `docker-compose.yml` 中,删除 `backend` 和 `config-service` 的服务定义。
|
||||
- [x] **任务**: 将`backend/`和`services/config-service/`目录移动至`archive/python/`进行归档保留。
|
||||
- [x] **验收标准**: 整个系统在没有任何Python组件的情况下,依然能够完整、正常地运行所有核心功能。架构升级正式完成。
|
||||
|
||||
---
|
||||
|
||||
## 附录A: 数据提供商适配性分析
|
||||
|
||||
本附录用于详细记录每个待迁移的数据提供商API与我们`common-contracts`标准模型之间的适配性。
|
||||
|
||||
### A.1 Tushare 适配性分析
|
||||
|
||||
**核心结论**: 适配**完全可行**,但**计算逻辑复杂**。`common-contracts`无需调整。迁移工作的核心是精确复刻Python版本中近400行的财务数据聚合与派生指标计算逻辑。
|
||||
|
||||
**1. 数据模型适配概览**
|
||||
|
||||
| `common-contracts` 模型 | 适配可行性 | 关键实现要点 |
|
||||
| :--- | :--- | :--- |
|
||||
| **`CompanyProfile`** | ✅ **高** | 使用 `stock_basic` 和 `stock_company` 接口。 |
|
||||
| **`DailyMarketData`** | ✅ **高** | 关联 `daily` 和 `daily_basic` 接口。 |
|
||||
| **`RealtimeQuote`** | ⚠️ **中** | Tushare无直接对应接口,可使用最新日线数据作为“准实时”替代。 |
|
||||
| **`FinancialStatement`** | ✅ **高,但复杂** | **(核心难点)** 需聚合 `balancesheet`, `income`, `cashflow`, `fina_indicator` 等8个API,并复刻近20个派生指标的计算。 |
|
||||
|
||||
**2. 关键迁移逻辑**
|
||||
|
||||
- **多表聚合**: Rust版本需实现并行调用多个Tushare API,并以`end_date`为主键将结果聚合。
|
||||
- **报告期筛选**: 需复刻“今年的最新报告 + 往年所有年报”的筛选逻辑。
|
||||
- **派生指标计算**: 必须用Rust精确实现`_calculate_derived_metrics`方法中的所有计算公式。
|
||||
|
||||
---
|
||||
|
||||
### A.2 Finnhub 适配性分析
|
||||
|
||||
**核心结论**: 适配**可行**。Finnhub作为美股和全球市场的主要数据源,数据较为规范,但同样涉及**多API聚合**和**少量派生计算**。`common-contracts`无需调整。
|
||||
|
||||
**1. 数据模型适配概览**
|
||||
|
||||
| `common-contracts` 模型 | 适配可行性 | 关键实现要点 |
|
||||
| :--- | :--- | :--- |
|
||||
| **`CompanyProfile`** | ✅ **高** | 使用 `/stock/profile2` 接口。 |
|
||||
| **`DailyMarketData`** | ✅ **高** | 使用 `/stock/candle` 接口获取OHLCV,使用 `/stock/metric` 获取PE/PB等指标。 |
|
||||
| **`RealtimeQuote`** | ✅ **高** | 使用 `/quote` 接口。 |
|
||||
| **`FinancialStatement`** | ✅ **高,但需聚合** | 需聚合 `/stock/financials-reported` (按`ic`, `bs`, `cf`查询)返回的三张报表,并进行少量派生计算。 |
|
||||
|
||||
**2. 关键迁移逻辑**
|
||||
|
||||
- **多API聚合**: `FinancialStatement`的构建需要组合`/stock/financials-reported`接口的三次调用结果。`DailyMarketData`的构建也需要组合`/stock/candle`和`/stock/metric`。
|
||||
- **派生指标计算**: Python代码 (`finnhub.py`) 中包含了自由现金流 (`__free_cash_flow`) 和其他一些比率的计算,这些需要在Rust中复刻。
|
||||
- **字段名映射**: Finnhub返回的字段名(如`netIncome`)需要被映射到我们标准模型的字段名(如`net_income`)。
|
||||
|
||||
---
|
||||
|
||||
### A.3 YFinance 适配性分析
|
||||
|
||||
**核心结论**: 适配**可行**,主要作为**行情数据**的补充或备用源。`yfinance`库的封装使得数据获取相对简单。
|
||||
|
||||
**1. 数据模型适配概览**
|
||||
|
||||
| `common-contracts` 模型 | 适配可行性 | 关键实现要点 |
|
||||
| :--- | :--- | :--- |
|
||||
| **`CompanyProfile`** | ✅ **中** | `ticker.info` 字典提供了大部分信息,但字段不如Finnhub或Tushare规范。 |
|
||||
| **`DailyMarketData`** | ✅ **高** | `ticker.history()` 方法是主要数据来源,可直接提供OHLCV。 |
|
||||
| **`RealtimeQuote`** | ⚠️ **低** | `yfinance`本身不是为实时流式数据设计的,获取的数据有延迟。 |
|
||||
| **`FinancialStatement`** | ✅ **中** | `ticker.financials`, `ticker.balance_sheet`, `ticker.cashflow` 提供了数据,但需要手动将多年度的数据列转换为按年份的记录行。 |
|
||||
|
||||
**2. 关键迁移逻辑**
|
||||
|
||||
- **数据结构转换**: `yfinance`返回的DataFrame需要被转换为我们期望的`Vec<Record>`结构。特别是财务报表,需要将列式(多年份)数据转换为行式(单年份)记录。
|
||||
- **库的替代**: Rust中没有`yfinance`库。我们需要找到一个替代的Rust库 (如 `yahoo_finance_api`),或者直接模拟其HTTP请求来获取数据。这将是迁移此模块的主要工作。
|
||||
|
||||
---
|
||||
|
||||
- [ ] **2. (可选) 迁移其他数据提供商**
|
||||
- [x] **任务**: 基于各自的适配性分析,创建并实现`finnhub-provider-service`。
|
||||
- [x] **任务**: 基于各自的适配性分析,创建并实现`yfinance-provider-service`。
|
||||
- [ ] **任务**: **(已暂停/待独立规划)** 实现`ifind-provider-service`。
|
||||
|
||||
---
|
||||
|
||||
### A.4 iFind 适配性分析 - **更新于 2025-11-16**
|
||||
|
||||
**核心结论**: **当前阶段纯Rust迁移复杂度极高,任务已暂停**。iFind的Python接口 (`iFinDPy.py`) 是一个基于 `ctypes` 的薄封装,它直接调用了底层的C/C++动态链接库 (`.so` 文件)。这意味着没有任何可见的网络协议或HTTP请求可供我们在Rust中直接模拟。
|
||||
|
||||
**1. 迁移路径评估**
|
||||
|
||||
基于对 `ref/ifind` 库文件的调研,我们确认了迁移此模块面临两个选择:
|
||||
|
||||
1. **HTTP API方案 (首选,待调研)**:
|
||||
- **描述**: 您提到iFind存在一个HTTP API版本。这是最符合我们纯Rust、去中心化架构的理想路径。
|
||||
- **工作量评估**: **中等**。如果该HTTP API文档齐全且功能满足需求,那么开发此服务的工作量将与 `finnhub-provider-service` 类似。
|
||||
- **规划**: 此路径应作为一个**独立的、后续的调研与开发任务**。当前置于暂停状态。
|
||||
|
||||
2. **FFI方案 (备选,不推荐)**:
|
||||
- **描述**: 在Rust服务中通过FFI(如 `pyo3` 或 `rust-cpython` crate)嵌入Python解释器,直接调用 `iFinDPy` 库。
|
||||
- **工作量评估**: **高**。虽然可以复用逻辑,但这会引入技术栈污染,破坏我们纯Rust的目标,并显著增加部署和维护的复杂度(需要在容器中管理Python环境和iFind的二进制依赖)。这与我们“rustic”的确定性原则相悖。
|
||||
|
||||
**2. 最终决定**
|
||||
|
||||
- **暂停实现**: `ifind-provider-service` 的开发工作已**正式暂停**。
|
||||
- **更新路线图**: 在主路线图中,此任务已被标记为“已暂停/待独立规划”。
|
||||
- **未来方向**: 当项目进入下一阶段时,我们将启动一个独立的任务来**专门调研其HTTP API**,并基于调研结果决定最终的实现策略。
|
||||
|
||||
---
|
||||
|
||||
## 附录B: 业务逻辑模块迁移分析
|
||||
|
||||
本附录用于分析`backend/app/services/`中包含的核心业务逻辑,并为将其迁移至Rust服务制定策略。
|
||||
|
||||
### B.1 `analysis_client.py` & `company_profile_client.py`
|
||||
|
||||
- **核心功能**: 这两个模块是AI分析的核心,负责与大语言模型(如Gemini)API进行交互。
|
||||
- `analysis_client.py`: 提供一个**通用**的分析框架,可以根据不同的`prompt_template`执行任意类型的分析。它还包含一个`SafeFormatter`来安全地填充模板。
|
||||
- `company_profile_client.py`: 是一个**特化**的版本,包含了用于生成公司简介的、具体的、硬编码的长篇Prompt。
|
||||
|
||||
- **迁移策略**:
|
||||
1. **统一并重写为 `report-generator-service`**: 这两个模块的功能应被合并并迁移到一个全新的Rust微服务——`report-generator-service`中。
|
||||
2. **订阅事件**: 该服务将订阅Message Bus上的数据就绪事件(如`FinancialsPersistedEvent`),而不是被HTTP直接调用。
|
||||
3. **Prompt管理**: 硬编码在`company_profile_client.py`中的Prompt,以及`analysis_client.py`所依赖的、从`analysis-config.json`加载的模板,都应该由`report-generator-service`统一管理。在初期,可以从配置文件加载;未来,可以由Rust版的`config-service-rs`提供。
|
||||
4. **复刻`SafeFormatter`**: Python版本中用于安全填充模板的`SafeFormatter`逻辑需要在Rust中被等价复刻,以确保在上下文不完整时系统的健壮性。
|
||||
5. **AI客户端**: 使用`reqwest`或其他HTTP客户端库在Rust中重新实现与大模型API的交互逻辑。
|
||||
|
||||
- **结论**: 迁移**完全可行**。核心工作是将Python中的Prompt管理和API调用逻辑,用Rust的异步方式重写。这将使AI分析任务成为一个独立的、可扩展的、事件驱动的后台服务。
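上文第 4 点提到的 `SafeFormatter` 等价复刻,其核心语义是“模板中缺失的占位符不报错、原样保留”。下面是一个不依赖第三方模板引擎的最小草图(占位符语法假定为 `{key}`):

```rust
use std::collections::HashMap;

/// 将模板中的 {key} 替换为 context 中的值;缺失的占位符原样保留,保证上下文不完整时不中断流程
fn safe_format(template: &str, context: &HashMap<String, String>) -> String {
    let mut result = template.to_string();
    for (key, value) in context {
        result = result.replace(&format!("{{{key}}}"), value);
    }
    result
}

// 用法示例:
// let mut ctx = HashMap::new();
// ctx.insert("symbol".to_string(), "AAPL".to_string());
// assert_eq!(
//     safe_format("分析 {symbol},数据:{financials}", &ctx),
//     "分析 AAPL,数据:{financials}"
// );
```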
|
||||
|
||||
---
|
||||
|
||||
### B.2 `config_manager.py`
|
||||
|
||||
- **核心功能**: 作为Python `backend`内部的一个组件,它负责从`config-service`拉取配置,并与本地`config.json`文件进行合并。它还包含了测试各种配置有效性的逻辑(如测试数据库连接、Tushare Token等)。
|
||||
|
||||
- **迁移策略**:
|
||||
- **功能分散化**: `ConfigManager`本身不会作为一个独立的Rust服务存在,它的功能将被**分散**到每个需要它的微服务中。
|
||||
- **配置拉取**: 每个Rust微服务(`api-gateway`, `tushare-provider`等)在启动时,都将负责**独立地**从环境变量或未来的`config-service-rs`中获取自己的配置。我们为每个服务编写的`config.rs`模块已经实现了这一点。
|
||||
- **配置测试逻辑**: 测试配置的逻辑(如`_test_database`, `_test_tushare`)非常有用,但不属于运行时功能。这些逻辑可以被迁移到:
|
||||
1. **独立的CLI工具**: 创建一个Rust CLI工具,专门用于测试和验证整个系统的配置。
|
||||
2. **服务的`/health`端点**: 在每个服务的`/health`检查中,可以包含对其依赖服务(数据库、外部API)连通性的检查,从而在运行时提供健康状况反馈。
|
||||
|
||||
- **结论**: `ConfigManager`的功能将被**“肢解”并吸收**到新的Rust微服务生态中,而不是直接迁移。
|
||||
|
||||
---
|
||||
|
||||
### B.3 `data_persistence_client.py`
|
||||
|
||||
- **核心功能**: 这是一个HTTP客户端,用于让Python `backend`与其他微服务(`data-persistence-service`)通信。
|
||||
|
||||
- **迁移策略**:
|
||||
- **模式复用**: 这个模块本身就是我们新架构模式的一个成功范例。
|
||||
- **Rust等价实现**: 我们在`alphavantage-provider-service`中创建的`persistence.rs`客户端,以及即将在`api-gateway`和`report-generator-service`中创建的类似客户端,正是`data_persistence_client.py`的Rust等价物。
|
||||
- **最终废弃**: 当Python `backend`最终被下线时,这个客户端模块也将随之被废弃。
|
||||
|
||||
- **结论**: 该模块**无需迁移**,其设计思想已被我们的Rust服务所采纳和实现。
|
||||
@ -1,245 +0,0 @@
|
||||
---
|
||||
status: "Active"
|
||||
date: "2025-11-17"
|
||||
author: "AI 助手"
|
||||
---
|
||||
|
||||
# 设计文档:可配置的分析模板与编排器
|
||||
|
||||
## 1. 概述与目标
|
||||
|
||||
### 1.1. 问题陈述
|
||||
|
||||
我们当前基于 Rust 的后端缺少执行智能、多步骤财务分析所需的核心业务逻辑。`report-generator-service` 作为此逻辑的载体,其内部实现尚不完整。更重要的是,当前的系统设计缺少一个清晰的、可扩展的方式来管理和复用成套的分析流程,并且在配置初始化方面存在对本地文件的依赖,这不符合我们健壮的系统设计原则。
|
||||
|
||||
### 1.2. 目标
|
||||
|
||||
本任务旨在我们的 Rust 微服务架构中,设计并实现一个以**分析模板集(Analysis Template Sets)**为核心的、健壮的、可配置的**分析模块编排器**。该系统将允许我们创建、管理和执行多套独立的、包含复杂依赖关系的分析工作流。
|
||||
|
||||
为达成此目标,需要完成以下任务:
|
||||
1. **引入分析模板集**:在系统顶层设计中引入“分析模板集”的概念,每个模板集包含一套独立的分析模块及其配置。
|
||||
2. **实现前端模板化管理**:在前端配置中心实现对“分析模板集”的完整 CRUD 管理,并允许在每个模板集内部对分析模块进行 CRUD 管理。
|
||||
3. **构建健壮的后端编排器**:在 `report-generator-service` 中实现一个能够执行指定分析模板集的后端编排器,该编排器需基于拓扑排序来处理模块间的依赖关系。
|
||||
4. **实现无文件依赖的数据初始化**:通过在服务二进制文件中嵌入默认配置的方式,实现系统首次启动时的数据播种(Seeding),彻底移除对本地配置文件的依赖。
|
||||
|
||||
## 2. 新数据模型 (`common-contracts`)
|
||||
|
||||
为了支持“分析模板集”的概念,我们需要定义新的数据结构。
|
||||
|
||||
```rust
|
||||
// common-contracts/src/config_models.rs
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// 整个系统的分析模板配置,作为顶级对象存储在数据库中
|
||||
// Key: 模板ID (e.g., "standard_fundamentals")
|
||||
pub type AnalysisTemplateSets = HashMap<String, AnalysisTemplateSet>;
|
||||
|
||||
// 单个分析模板集,代表一套完整的分析流程
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct AnalysisTemplateSet {
|
||||
pub name: String, // 人类可读的模板名称, e.g., "标准基本面分析"
|
||||
// 该模板集包含的所有分析模块
|
||||
// Key: 模块ID (e.g., "fundamental_analysis")
|
||||
pub modules: HashMap<String, AnalysisModuleConfig>,
|
||||
}
|
||||
|
||||
// 单个分析模块的配置 (与之前定义保持一致)
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct AnalysisModuleConfig {
|
||||
pub name: String,
|
||||
pub provider_id: String,
|
||||
pub model_id: String,
|
||||
pub prompt_template: String,
|
||||
// 依赖关系列表,其中的字符串必须是同一个模板集内其他模块的ID
|
||||
pub dependencies: Vec<String>,
|
||||
}
|
||||
```
|
||||
|
||||
## 3. 系统架构与数据流
|
||||
|
||||
### 3.1. 高层数据流
|
||||
|
||||
1. **配置流程**:
|
||||
* **用户** 在 **前端** 与配置页面交互,创建或修改一个“分析模板集”。
|
||||
* **前端** 向 **API 网关** 发送 `PUT /api/v1/configs/analysis_template_sets` 请求。
|
||||
* **API 网关** 将请求代理至 **数据持久化服务**,由其将序列化后的 `AnalysisTemplateSets` 对象完整保存到数据库中。
|
||||
|
||||
2. **执行流程**:
|
||||
* **用户** 在 **前端** 选择一个**分析模板集**,然后为特定的股票代码触发分析。
|
||||
* **前端** 向 **API 网关** 发送 `POST /api/v1/analysis-requests/{symbol}` 请求,请求体中包含所选的 `template_id`。
|
||||
* **API 网关** 验证请求,并向 **NATS 消息总线** 发布一条包含 `symbol`, `template_id` 和 `request_id` 的 `GenerateReportCommand` 消息。
|
||||
* **报告生成服务** 订阅该消息,并根据 `template_id` 启动指定的编排工作流。
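按上面的数据流,`GenerateReportCommand` 的形态大致如下(字段取自本节描述,派生宏等细节为假设,应放入 `common-contracts` 的消息契约中):

```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

// 由 API 网关发布、报告生成服务消费的命令消息
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct GenerateReportCommand {
    pub request_id: Uuid,
    pub symbol: String,
    pub template_id: String, // 指定本次执行的分析模板集
}
```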
|
||||
|
||||
## 4. 前端实施计划 (`/config` 页面)
|
||||
|
||||
前端配置页面需要重构为两级结构:
|
||||
|
||||
1. **第一级:模板集管理**
|
||||
* 显示一个包含所有“分析模板集”的列表。
|
||||
* 提供“创建新模板集”、“重命名”、“删除模板集”的功能。
|
||||
* 用户选择一个模板集后,进入第二级管理界面。
|
||||
|
||||
2. **第二级:分析模块管理 (在选定的模板集内)**
|
||||
* **主界面**: 进入模板集后,主界面将以列表形式展示该模板集内所有的分析模块。每个模块将以一个独立的“卡片”形式呈现。
|
||||
* **创建 (Create)**:
|
||||
* 在模块列表的顶部或底部,将设置一个“新增分析模块”按钮。
|
||||
* 点击后,将展开一个表单,要求用户输入新模块的**模块ID**(唯一的、机器可读的英文标识符)和**模块名称**(人类可读的显示名称)。
|
||||
* **读取 (Read)**:
|
||||
* 每个模块卡片默认会显示其**模块名称**和**模块ID**。
|
||||
* 卡片可以被展开,以显示其详细配置。
|
||||
* **更新 (Update)**:
|
||||
* 在展开的模块卡片内,所有配置项均可编辑:
|
||||
* **LLM Provider**: 一个下拉菜单,选项为系统中所有已配置的LLM供应商。
|
||||
* **Model**: 一个级联下拉菜单,根据所选的Provider动态加载其可用模型。
|
||||
* **提示词模板**: 一个多行文本输入框,用于编辑模块的核心Prompt。
|
||||
* **依赖关系**: 一个复选框列表,该列表**仅显示当前模板集内除本模块外的所有其他模块**,用于勾选依赖项。
|
||||
* **删除 (Delete)**:
|
||||
* 每个模块卡片的右上角将设置一个“删除”按钮。
|
||||
* 点击后,会弹出一个确认对话框,防止用户误操作。
|
||||
|
||||
## 5. 数据库与数据结构设计
|
||||
|
||||
为了支撑上述功能,我们需要在 `data-persistence-service` 中明确两个核心的数据存储模型:一个用于存储**配置**,一个用于存储**结果**。
|
||||
|
||||
### 5.1. 配置存储:`system_config` 表
|
||||
|
||||
我们将利用现有的 `system_config` 表来存储整个分析模板集的配置。
|
||||
|
||||
- **用途**: 作为所有分析模板集的“单一事实来源”。
|
||||
- **存储方式**:
|
||||
- 表中的一条记录。
|
||||
- `config_key` (主键): `analysis_template_sets`
|
||||
- `config_value` (类型: `JSONB`): 存储序列化后的 `AnalysisTemplateSets` (即 `HashMap<String, AnalysisTemplateSet>`) 对象。
|
||||
- **对应数据结构 (`common-contracts`)**: 我们在第2节中定义的 `AnalysisTemplateSets` 类型是此记录的直接映射。
|
||||
|
||||
### 5.2. 结果存储:`analysis_results` 表 (新)
|
||||
|
||||
为了存储每次分析工作流执行后,各个模块生成的具体内容,我们需要一张新表。
|
||||
|
||||
- **表名**: `analysis_results`
|
||||
- **用途**: 持久化存储每一次分析运行的产出,便于历史追溯和未来查询。
|
||||
- **SQL Schema**:
|
||||
```sql
CREATE TABLE analysis_results (
    id BIGSERIAL PRIMARY KEY,
    request_id UUID NOT NULL,          -- 关联单次完整分析请求的ID
    symbol VARCHAR(32) NOT NULL,       -- 关联的股票代码
    template_id VARCHAR(64) NOT NULL,  -- 使用的分析模板集ID
    module_id VARCHAR(64) NOT NULL,    -- 产出此结果的模块ID
    content TEXT NOT NULL,             -- LLM生成的分析内容
    meta_data JSONB,                   -- 存储额外元数据 (e.g., model_name, tokens, elapsed_ms)
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- 建立索引以优化查询(PostgreSQL 不支持在 CREATE TABLE 中内联 INDEX,需单独创建)
CREATE INDEX idx_analysis_results_request_id ON analysis_results (request_id);
CREATE INDEX idx_analysis_results_symbol_template ON analysis_results (symbol, template_id);
```
|
||||
- **对应数据结构 (`common-contracts`)**:
|
||||
```rust
|
||||
// common-contracts/src/dtos.rs
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct NewAnalysisResult {
|
||||
pub request_id: Uuid,
|
||||
pub symbol: String,
|
||||
pub template_id: String,
|
||||
pub module_id: String,
|
||||
pub content: String,
|
||||
pub meta_data: serde_json::Value,
|
||||
}
|
||||
```
|
||||
|
||||
## 6. 后端实施计划
|
||||
|
||||
### 6.1. `data-persistence-service`
|
||||
|
||||
- **数据初始化 (无文件依赖)**: 实现一次性的、基于硬编码的启动逻辑。
|
||||
1. 在 `data-persistence-service` 的代码中,将 `config/analysis-config.json` 的内容硬编码为一个 Rust 字符串常量。
|
||||
2. 在服务启动时,检查 `system_config` 表中是否存在键为 `analysis_template_sets` 的记录。
|
||||
3. 如果**不存在**,则:
|
||||
a. 解析硬编码的字符串,构建一个默认的 `AnalysisTemplateSet` (例如,ID为 `default`, 名称为 “默认分析模板”)。
|
||||
b. 将这个默认模板集包装进一个 `AnalysisTemplateSets` 的 HashMap 中。
|
||||
c. 将序列化后的 `AnalysisTemplateSets` 对象写入数据库。
|
||||
4. 此机制确保系统在首次部署时,无需任何外部文件即可拥有一套功能完备的默认分析模板。
|
||||
- **新职责**: 实现对 `analysis_results` 表的CRUD操作API。
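数据播种逻辑的一个最小草图如下(常量内容与表结构沿用上文设计;`ON CONFLICT` 的用法与 JSON 绑定为实现层面的假设,需启用 `sqlx` 的 `json` 特性):

```rust
use sqlx::PgPool;

// 编译期内嵌的默认分析模板集(JSON 字符串常量,内容即原 analysis-config.json,此处为占位示例)
const DEFAULT_ANALYSIS_TEMPLATE_SETS: &str =
    r#"{ "default": { "name": "默认分析模板", "modules": {} } }"#;

/// 服务启动时调用:仅当 analysis_template_sets 记录尚不存在时写入默认值
async fn seed_default_templates(pool: &PgPool) -> anyhow::Result<()> {
    let value: serde_json::Value = serde_json::from_str(DEFAULT_ANALYSIS_TEMPLATE_SETS)?;
    sqlx::query(
        "INSERT INTO system_config (config_key, config_value) VALUES ($1, $2) \
         ON CONFLICT (config_key) DO NOTHING",
    )
    .bind("analysis_template_sets")
    .bind(value)
    .execute(pool)
    .await?;
    Ok(())
}
```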
|
||||
|
||||
### 6.2. `api-gateway`
|
||||
|
||||
- **端点更新**: `POST /api/v1/analysis-requests/{symbol}`。
|
||||
- **逻辑变更**:
|
||||
* 该端点现在需要从请求体中解析出 `template_id`。
|
||||
* 它构建的 `GenerateReportCommand` 消息中,必须包含 `template_id` 字段。
|
||||
|
||||
### 6.3. `report-generator-service` (核心任务)
|
||||
|
||||
`worker.rs` 中的编排逻辑需要进行如下调整和实现:
|
||||
|
||||
1. **消息消费者**: 订阅的 `GenerateReportCommand` 消息现在会包含 `template_id`。
|
||||
|
||||
2. **编排逻辑 (`run_report_generation_workflow`)**:
|
||||
* **获取配置**: 从 `data-persistence-service` 获取完整的 `AnalysisTemplateSets` 对象。
|
||||
* **选择模板**: 根据传入的 `template_id`,从 `AnalysisTemplateSets` 中选择出本次需要执行的 `AnalysisTemplateSet`。如果找不到,则记录错误并终止。
|
||||
* **构建依赖图**: 使用所选模板集中的 `modules` 来构建有向图。强烈推荐使用 `petgraph` crate。
|
||||
* **拓扑排序**: 对该图执行拓扑排序,**必须包含循环检测**。
|
||||
* **顺序执行**: 遍历排序后的模块列表,后续的上下文注入、LLM调用和结果持久化逻辑与之前设计一致,但操作范围仅限于当前模板集内的模块。
|
||||
|
||||
3. **补全缺失逻辑**:
|
||||
* **实现结果持久化**: 调用 `data-persistence-service` 提供的API,将每个模块生成的 `NewAnalysisResult` 存入 `analysis_results` 表。
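上文“构建依赖图 + 拓扑排序(含循环检测)”的部分,用 `petgraph` 可以写成如下草图(`AnalysisModuleConfig` 来自 `common-contracts`,错误处理方式为示意):

```rust
use std::collections::HashMap;

use common_contracts::config_models::AnalysisModuleConfig;
use petgraph::algo::toposort;
use petgraph::graph::{DiGraph, NodeIndex};

// 根据模块间的 dependencies 计算执行顺序;存在循环依赖时返回错误
fn execution_order(
    modules: &HashMap<String, AnalysisModuleConfig>,
) -> anyhow::Result<Vec<String>> {
    let mut graph = DiGraph::<String, ()>::new();
    let mut idx: HashMap<&str, NodeIndex> = HashMap::new();

    // 先为每个模块建节点
    for id in modules.keys() {
        idx.insert(id.as_str(), graph.add_node(id.clone()));
    }
    // 再按 "依赖 -> 被依赖者" 的方向建边
    for (id, module) in modules {
        for dep in &module.dependencies {
            let from = *idx
                .get(dep.as_str())
                .ok_or_else(|| anyhow::anyhow!("模块 {id} 依赖了未定义的模块 {dep}"))?;
            graph.add_edge(from, idx[id.as_str()], ());
        }
    }
    // toposort 在存在环时返回 Err,由此实现循环依赖检测
    let order = toposort(&graph, None).map_err(|_| anyhow::anyhow!("检测到循环依赖"))?;
    Ok(order.into_iter().map(|n| graph[n].clone()).collect())
}
```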
|
||||
|
||||
## 7. 未来工作
|
||||
|
||||
### 7.1. 演进至 "Deep Research" 模块
|
||||
|
||||
此设计为未来的 "Deep Research" 模块演进奠定了坚实的基础。当该模块准备就绪时,我们可以创建一个新的“分析模板集”,其中的某些模块(如 `news_analysis`)将不再直接调用 LLM,而是调用 Deep Research 服务。Deep Research 服务将执行复杂的数据挖掘,并将高度精炼的结果返回给编排器,再由编排器注入到后续的 LLM 调用中,从而实现“数据驱动”的分析范式。
|
||||
|
||||
### 7.2. 引入工具调用框架 (Tool Calling Framework)
|
||||
|
||||
为了以一种更通用和可扩展的方式向提示词模板中注入多样化的上下文数据,我们规划引入“工具调用”框架。
|
||||
|
||||
- **概念**: “工具”是指一段独立的、用于获取特定类型数据的程序(例如,获取财务数据、获取实时股价、获取最新新闻等)。
|
||||
- **配置**: 在前端的模块配置界面,除了依赖关系外,我们还将为每个模块提供一个“可用工具”的复选框列表。用户可以为模块勾选需要调用的一个或多个工具。
|
||||
- **执行**:
|
||||
1. 在 `report-generator-service` 的编排器执行一个模块前,它会先检查该模块配置中启用了哪些“工具”。
|
||||
2. 编排器将按顺序执行这些工具。
|
||||
3. 每个工具的输出(例如,格式化为Markdown的财务数据表格)将被注入到一个统一的上下文字段中。
|
||||
- **首个工具**: 我们设想的第一个工具就是 **`财务数据注入工具`**。它将负责获取并格式化财务报表,其实现逻辑与本文档旧版本中描述的“核心逻辑细化”部分一致。
|
||||
|
||||
通过此框架,我们可以将数据注入的逻辑与编排器的核心逻辑解耦,使其更易于维护和扩展。**此项为远期规划,不在本轮实施范围之内。**
|
||||
|
||||
## 8. 实施清单 (Step-by-Step To-do List)
|
||||
|
||||
以下是为完成本项目所需的、按顺序排列的开发任务清单。
|
||||
|
||||
### 阶段一:数据模型与持久化层准备
|
||||
|
||||
- [x] **任务 1.1**: 在 `common-contracts` crate 中,创建或更新 `src/config_models.rs`,定义 `AnalysisTemplateSets`, `AnalysisTemplateSet`, `AnalysisModuleConfig` 等新的数据结构。
|
||||
- [x] **任务 1.2**: 在 `common-contracts` crate 中,创建或更新 `src/dtos.rs`,定义用于写入分析结果的 `NewAnalysisResult` 数据传输对象 (DTO)。
|
||||
- [x] **任务 1.3**: 在 `data-persistence-service` 中,创建新的数据库迁移文件 (`migrations/`),用于新增 `analysis_results` 表,其 schema 遵循本文档第5.2节的定义。
|
||||
- [x] **任务 1.4**: 在 `data-persistence-service` 中,实现 `analysis_results` 表的 CRUD API (至少需要 `create` 方法)。
|
||||
- [x] **任务 1.5**: 在 `data-persistence-service` 中,实现数据播种(Seeding)逻辑:在服务启动时,将硬编码的默认分析模板集写入数据库(如果尚不存在)。
|
||||
|
||||
### 阶段二:后端核心逻辑实现 (`report-generator-service`)
|
||||
|
||||
- [x] **任务 2.1**: 为 `report-generator-service` 添加 `petgraph` crate 作为依赖,用于构建和处理依赖图。
|
||||
- [x] **任务 2.2**: 重构 `worker.rs` 中的 `run_report_generation_workflow` 函数,使其能够接收包含 `template_id` 的消息。
|
||||
- [x] **任务 2.3**: 在 `worker.rs` 中,**实现完整的拓扑排序算法**,用以替代当前简陋的循环实现。此算法必须包含循环依赖检测。
|
||||
- [x] **任务 2.4**: 更新编排器逻辑,使其能够根据 `template_id` 从获取到的 `AnalysisTemplateSets` 中选择正确的工作流进行处理。
|
||||
- [x] **任务 2.5**: 实现调用 `data-persistence-service` 的逻辑,将每个模块成功生成的 `NewAnalysisResult` 持久化到 `analysis_results` 表中。
|
||||
|
||||
### 阶段三:服务集成与端到端打通
|
||||
|
||||
- [x] **任务 3.1**: 在 `api-gateway` 中,新增 `POST /api/v1/analysis-requests/{symbol}` 端点。
|
||||
- [x] **任务 3.2**: 在 `api-gateway` 的新端点中,实现接收前端请求(包含 `template_id`),并向 NATS 发布 `GenerateReportCommand` 消息的逻辑。
|
||||
- [x] **任务 3.3**: 在 `report-generator-service` 中,更新其 NATS 消费者,使其能够正确订阅和解析新的 `GenerateReportCommand` 消息。
|
||||
- [x] **任务 3.4**: 进行端到端集成测试,确保从前端触发的请求能够正确地启动 `report-generator-service` 并执行完整的分析流程(此时可不关心前端UI)。
|
||||
|
||||
### 阶段四:前端 UI 实现
|
||||
|
||||
- [x] **任务 4.1**: 重构 `frontend/src/app/config/page.tsx` 页面,实现两级管理结构:先管理“分析模板集”。
|
||||
- [x] **任务 4.2**: 实现“分析模板集”的创建、重命名和删除功能,并调用对应的后端API。
|
||||
- [x] **任务 4.3**: 实现模板集内部的“分析模块”管理界面,包括模块的创建、更新(所有字段)和删除功能。
|
||||
- [x] **任务 4.4**: 确保在分析请求发起的页面(例如主查询页面),用户可以选择使用哪个“分析模板集”来执行分析。
|
||||
- [x] **任务 4.5**: 更新前端调用 `api-gateway` 的逻辑,在分析请求的 body 中附带上用户选择的 `template_id`。
|
||||
@ -1,98 +0,0 @@
|
||||
# 任务文档:配置管理重构——统一API凭证管理
|
||||
|
||||
- **状态**: Active
|
||||
- **创建日期**: 2025-11-17
|
||||
- **负责人**: @AI-Assistant
|
||||
- **审查人**: @lv
|
||||
|
||||
---
|
||||
|
||||
## 1. 背景与目标
|
||||
|
||||
### 1.1. 当前问题
|
||||
|
||||
当前系统对外部服务(如 Tushare, Finnhub)API Token 的管理方式存在两个主要问题:
|
||||
|
||||
1. **配置方式分裂**:
|
||||
- **敏感凭证 (API Tokens)**: 通过启动时的**环境变量**注入。这种方式虽然安全,但缺乏灵活性,每次修改都需要重新部署或重启服务。
|
||||
- **业务逻辑配置 (AI模型选择等)**: 通过**数据库**统一管理,并支持UI动态调整。
|
||||
- 这种分裂的管理模式增加了系统的运维复杂性,与我们追求的“单一事实源”架构理念不符。
|
||||
|
||||
2. **服务韧性不足**:
|
||||
- 依赖环境变量的服务采取“快速失败” (Fail-Fast) 策略。如果启动时未提供有效的 API Token,服务会立即崩溃退出。
|
||||
- 这种模式虽然能尽早暴露问题,但在一个动态的、持续运行的系统中显得过于“僵硬”。我们期望的行为是:服务在缺少非核心配置时,应能进入一个“降级”状态,待配置就绪后再自动恢复工作,而不是直接停止运行。
|
||||
|
||||
### 1.2. 改造目标
|
||||
|
||||
本次重构旨在将所有外部服务的 API Token 配置,从环境变量迁移到数据库中,实现与业务逻辑配置的统一管理。具体目标如下:
|
||||
|
||||
- **统一配置源**: 将 `system_config` 数据库表作为所有可变配置(包括API Tokens)的唯一事实源。
|
||||
- **提升易用性**: 允许用户通过前端UI界面,集中管理和更新所有数据源的 API Token。
|
||||
- **增强服务韧性**: 改造数据提供商服务,使其在缺少 API Token 时不会崩溃,而是进入“降级模式”,并能在 Token 被提供后自动恢复正常工作。
|
||||
- **简化部署**: 移除对多个环境变量的依赖,使服务的部署和运维过程更加简洁。
|
||||
|
||||
---
|
||||
|
||||
## 2. 实施方案
|
||||
|
||||
本次改造将遵循“后端 -> 服务 -> 前端”的顺序分层实施,确保每一步都有坚实的基础。
|
||||
|
||||
### 2.1. 数据模型与持久化层
|
||||
|
||||
我们将通过复用 `system_config` 表中现有的 `(config_key, config_value)` 存储模式,来扩展配置管理的能力,使其能够安全地存储和检索数据源的配置。
|
||||
|
||||
1. **定义数据结构**: 在 `common-contracts` 共享库中,定义一个清晰的、用于描述数据源配置的 `DataSourceConfig` 结构体。它将包含 `provider_id`, `api_token`, `api_url` 等字段。
|
||||
2. **复用现有表结构**: 我们将向 `system_config` 表中插入一条新的记录,其 `config_key` 固定为 `"data_sources"`,并将所有数据源的配置集合(一个 `HashMap<String, DataSourceConfig>`)序列化后存入该记录的 `config_value` 字段中。
|
||||
3. **扩展API**: 在 `data-persistence-service` 中增加新的 HTTP API 端点,用于对数据源配置进行增、删、改、查(CRUD)操作。例如:
|
||||
- `GET /api/v1/configs/data-sources`: 获取所有数据源的配置列表。
|
||||
- `PUT /api/v1/configs/data-sources`: 创建或更新所有数据源的配置。
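`DataSourceConfig` 的形态大致如下,仅为草图(字段名取自上文描述,`Option` 包装与派生宏为假设):

```rust
// common-contracts/src/config_models.rs 中设想的新增内容
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct DataSourceConfig {
    pub provider_id: String,       // e.g. "tushare"、"finnhub"
    pub api_token: Option<String>, // 缺省表示尚未配置
    pub api_url: Option<String>,
}

// system_config 表中 config_key = "data_sources" 记录的序列化/反序列化目标
pub type DataSourcesConfig = HashMap<String, DataSourceConfig>;
```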
|
||||
|
||||
### 2.2. 微服务改造:引入“降级与恢复”模式
|
||||
|
||||
这是本次重构的核心。所有依赖外部 API Token 的数据提供商服务 (`finnhub`, `tushare`, `alphavantage`) 都将进行如下改造:
|
||||
|
||||
1. **移除启动时检查**: 删除 `config.rs` 中检查环境变量并导致程序崩溃的逻辑。
|
||||
2. **引入内部状态机**: 每个服务内部将维护一个状态(例如 `State<ServiceOperationalStatus>`),包含 `Active` 和 `Degraded(reason: String)` 两种状态。
|
||||
3. **动态配置加载**: 服务将不再从环境变量读取 Token,而是在内部启动一个**后台任务**(轮询器),该任务会:
|
||||
- 在服务启动时,以及之后每隔一段时间(例如 60 秒),调用 `data-persistence-service` 的新 API 来获取自己的配置。
|
||||
- 如果成功获取到有效的 Token,则更新服务内部的 API 客户端,并将服务状态设置为 `Active`。此时,服务正常订阅和处理来自 NATS 的消息。
|
||||
- 如果未能获取 Token(或 Token 为空),则将服务状态设置为 `Degraded`,并附上原因(如 "API Token not configured")。在这种状态下,服务**不会**订阅 NATS 消息队列,避免接收无法处理的任务。
|
||||
4. **更新健康检查**: 服务的 `/health` 端点将反映其内部状态。当处于 `Degraded` 状态时,健康检查接口应返回相应的状态码和信息,以便监控系统能够清晰地了解服务当前是否可用。
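状态机与后台轮询器的形态大致如下(`fetch_api_token` 为占位函数,真实实现应调用上文的 `GET /api/v1/configs/data-sources` 接口;订阅/退订 NATS 的细节此处省略):

```rust
use std::{sync::Arc, time::Duration};
use tokio::sync::RwLock;

#[derive(Debug, Clone)]
pub enum ServiceOperationalStatus {
    Active,
    Degraded(String),
}

// 占位函数:实际实现应向 data-persistence-service 查询本服务的数据源配置
async fn fetch_api_token() -> Option<String> {
    None
}

/// 后台轮询器:每 60 秒刷新一次配置,并据此在 Active / Degraded 之间切换
pub async fn config_poller(status: Arc<RwLock<ServiceOperationalStatus>>) {
    let mut ticker = tokio::time::interval(Duration::from_secs(60));
    loop {
        ticker.tick().await;
        let new_status = match fetch_api_token().await {
            Some(token) if !token.is_empty() => ServiceOperationalStatus::Active,
            _ => ServiceOperationalStatus::Degraded("API Token not configured".to_string()),
        };
        *status.write().await = new_status;
        // 注:切换到 Active 时还应(重新)订阅 NATS;切换到 Degraded 时应取消订阅
    }
}
```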
|
||||
|
||||
### 2.3. 前端UI实现
|
||||
|
||||
为了让用户能够方便地管理这些配置,我们将在前端进行如下调整:
|
||||
|
||||
1. **创建新UI组件**: 在 `/config` 页面,新增一个名为“数据源配置”的管理面板。
|
||||
2. **功能实现**: 该面板将提供一个表单或列表,允许用户:
|
||||
- 查看当前所有数据源(Tushare, Finnhub 等)的配置状态。
|
||||
- 为每个数据源输入或更新其 API Token。
|
||||
- 保存更改。点击保存后,前端将调用 `data-persistence-service` 的新 API,将更新后的配置持久化到数据库中。
|
||||
|
||||
---
|
||||
|
||||
## 3. 详细任务清单
|
||||
|
||||
### 第一阶段:后端基础
|
||||
|
||||
- [x] ~~**任务 BE-1**: 在 `common-contracts` 中定义 `DataSourceConfig` 和 `DataSourceProvider` 等共享数据结构。~~
|
||||
- [x] ~~**任务 BE-3**: 在 `data-persistence-service` 中实现对数据源配置的 CRUD 业务逻辑。~~
|
||||
- [x] ~~**任务 BE-4**: 在 `data-persistence-service` 中暴露 `GET /api/v1/configs/data-sources` 和 `PUT /api/v1/configs/data-sources` 这两个 API 端点。~~
|
||||
|
||||
### 第二阶段:微服务改造
|
||||
|
||||
- [x] ~~**任务 SVC-1**: **(Finnhub)** 重构 `finnhub-provider-service`:~~
|
||||
- [x] ~~移除 `config.rs` 中的 `FINNHUB_API_KEY` 环境变量加载逻辑。~~
|
||||
- [x] ~~实现内部状态机 (`Active`/`Degraded`) 和动态配置轮询器。~~
|
||||
- [x] ~~修改 `/health` 端点以反映内部状态。~~
|
||||
- [x] ~~调整 NATS 消息订阅逻辑,只在 `Active` 状态下进行订阅。~~
|
||||
- [x] ~~**任务 SVC-2**: **(Tushare)** 以 `finnhub-provider-service` 为模板,对 `tushare-provider-service` 进行相同的重构。~~
|
||||
- [x] ~~**任务 SVC-3**: **(Alphavantage)** 以 `finnhub-provider-service` 为模板,对 `alphavantage-provider-service` 进行相同的重构。~~
|
||||
- [x] ~~**任务 SVC-4**: **(审查)** 审查 `report-generator-service` 的 LLM 配置加载逻辑,确保其与新的动态配置模式在设计理念上保持一致。~~
|
||||
|
||||
### 第三阶段:前端实现
|
||||
|
||||
- [x] **任务 FE-1**: 在 `/config` 页面设计并实现“数据源配置”UI 组件。
|
||||
- [x] **任务 FE-2**: 实现 `useApi.ts` 中用于获取和更新数据源配置的 hooks。
|
||||
- [x] **任务 FE-3**: 将 UI 组件与 API hooks 连接,完成前端的完整功能。
|
||||
- [x] **任务 FE-4**: 调整 `/llm-config` 页面,使其在UI/UX风格上与新的“数据源配置”面板保持一致性。
|
||||
@ -1,62 +0,0 @@
|
||||
# [待处理] 实现 AlphaVantage 服务连接测试功能
|
||||
|
||||
**日期**: 2025-11-18
|
||||
|
||||
**状态**: 待处理 (Pending)
|
||||
|
||||
**负责人**: AI Assistant
|
||||
|
||||
## 1. 需求背景
|
||||
|
||||
目前,系统配置中心的数据源配置页面中,Tushare 和 Finnhub 模块均提供了“测试”按钮,用于验证用户填写的 API Key 和 URL 是否有效。然而,AlphaVantage 模块缺少此功能。由于 AlphaVantage 的数据是通过 MCP (Model Context Protocol) 协议间接调用的,其连接健康状态的检查尤为重要。
|
||||
|
||||
本任务旨在为 AlphaVantage 模块添加一个功能完善的“测试”按钮,以提升系统的健壮性和用户体验。
|
||||
|
||||
## 2. 技术方案与执行细节
|
||||
|
||||
该功能的实现需要贯穿前端、API网关和后端的 AlphaVantage 服务,形成一个完整的调用链路。
|
||||
|
||||
### 2.1. 前端 (Frontend)
|
||||
|
||||
* **文件**: `/frontend/src/app/config/page.tsx`
|
||||
* **任务**:
|
||||
1. **新增UI元素**: 在 AlphaVantage 配置卡片中,仿照其他服务,添加一个“测试 AlphaVantage”按钮。
|
||||
2. **创建事件处理器**:
|
||||
* 实现 `handleTestAlphaVantage` 函数。
|
||||
* 该函数将从组件的本地状态 (`localDataSources`) 中读取 `alphavantage` 的 `api_key` 和 `api_url` (此 URL 为 MCP Endpoint)。
|
||||
* 调用通用的 `handleTest('alphavantage', { ...config })` 函数,将请求发送至 Next.js 后端 API 路由。
|
||||
|
||||
### 2.2. Next.js API 路由
|
||||
|
||||
* **文件**: `/frontend/src/app/api/configs/test/route.ts` (推测)
|
||||
* **任务**:
|
||||
1. **新增处理分支**: 在 `POST` 请求处理逻辑中,为 `type: 'alphavantage'` 增加一个新的 `case`。
|
||||
2. **请求转发**: 该分支将把收到的测试请求(包含配置信息)原样转发到后端的 API 网关。
|
||||
|
||||
### 2.3. API 网关 (API Gateway)
|
||||
|
||||
* **文件**: `/services/api-gateway/src/api.rs` (或相关路由模块)
|
||||
* **任务**:
|
||||
1. **更新路由规则**: 修改处理配置测试的路由逻辑。
|
||||
2. **分发请求**: 当识别到请求类型为 `alphavantage` 时,将该请求精准地转发到 `alphavantage-provider-service` 的新测试接口。
|
||||
|
||||
### 2.4. AlphaVantage 服务 (alphavantage-provider-service)
|
||||
|
||||
这是实现测试逻辑的核心。
|
||||
|
||||
* **文件**: `/services/alphavantage-provider-service/src/api.rs` (或新建的模块)
|
||||
* **任务**:
|
||||
1. **创建新接口**: 在服务中创建一个新的 HTTP `POST /test` 接口,用于接收来自网关的测试请求。
|
||||
2. **实现核心测试逻辑**:
|
||||
* 接口从请求体中解析出 `api_url` 和 `api_key`。
|
||||
* **动态 MCP 客户端**: 使用传入的 `api_url` 动态地、临时地创建一个 MCP 客户端实例。这确保了测试的是用户当前输入的配置,而不是服务启动时加载的旧配置。
|
||||
* **调用 `list_capability`**: 利用此临时客户端,调用 MCP 服务标准工具集中的 `list_capability` 工具。`api_key` 将作为认证凭证传递给此调用。
|
||||
* **响应处理**:
|
||||
* **成功**: 如果 `list_capability` 调用成功返回,意味着 MCP Endpoint 可达、服务正常、API Key 有效。此时,接口返回 `{"success": true, "message": "MCP connection successful."}`。
|
||||
* **失败**: 如果调用过程中出现任何错误(网络问题、认证失败、超时等),接口将捕获异常并返回 `{"success": false, "message": "MCP connection failed: [具体错误信息]"}`。
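新接口的大致形态如下,仅为草图(基于 axum;`TestRequest`/`TestResponse` 与占位函数均为假设,真实的 MCP 客户端构建与 `list_capability` 调用取决于所用的客户端库):

```rust
// services/alphavantage-provider-service/src/api.rs 的设想草图
use axum::Json;
use serde::{Deserialize, Serialize};

#[derive(Deserialize)]
pub struct TestRequest {
    pub api_url: String, // MCP Endpoint
    pub api_key: String,
}

#[derive(Serialize)]
pub struct TestResponse {
    pub success: bool,
    pub message: String,
}

// POST /test:用请求中携带的配置临时验证 MCP 连接
pub async fn test_connection(Json(req): Json<TestRequest>) -> Json<TestResponse> {
    match try_list_capabilities(&req.api_url, &req.api_key).await {
        Ok(_) => Json(TestResponse {
            success: true,
            message: "MCP connection successful.".to_string(),
        }),
        Err(e) => Json(TestResponse {
            success: false,
            message: format!("MCP connection failed: {e}"),
        }),
    }
}

// 占位实现:真实逻辑应基于传入的 api_url/api_key 动态构建 MCP 客户端并调用能力列举接口
async fn try_list_capabilities(_api_url: &str, _api_key: &str) -> anyhow::Result<()> {
    Ok(())
}
```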
|
||||
|
||||
## 3. 预期成果
|
||||
|
||||
* 用户可以在配置中心页面点击按钮来测试 AlphaVantage 的连接配置。
|
||||
* 系统能够通过调用 MCP 的 `list_capability` 接口,实时验证配置的有效性。
|
||||
* 前端能够清晰地展示测试成功或失败的结果,为用户提供明确的反馈。
|
||||
@ -1,130 +0,0 @@
|
||||
# 分析模板集成设计文档
|
||||
|
||||
## 1. 概述
|
||||
系统正在从单一、固定的分析配置架构向多模板架构迁移。目前,后端已支持 `AnalysisTemplateSets` 并能执行特定的模板。然而,前端在渲染报告标签页(Tabs)和触发分析时,仍然依赖于过时的 `AnalysisModulesConfig`(单一配置集)。
|
||||
|
||||
本文档概述了将“分析模板”完全集成到用户工作流中所需的变更,具体包括:
|
||||
1. **触发分析**:在启动新的分析任务时选择特定的模板。
|
||||
2. **报告展示**:根据分析所使用的模板,动态渲染标签页和内容。
|
||||
|
||||
## 2. 当前状态 vs. 目标状态
|
||||
|
||||
| 功能特性 | 当前状态 | 目标状态 |
|
||||
| :--- | :--- | :--- |
|
||||
| **配置管理** | `useAnalysisConfig` (过时,单一模块列表) | `useAnalysisTemplateSets` (多套具名模板) |
|
||||
| **触发分析** | `trigger(symbol, market)` (无模板选择) | `trigger(symbol, market, templateId)` |
|
||||
| **报告标签页** | 硬编码遍历过时的 `analysis_modules` keys | 根据报告使用的**特定模板**动态生成标签页 |
|
||||
| **模块名称** | 从全局默认配置获取 | 从所使用的特定模板配置中获取 |
|
||||
|
||||
## 3. 详细设计
|
||||
|
||||
### 3.1. 后端变更
|
||||
|
||||
#### 3.1.1. API Gateway (`api-gateway`)
|
||||
* **Endpoint**: `POST /api/data-requests`
|
||||
* **变更**: 更新请求 DTO (Data Transfer Object) 以接收可选参数 `template_id: String`。
|
||||
* **逻辑**: 将此 `template_id` 通过 `GenerateReportCommand` 向下传递给 `report-generator-service`。
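请求 DTO 的设想扩展如下(字段 `template_id: Option<String>` 来自本文档第 5 节的待办项,其余为示意):

```rust
// api-gateway 中 POST /api/data-requests 的请求体
#[derive(serde::Deserialize, Debug)]
pub struct DataRequest {
    pub symbol: String,
    pub market: String,
    /// 可选的分析模板集 ID;缺省时由后端决定默认模板
    pub template_id: Option<String>,
}
```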
|
||||
|
||||
#### 3.1.2. 数据持久化 / 报告数据
|
||||
* **需求**: 前端需要知道生成某份报告时具体使用了*哪个*模板,以便正确渲染标签页(包括标题和顺序)。
|
||||
* **变更**: 确保 `GET /api/financials/...` 或 `GET /api/reports/...` 的响应数据中,在 metadata 中包含 `template_id`。
|
||||
* **实现**: 前端 `route.ts` 聚合层通过查询 `analysis-results` 获取最新的 `template_id` 并注入到 `meta` 中。
|
||||
|
||||
### 3.2. 前端变更
|
||||
|
||||
#### 3.2.1. API Hooks (`useApi.ts`)
|
||||
* **`useDataRequest`**: 更新 `trigger` 函数签名:
|
||||
```typescript
|
||||
trigger(symbol: string, market: string, templateId?: string)
|
||||
```
|
||||
* **`useAnalysisTemplateSets`**: 确保此 hook 可用(目前代码中已存在)。
|
||||
|
||||
#### 3.2.2. 触发 UI (报告页侧边栏 / 查询页)
|
||||
* **组件**: 在“触发分析”按钮旁增加一个 `TemplateSelector` (选择框/下拉菜单)。
|
||||
* **数据源**: `useAnalysisTemplateSets`。
|
||||
* **默认值**: 自动选中第一个可用的模板,或者标记为 "default" 的模板。
|
||||
|
||||
#### 3.2.3. 报告页面 (`frontend/src/app/report/[symbol]/page.tsx`)
|
||||
这是最复杂的变更部分。我们需要重构标签页(Tabs)的生成逻辑。
|
||||
|
||||
1. **移除旧逻辑**:
|
||||
* 移除对 `useAnalysisConfig` (全局默认配置) 的依赖。
|
||||
* 弃用/移除 `runAnalysesSequentially` (旧的前端编排流程)。
|
||||
|
||||
2. **识别模板**:
|
||||
* 从获取到的财务/报告数据中读取 `template_id` (例如 `financials.meta.template_id` 或类似位置)。
|
||||
* **Strict Mode**: 如果缺失 `template_id`,则视为严重数据错误,前端直接报错停止渲染,**绝不进行默认值回退或自动推断**。
|
||||
|
||||
3. **动态标签页**:
|
||||
* 使用 `useAnalysisTemplateSets` 获取 `templateSets`。
|
||||
* 从 `templateSets[currentTemplateId].modules` 中推导出 `activeModules` 列表。
|
||||
* 遍历 `activeModules` 来生成 `TabsTrigger` 和 `TabsContent`。
|
||||
* **显示名称**: 使用 `moduleConfig.name`。
|
||||
* **排序**: 严格遵循模板中定义的顺序(或依赖顺序)。
|
||||
|
||||
### 3.3. 数据流
|
||||
|
||||
1. **用户**选择 "标准分析模板 V2" (Standard Analysis V2) 并点击 "运行"。
|
||||
2. **前端**调用 `POST /api/data-requests`,载荷为 `{ ..., template_id: "standard_v2" }`。
|
||||
3. **后端**使用 "standard_v2" 中定义的模块生成报告。
|
||||
4. **前端**轮询任务进度。
|
||||
5. **前端**获取完成的数据。数据包含元数据 `meta: { template_id: "standard_v2" }`。
|
||||
6. **前端**查询 "standard_v2" 的配置详情。
|
||||
7. **前端**渲染标签页:如 "公司简介"、"财务健康"(均来自 V2 配置)。
|
||||
|
||||
## 4. 实施步骤
|
||||
|
||||
1. **后端更新**:
|
||||
* 验证 `api-gateway` 是否正确传递 `template_id`。
|
||||
* 验证报告 API 是否在 metadata 中返回 `template_id`。
|
||||
|
||||
2. **前端 - 触发**:
|
||||
* 更新 `useDataRequest` hook。
|
||||
* 在 `ReportPage` 中添加 `TemplateSelector` 组件。
|
||||
|
||||
3. **前端 - 展示**:
|
||||
* 重构 `ReportPage` 以使用 `templateSets`。
|
||||
* 根据报告中的 `template_id` 动态计算 `analysisTypes`。
|
||||
|
||||
## 5. 待办事项列表 (To-Do List)
|
||||
|
||||
### Phase 1: 后端与接口 (Backend & API)
|
||||
- [x] **1.1 更新请求 DTO (api-gateway)**
|
||||
- 目标: `api-gateway` 的 `DataRequest` 结构体
|
||||
- 动作: 增加 `template_id` 字段 (Option<String>)
|
||||
- 验证: `curl` 请求带 `template_id` 能被解析
|
||||
- [x] **1.2 传递 Command (api-gateway -> report-service)**
|
||||
- 目标: `GenerateReportCommand` 消息
|
||||
- 动作: 确保 `template_id` 被正确透传到消息队列或服务调用中
|
||||
- [x] **1.3 验证报告元数据 (data-persistence)**
|
||||
- 目标: `GET /api/financials/...` 接口
|
||||
- 动作: 检查返回的 JSON 中 `meta` 字段是否包含 `template_id`
|
||||
- 备注: 已通过 frontend `route.ts` 聚合实现
|
||||
|
||||
### Phase 2: 前端逻辑 (Frontend Logic)
|
||||
- [x] **2.1 更新 API Hook**
|
||||
- 文件: `frontend/src/hooks/useApi.ts`
|
||||
- 动作: 修改 `useDataRequest` 的 `trigger` 方法签名,支持 `templateId` 参数
|
||||
- [x] **2.2 移除旧版依赖**
|
||||
- 文件: `frontend/src/app/report/[symbol]/page.tsx`
|
||||
- 动作: 移除 `useAnalysisConfig` 及相关旧版逻辑 (`runAnalysesSequentially`)
|
||||
|
||||
### Phase 3: 前端界面 (Frontend UI)
|
||||
- [x] **3.1 实现模板选择器**
|
||||
- 文件: `frontend/src/app/report/[symbol]/page.tsx` (侧边栏)
|
||||
- 动作: 添加 `<Select>` 组件,数据源为 `useAnalysisTemplateSets`
|
||||
- 逻辑: 默认选中第一个模板,点击"触发分析"时传递选中的 ID
|
||||
- [x] **3.2 动态渲染标签页**
|
||||
- 文件: `frontend/src/app/report/[symbol]/page.tsx` (主区域)
|
||||
- 动作:
|
||||
1. 从 `financials.meta.template_id` 获取当前报告的模板 ID
|
||||
2. 若 ID 缺失直接抛出错误 (Strict Mode)
|
||||
3. 根据 ID 从 `templateSets` 获取模块列表
|
||||
4. 遍历模块列表渲染 `<TabsTrigger>` 和 `<TabsContent>`
|
||||
5. 内容从 `useAnalysisResults` hook 获取
|
||||
|
||||
### Phase 4: 验证与清理 (Verification)
|
||||
- [ ] **4.1 端到端测试**
|
||||
- 动作: 创建新模板 -> 选择该模板触发分析 -> 验证报告页只显示该模板定义的模块
|
||||
- [x] **4.2 代码清理**
|
||||
- 动作: 删除未使用的旧版配置 Hook 和类型定义
|
||||
@ -1,90 +0,0 @@
|
||||
# 分析流程优化与数据缓存机制修复
|
||||
|
||||
## 1. 问题背景 (Problem Statement)
|
||||
|
||||
根据系统日志分析与代码排查,当前系统存在以下关键问题:
|
||||
|
||||
1. **数据源重复请求 (Missing Cache Logic)**:
|
||||
* `yfinance-provider-service` (及其他数据服务) 在接收到任务指令时,未检查本地数据库是否存在有效数据,而是直接向外部 API 发起请求。
|
||||
* 这导致每次用户点击,都会触发耗时约 1.5s 的外部抓取,既慢又浪费资源。
|
||||
|
||||
2. **任务依赖与执行时序错乱 (Race Condition)**:
|
||||
* `api-gateway` 在接收到请求时,**同时**触发了数据抓取 (`DATA_FETCH_QUEUE`) 和分析报告生成 (`ANALYSIS_COMMANDS_QUEUE`)。
|
||||
* 导致 `report-generator-service` 在数据尚未抓回时就已启动,读到空数据(或旧数据)后瞬间完成,Token 消耗为 0,报告内容为空。
|
||||
|
||||
3. **前端无法展示数据 (Frontend Data Visibility)**:
|
||||
* **根本原因**: API Gateway 路由缺失与路径映射错误。
|
||||
* 前端 BFF (`frontend/src/app/api/financials/...`) 试图请求 `${BACKEND_BASE}/market-data/financial-statements/...`。
|
||||
* 然而,`api-gateway` **并未暴露** 此路由(仅暴露了 `/v1/companies/{symbol}/profile`)。
|
||||
* 因此,前端获取财务数据的请求全部 404 失败,导致界面始终显示 "暂无可展示的数据",即使用户多次运行也无效。
|
||||
|
||||
## 2. 目标 (Goals)
|
||||
|
||||
1. **实现"读写穿透"缓存策略**: 数据服务在抓取前必须先检查本地数据库数据的时效性。
|
||||
2. **构建事件驱动的依赖工作流**: 分析服务必须严格等待数据服务完成后触发(通过 NATS 事件链)。
|
||||
3. **修复数据访问层**: 确保 API Gateway 正确暴露并转发财务数据接口,使前端可见。
|
||||
4. **定义数据时效性标准**: 针对不同类型数据实施差异化的缓存过期策略。
|
||||
|
||||
## 3. 详细技术方案 (Technical Plan)
|
||||
|
||||
### 3.1. 数据时效性与缓存策略 (Data Freshness Policy)
|
||||
|
||||
针对基本面分析场景,不同数据的更新频率和时效性要求如下:
|
||||
|
||||
| 数据类型 | 内容示例 | 更新频率 | 建议 TTL (缓存有效期) | 更新策略 |
|
||||
| :--- | :--- | :--- | :--- | :--- |
|
||||
| **公司概况 (Profile)** | 名称、行业、简介、高管 | 极低 (年/不定期) | **30 天** | **Stale-While-Revalidate (SWR)**<br>过期后先返回旧数据,后台异步更新。 |
|
||||
| **财务报表 (Financials)** | 营收、利润、资产负债 (季/年) | 季度 (4/8/10月) | **24 小时** | **Cache-Aside**<br>每次请求先查库。若 `updated_at > 24h`,则强制 Fetch;否则直接返回库中数据。<br>*注: 对于同一天内的重复请求,将直接命中缓存,0延迟。* |
|
||||
| **市场数据 (Market Data)** | PE/PB/市值/价格 | 实时/日频 | **1 小时** | 基本面分析不需要秒级价格。取最近一小时内的快照即可。若需实时价格,使用专用实时接口。 |
|
||||
|
||||
### 3.2. 数据服务层 (Providers)
|
||||
* **涉及服务**: `yfinance-provider-service`, `alphavantage-provider-service`, `finnhub-provider-service`.
|
||||
* **逻辑变更**:
|
||||
1. 订阅 `FetchCommand`。
|
||||
2. **Step 1 (Check DB)**: 调用 `PersistenceClient` 获取目标 Symbol 数据的 `updated_at`。
|
||||
3. **Step 2 (Decision)**:
|
||||
* 若 `now - updated_at < TTL`: **Hit Cache**. Log "Cache Hit", 跳过外部请求,直接进入 Step 4。
|
||||
* 若数据不存在 或 `now - updated_at > TTL`: **Miss Cache**. Log "Cache Miss", 执行外部 API 抓取。
|
||||
4. **Step 3 (Upsert)**: 将抓取的数据存入 DB (Update `updated_at` = now)。
|
||||
5. **Step 4 (Publish Event)**: 发布 `CompanyDataPersistedEvent` (包含 symbol, data_types: ["profile", "financials"])。
|
||||
|
||||
### 3.3. 工作流编排 (Workflow Orchestration)
|
||||
* **API Gateway**:
|
||||
* 移除 `POST /data-requests` 中自动触发 Analysis 的逻辑。
|
||||
* 只发布 `FetchCompanyDataCommand`。
|
||||
* **Report Generator**:
|
||||
* **不再监听** `StartAnalysisCommand` (作为触发源)。
|
||||
* 改为监听 `CompanyDataPersistedEvent`。
|
||||
* 收到事件后,检查事件中的 `request_id` 是否关联了待处理的分析任务(或者简单的:收到数据更新就检查是否有待跑的分析模板)。
|
||||
* *临时方案*: 为了简化,API Gateway 在发送 Fetch 命令时于 payload 中带上 `trigger_analysis: true` 和 `template_id`;Data Provider 在发出的 `PersistedEvent` 中原样透传这些字段;Report Generator 仅在看到 `trigger_analysis: true` 时才执行分析(事件结构示意见下方代码)。
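事件结构的一个示意如下(字段命名为假设,最终以 `common-contracts` 中的定义为准):

```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

// 示意:数据落库后由 Provider 发布的事件,透传分析触发参数
#[derive(Debug, Serialize, Deserialize)]
pub struct CompanyDataPersistedEvent {
    pub request_id: Uuid,
    pub symbol: String,
    // 本次落库覆盖的数据类型,例如 ["profile", "financials"]
    pub data_types: Vec<String>,
    // 由 API Gateway 在 Fetch 命令中带入、Provider 原样透传
    pub trigger_analysis: bool,
    pub template_id: Option<String>,
}
```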
|
||||
|
||||
### 3.4. API 修复 (Fixing Visibility)
|
||||
* **Backend (API Gateway)**:
|
||||
* 在 `create_v1_router` 中新增路由:
|
||||
* `GET /v1/market-data/financial-statements/{symbol}` -> 转发至 Data Persistence Service。
|
||||
* `GET /v1/market-data/quotes/{symbol}` -> 转发至 Data Persistence Service (可选)。
|
||||
* **Frontend (Next.js API Route)**:
|
||||
* 修改 `frontend/src/app/api/financials/[...slug]/route.ts`。
|
||||
* 将请求路径从 `${BACKEND_BASE}/market-data/...` 修正为 `${BACKEND_BASE}/v1/market-data/...` (匹配 Gateway 新路由)。
|
||||
* 或者直接修正为 Data Persistence Service 的正确路径 (但最佳实践是走 Gateway)。
|
||||
|
||||
## 4. 执行计划 (Action Items)
|
||||
|
||||
### Phase 1: API & Frontend 可见性修复 (立即执行)
|
||||
1. [x] **API Gateway**: 添加 `/v1/market-data/financial-statements/{symbol}` 路由。
|
||||
2. [x] **Frontend**: 修正 `route.ts` 中的后端请求路径。(通过修正 Gateway 路由适配前端)
|
||||
3. [ ] **验证**: 打开页面,应能看到(哪怕是旧的)财务图表数据,不再显示 404/无数据。
|
||||
|
||||
### Phase 2: 缓存与时效性逻辑 (核心)
|
||||
4. [x] **Data Providers**: 在 `worker.rs` 中实现 TTL 检查逻辑 (Profile: 30d, Financials: 24h)。(YFinance 已实现,其他 Provider 已适配事件)
|
||||
5. [x] **Persistence Service**: 确保 `get_company_profile` 和 `get_financials` 返回 `updated_at` 字段(如果还没有的话)。
|
||||
|
||||
### Phase 3: 事件驱动工作流 (解决 Race Condition)
|
||||
6. [x] **Contracts**: 定义新事件 `CompanyDataPersistedEvent` (含 `trigger_analysis` 标记)。
|
||||
7. [x] **API Gateway**: 停止直接发送 Analysis 命令,将其参数打包进 Fetch 命令。
|
||||
8. [x] **Data Providers**: 完成任务后发布 `PersistedEvent`。
|
||||
9. [x] **Report Generator**: 监听 `PersistedEvent` 触发分析。
|
||||
|
||||
## 5. 待确认
|
||||
* 是否需要为每个数据源单独设置 TTL?(暂定统一策略)
|
||||
* 前端是否需要显示数据的"上次更新时间"?(建议加上,增强用户信任)
|
||||
@ -1,193 +0,0 @@
|
||||
# 前端报告页面重构设计文档 (Frontend Refactoring Design Doc)
|
||||
|
||||
**日期**: 2025-11-19
|
||||
**状态**: 待评审 (Draft)
|
||||
**目标**: 重构 `app/report/[symbol]` 页面,消除历史技术债务,严格对齐 V2 后端微服务架构。
|
||||
|
||||
## 1. 核心原则
|
||||
|
||||
1. **单一数据源 (SSOT)**: 前端不再维护任务进度、依赖关系或倒计时。所有状态严格来自后端 API (`/api/tasks/{id}`, `/api/analysis-results`).
|
||||
2. **无隐式逻辑 (No Implicit Logic)**: 严格按照用户选择的 Template ID 渲染,后端未返回的数据即视为不存在,不进行客户端推断或 Fallback。
|
||||
3. **真·流式传输 (True Streaming)**: 废弃数据库轮询方案。采用 **Server-Sent Events (SSE)** 技术。
|
||||
* 后端在内存中维护 `tokio::sync::broadcast` 通道。
|
||||
* LLM 生成的 Token 实时推送到通道,直达前端。
|
||||
* 数据库只负责存储**最终完成**的分析结果 (Persistence),不参与流式传输过程。
|
||||
|
||||
## 2. 页面布局设计
|
||||
|
||||
页面采用“固定框架 + 动态内容”的布局模式。
|
||||
|
||||
```text
|
||||
+-----------------------------------------------------------------------+
|
||||
| [Header Area] |
|
||||
| Symbol: AAPL | Market: US | Price: $230.5 (Snapshot) | [Status Badge]|
|
||||
| Control: [ Template Select Dropdown [v] ] [ Trigger Analysis Button ]|
|
||||
+-----------------------------------------------------------------------+
|
||||
| |
|
||||
| [ Tab Navigation Bar ] |
|
||||
| +-----------+ +--------------+ +------------+ +------------+ +-----+ |
|
||||
| | 股价图表 | | 基本面数据 | | 分析模块A | | 分析模块B | | ... | |
|
||||
| +-----------+ +--------------+ +------------+ +------------+ +-----+ |
|
||||
| | |
|
||||
+-----------------------------------------------------------------------+
|
||||
| [ Main Content Area ] |
|
||||
| |
|
||||
| (Content changes based on selected Tab) |
|
||||
| |
|
||||
| SCENARIO 1: Stock Chart Tab |
|
||||
| +-------------------------------------------------+ |
|
||||
| | [ PLACEHOLDER: TradingView / K-Line Chart ] | |
|
||||
| | (Future: Connect to Time-Series DB) | |
|
||||
| +-------------------------------------------------+ |
|
||||
| |
|
||||
| SCENARIO 2: Fundamental Data Tab |
|
||||
| +-------------------------------------------------+ |
|
||||
| | Status: Waiting for Providers (2/3)... | |
|
||||
| | --------------------------------------------- | |
|
||||
| | [Tushare]: OK (JSON/Table Dump) | |
|
||||
| | [Finnhub]: OK (JSON/Table Dump) | |
|
||||
| | [AlphaV ]: Pending... | |
|
||||
| +-------------------------------------------------+ |
|
||||
| |
|
||||
| SCENARIO 3: Analysis Module Tab (e.g., Valuation) |
|
||||
| +-------------------------------------------------+ |
|
||||
| | [Markdown Renderer] | |
|
||||
| | ## Valuation Analysis | |
|
||||
| | Based on the PE ratio of 30... | |
|
||||
| | (Streaming Cursor) _ | |
|
||||
| +-------------------------------------------------+ |
|
||||
| |
|
||||
+-----------------------------------------------------------------------+
|
||||
| [ Execution Details Footer / Tab ] |
|
||||
| Total Time: 12s | Tokens: 4050 | Cost: $0.02 |
|
||||
+-----------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
## 3. 数据流与状态机
|
||||
|
||||
### 3.1 固定 Tab 定义
|
||||
无论选择何种模板,以下 Tab 始终存在(Fixed Tabs):
|
||||
|
||||
1. **股价图表 (Stock Chart)**
|
||||
* **数据源**: 独立的实时行情 API / 时间序列数据库。
|
||||
* **当前实现**: 占位符 (Placeholder)。
|
||||
2. **基本面数据 (Fundamental Data)**
|
||||
* **定义**: 所有已启用的 Data Providers 返回的原始数据聚合。
|
||||
* **状态逻辑**:
|
||||
* 此 Tab 代表“数据准备阶段”。
|
||||
* 必须等待后端 `FetchCompanyDataCommand` 对应的 Task 状态为 Completed/Partial/Failed。
|
||||
* UI 展示所有 Provider 的回执。只有当所有 Provider 都有定论(成功或失败),此阶段才算结束。
|
||||
* **作为后续分析的“门控”**: 此阶段未完成前,后续分析 Tab 处于“等待中”状态。
|
||||
3. **执行详情 (Execution Details)**
|
||||
* **定义**: 工作流的元数据汇总。
|
||||
* **内容**: 耗时统计、Token 消耗、API 调用清单。
|
||||
|
||||
### 3.2 动态 Tab 定义 (Analysis Modules)
|
||||
* **来源**: 根据当前选中的 `Template ID` 从后端获取 `AnalysisTemplateConfig`。
|
||||
* **生成逻辑**:
|
||||
* Template 中定义了 Modules: `[Module A, Module B, Module C]`.
|
||||
* 前端直接映射为 Tab A, Tab B, Tab C。
|
||||
* **渲染**:
|
||||
* **Loading**: 后端 `AnalysisResult` 状态为 `processing`。
|
||||
* **Streaming**: 通过 SSE (`/api/analysis-results/stream`) 接收增量内容。
|
||||
* **Done**: 后端流结束,或直接从 DB 读取完整内容。
|
||||
|
||||
### 3.3 状态机 (useReportEngine Hook)
|
||||
|
||||
我们将废弃旧的 Hook,实现一个纯粹的 `useReportEngine`。
|
||||
|
||||
```typescript
|
||||
interface ReportState {
|
||||
// 1. 配置上下文
|
||||
symbol: string;
|
||||
templateId: string;
|
||||
templateConfig: AnalysisTemplateSet | null; // 用于生成动态 Tab
|
||||
|
||||
// 2. 阶段状态
|
||||
fetchStatus: 'idle' | 'fetching' | 'complete' | 'error'; // 基本面数据阶段
|
||||
analysisStatus: 'idle' | 'running' | 'complete'; // 分析阶段
|
||||
|
||||
// 3. 数据持有
|
||||
fundamentalData: any[]; // 来自各个 Provider 的原始数据
|
||||
analysisResults: Record<string, AnalysisResultDto>; // Key: ModuleID
|
||||
|
||||
// 4. 进度
|
||||
executionMeta: {
|
||||
startTime: number;
|
||||
elapsed: number;
|
||||
tokens: number;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 交互流程
|
||||
|
||||
1. **初始化**:
|
||||
* 用户进入页面 -> 加载 `api/configs/analysis_template_sets` -> 填充下拉框。
|
||||
* 如果 URL 或历史数据中有 `template_id`,自动选中。
|
||||
|
||||
2. **触发 (Trigger)**:
|
||||
* 用户点击“开始分析”。
|
||||
* 前端 POST `/api/data-requests` (payload: `{ symbol, template_id }`)。
|
||||
* **前端重置所有动态 Tab 内容为空**。
|
||||
* 进入 `fetchStatus: fetching`。
|
||||
|
||||
3. **阶段一:基本面数据获取**:
|
||||
* 前端轮询 `/api/tasks/{request_id}`。
|
||||
* **基本面 Tab** 高亮/显示 Spinner。
|
||||
* 展示各个 Provider 的子任务进度。
|
||||
* 当 Task 状态 = Completed -> 进入阶段二。
|
||||
|
||||
4. **阶段二:流式分析 (SSE)**:
|
||||
* 前端建立 EventSource 连接 `/api/analysis-results/stream?request_id={id}`。
|
||||
* **智能切换 Tab**: (可选) 当某个 Module 开始生成 (收到 SSE 事件 `module_start`) 时,UI 可以自动切换到该 Tab。
|
||||
* **渲染**: 收到 `content` 事件,追加到对应 Module 的内容中。
|
||||
* **持久化**: 只有当 SSE 收到 `DONE` 事件时,后端才保证数据已落库。
|
||||
|
||||
5. **完成**:
|
||||
* SSE 连接关闭。
|
||||
* 状态转为 `complete`。
|
||||
|
||||
## 5. 架构设计 (Architecture Design)
|
||||
|
||||
为了实现真流式传输,后端架构调整如下(列表末尾附一个基于 `broadcast` 通道的最小示意):
|
||||
|
||||
1. **内存状态管理 (In-Memory State)**:
|
||||
* `AppState` 中增加 `stream_manager: StreamManager`。
|
||||
* `StreamManager` 维护 `HashMap<RequestId, BroadcastSender<StreamEvent>>`。
|
||||
* 这消除了对数据库的中间状态写入压力。
|
||||
2. **Worker 职责**:
|
||||
* Worker 执行 LLM 请求。
|
||||
* 收到 Token -> 写入 `BroadcastSender` (Fire and forget)。
|
||||
* 同时将 Token 累积在内存 Buffer 中。
|
||||
* 生成结束 -> 将完整 Buffer 写入数据库 (PostgreSQL) -> 广播 `ModuleDone` 事件。
|
||||
3. **API 职责**:
|
||||
* `GET /stream`:
|
||||
* 检查内存中是否有对应的 `BroadcastSender`?
|
||||
* **有**: 建立 SSE 连接,订阅并转发事件。
|
||||
* **无**: 检查数据库是否已完成?
|
||||
* **已完成**: 一次性返回完整内容 (模拟 SSE 或直接返回 JSON)。
|
||||
* **未开始/不存在**: 返回 404 或等待。
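下面是一个基于 `tokio::sync::broadcast` 的 `StreamManager` 最小示意(`StreamEvent` 的具体形态为假设,仅用于说明“内存广播 + 最终落库”的职责划分):

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use tokio::sync::broadcast;
use uuid::Uuid;

// 示意:流事件,实际类型以 common-contracts 的定义为准
#[derive(Debug, Clone)]
pub enum StreamEvent {
    Content { module_id: String, delta: String },
    ModuleDone { module_id: String },
}

// 示意:按 request_id 维护广播通道;中间状态只走内存,数据库仅存最终结果
#[derive(Default, Clone)]
pub struct StreamManager {
    channels: Arc<RwLock<HashMap<Uuid, broadcast::Sender<StreamEvent>>>>,
}

impl StreamManager {
    // Worker 开始处理某个 request 时创建通道
    pub fn create(&self, request_id: Uuid) -> broadcast::Sender<StreamEvent> {
        let (tx, _rx) = broadcast::channel(1024);
        self.channels.write().unwrap().insert(request_id, tx.clone());
        tx
    }

    // SSE handler 订阅:存在则返回 Receiver,否则回落到查库
    pub fn subscribe(&self, request_id: &Uuid) -> Option<broadcast::Receiver<StreamEvent>> {
        self.channels.read().unwrap().get(request_id).map(|tx| tx.subscribe())
    }

    // 流程结束后清理,避免内存泄漏
    pub fn remove(&self, request_id: &Uuid) {
        self.channels.write().unwrap().remove(request_id);
    }
}
```

Worker 持有 `Sender` 实时推送 Token,SSE handler 通过 `subscribe` 拿到 `Receiver` 并转发给前端;流程结束后调用 `remove` 释放通道。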
|
||||
|
||||
## 6. 迁移计划 (Action Items)
|
||||
|
||||
### 6.1 清理与归档 (Cleanup)
|
||||
- [x] 创建 `frontend/archive/v1_report` 目录。
|
||||
- [x] 移动 `app/report/[symbol]/components` 下的旧组件(`ExecutionDetails.tsx`, `TaskStatus.tsx`, `ReportHeader.tsx`, `AnalysisContent.tsx`)到 archive。
|
||||
- [x] 移动 `app/report/[symbol]/hooks` 下的 `useAnalysisRunner.ts` 和 `useReportData.ts` 到 archive。
|
||||
|
||||
### 6.2 核心构建 (Core Scaffolding)
|
||||
- [x] 创建 `hooks/useReportEngine.ts`: 实现上述状态机,严格对接后端 API。
|
||||
- [x] 创建 `components/ReportLayout.tsx`: 实现新的布局框架(Header + Tabs + Content)。
|
||||
- [x] 创建 `components/RawDataViewer.tsx`: 用于展示基本面原始数据(JSON View)。
|
||||
- [x] 创建 `components/AnalysisViewer.tsx`: 用于展示分析结果(Markdown Streaming)。
|
||||
|
||||
### 6.3 页面集成 (Integration)
|
||||
- [x] 重写 `app/report/[symbol]/page.tsx`: 引入 `useReportEngine` 和新组件。
|
||||
- [ ] 验证全流程:Trigger -> Task Fetching -> Analysis Streaming -> Finish。
|
||||
|
||||
### 6.4 后端重构 (Backend Refactoring) - NEW
|
||||
- [x] **State Upgrade**: 更新 `AppState` 引入 `tokio::sync::broadcast` 用于流式广播。
|
||||
- [x] **Worker Update**: 修改 `run_report_generation_workflow`:生成过程中不再写库,而是**过程中发广播,生成结束后一次性写库**。
|
||||
- [x] **API Update**: 新增 `GET /api/analysis-results/stream` (SSE Endpoint),对接广播通道。
|
||||
- [x] **Frontend Update**: 修改 `useReportEngine.ts`,将轮询 `analysis-results` 改为 `EventSource` 连接。
|
||||
@ -1,148 +0,0 @@
|
||||
# 供应商隔离的数据新鲜度与缓存设计方案
|
||||
|
||||
## 1. 背景 (Background)
|
||||
|
||||
当前系统使用 `company_profiles` 表中的全局 `updated_at` 时间戳来判断某个股票的数据是否“新鲜”(例如:过去 24 小时内更新过)。
|
||||
|
||||
**现有问题:**
|
||||
这种方法在多供应商(Multi-Provider)环境中会导致严重的竞态条件(Race Condition):
|
||||
1. **Tushare**(A股数据源)通常响应较快,获取数据并更新了 `company_profiles` 表的 `updated_at`。
|
||||
2. `updated_at` 时间戳被更新为 `NOW()`。
|
||||
3. **YFinance** 或 **AlphaVantage**(全球数据源)稍后启动任务。
|
||||
4. 它们检查 `company_profiles` 表,发现 `updated_at` 非常新,因此错误地认为**自己的**数据也是最新的。
|
||||
5. 结果:YFinance/AlphaVantage 跳过执行,导致这些特定字段的数据为空或陈旧。
|
||||
|
||||
## 2. 目标 (Objective)
|
||||
|
||||
实现一个**供应商隔离的缓存机制**,允许每个数据供应商(Tushare, YFinance, AlphaVantage, Finnhub)能够:
|
||||
1. 独立追踪其最后一次成功更新数据的时间。
|
||||
2. 仅根据**自己的**数据新鲜度来决定是否执行任务。
|
||||
3. 避免干扰其他供应商的执行逻辑。
|
||||
|
||||
## 3. 设计原则 (Design Principles)
|
||||
|
||||
1. **不新增数据表**:利用数据库现有的文档-关系混合特性(Document-Relational)。具体来说,使用 `company_profiles` 表中的 `additional_info` (JSONB) 字段。
|
||||
2. **服务层抽象**:解析和管理这些元数据的复杂性应封装在 `Data Persistence Service` 内部,向各 Provider Service 暴露简洁的 API。
|
||||
3. **并发安全**:确保不同供应商的并发更新不会覆盖彼此的元数据状态。
|
||||
|
||||
## 4. 数据结构设计 (Data Structure Design)
|
||||
|
||||
我们将利用现有的 `company_profiles.additional_info` 字段(类型:`JSONB`)来存储一个供应商状态字典。
|
||||
|
||||
### `additional_info` JSON Schema 设计
|
||||
|
||||
```json
|
||||
{
|
||||
"provider_status": {
|
||||
"tushare": {
|
||||
"last_updated": "2025-11-19T10:00:00Z",
|
||||
"data_version": "v1",
|
||||
"status": "success"
|
||||
},
|
||||
"yfinance": {
|
||||
"last_updated": "2025-11-18T09:30:00Z",
|
||||
"status": "success"
|
||||
},
|
||||
"alphavantage": {
|
||||
"last_updated": "2025-11-15T14:00:00Z",
|
||||
"status": "partial_success" // 例如:触发了速率限制
|
||||
}
|
||||
},
|
||||
"other_metadata": "..." // 保留其他现有元数据
|
||||
}
|
||||
```
|
||||
|
||||
## 5. 实施计划 (Implementation Plan)
|
||||
|
||||
### 5.1. 数据持久化服务更新 (Data Persistence Service)
|
||||
|
||||
我们需要扩展 `PersistenceClient` 及其底层 API,以支持细粒度的元数据更新。
|
||||
|
||||
**新增/更新 API 端点:**
|
||||
|
||||
1. **`PUT /companies/{symbol}/providers/{provider_id}/status`** (新增)
|
||||
* **目的**:原子更新特定供应商的状态,无需读取/写入完整的 profile。
|
||||
* **实现**:使用 Postgres 的 `jsonb_set` 函数,直接更新 JSON 路径 `['provider_status', provider_id]`(SQL 写法示意见本节末尾)。
|
||||
* **Payload**:
|
||||
```json
|
||||
{
|
||||
"last_updated": "2025-11-19T12:00:00Z",
|
||||
"status": "success"
|
||||
}
|
||||
```
|
||||
|
||||
2. **`GET /companies/{symbol}/providers/{provider_id}/status`** (新增)
|
||||
* **目的**:辅助接口,用于获取特定供应商的当前缓存状态。
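写入侧的一个 sqlx 草图如下(表名、列名沿用本文档的 `company_profiles.additional_info`,其余细节为假设):

```rust
use chrono::{DateTime, Utc};
use sqlx::PgPool;

// 示意:原子更新 additional_info -> provider_status -> {provider_id},
// 借助 jsonb_set 避免“读-改-写”竞态
pub async fn update_provider_status(
    pool: &PgPool,
    symbol: &str,
    provider_id: &str,
    last_updated: DateTime<Utc>,
    status: &str,
) -> Result<(), sqlx::Error> {
    sqlx::query(
        r#"
        UPDATE company_profiles
        SET additional_info = jsonb_set(
            -- 先保证 provider_status 键存在,再写入该 provider 的子对象
            COALESCE(additional_info, '{}'::jsonb)
                || jsonb_build_object(
                       'provider_status',
                       COALESCE(additional_info -> 'provider_status', '{}'::jsonb)
                   ),
            ARRAY['provider_status', $2::text],
            jsonb_build_object('last_updated', $3::text, 'status', $4::text),
            true
        )
        WHERE symbol = $1
        "#,
    )
    .bind(symbol)
    .bind(provider_id)
    .bind(last_updated.to_rfc3339())
    .bind(status)
    .execute(pool)
    .await?;
    Ok(())
}
```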
|
||||
|
||||
### 5.2. 供应商服务工作流更新 (Provider Service)
|
||||
|
||||
每个 Provider Service(例如 `yfinance-provider-service`)将修改其 `worker.rs` 中的逻辑:
|
||||
|
||||
**现有逻辑(有缺陷):**
|
||||
```rust
|
||||
let profile = client.get_company_profile(symbol).await?;
|
||||
if profile.updated_at > Utc::now() - Duration::hours(24) { return; } // 全局检查:任一 Provider 刚更新过都会命中
|
||||
```
|
||||
|
||||
**新逻辑:**
|
||||
```rust
|
||||
// 1. 检查 Provider 专属缓存
|
||||
let status = client.get_provider_status(symbol, "yfinance").await?;
|
||||
if let Some(s) = status {
|
||||
if s.last_updated > Utc::now() - Duration::hours(24) {
|
||||
info!("YFinance 数据较新,跳过执行。");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 获取并持久化数据
|
||||
// ... fetch ...
|
||||
client.upsert_company_profile(profile).await?; // 更新基本信息
|
||||
client.batch_insert_financials(financials).await?;
|
||||
|
||||
// 3. 更新 Provider 状态
|
||||
client.update_provider_status(symbol, "yfinance", ProviderStatus {
|
||||
last_updated: Utc::now(),
|
||||
status: "success"
|
||||
}).await?;
|
||||
```
|
||||
|
||||
## 6. 风险管理与迁移 (Risk Management & Migration)
|
||||
|
||||
* **竞态条件 (Race Conditions)**:通过在数据库层使用 `jsonb_set` 进行部分更新,我们避免了“读-改-写”的竞态条件,确保 Provider A 的更新不会覆盖 Provider B 同时写入的状态。
|
||||
* **数据迁移 (Migration)**:
|
||||
* **策略**:**Lazy Migration (懒迁移)**。
|
||||
* 现有数据中没有 `provider_status` 字段。代码将优雅地处理 `null` 或缺失键的情况(将其视为“陈旧/从未运行”,触发重新获取)。
|
||||
* **无需**编写专门的 SQL 迁移脚本去清洗历史数据。旧数据会随着新的抓取任务运行而自动补充上状态信息。
|
||||
* 如果必须清理,可以直接执行 `UPDATE company_profiles SET additional_info = additional_info - 'provider_status';` 来重置所有缓存状态。
|
||||
|
||||
## 7. 实施清单 (Implementation Checklist)
|
||||
|
||||
- [x] **Phase 1: Common Contracts & DTOs**
|
||||
- [x] 在 `services/common-contracts/src/dtos.rs` 中定义 `ProviderStatusDto`.
|
||||
|
||||
- [x] **Phase 2: Data Persistence Service API**
|
||||
- [x] 实现 DB 层逻辑: `get_provider_status` (读取 JSONB).
|
||||
- [x] 实现 DB 层逻辑: `update_provider_status` (使用 `jsonb_set`).
|
||||
- [x] 添加 API Handler: `GET /companies/{symbol}/providers/{provider_id}/status`.
|
||||
- [x] 添加 API Handler: `PUT /companies/{symbol}/providers/{provider_id}/status`.
|
||||
- [x] 注册路由并测试接口.
|
||||
|
||||
- [x] **Phase 3: Client Logic Update**
|
||||
- [x] 更新各服务中的 `PersistenceClient` (如 `services/yfinance-provider-service/src/persistence.rs` 等),增加 `get_provider_status` 和 `update_provider_status` 方法.
|
||||
|
||||
- [x] **Phase 4: Provider Services Integration**
|
||||
- [x] **Tushare Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
- [x] **YFinance Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
- [x] **AlphaVantage Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
- [x] **Finnhub Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
|
||||
- [ ] **Phase 5: Verification (验证)**
|
||||
- [ ] 运行 `scripts/test_data_fetch.py` 验证全流程.
|
||||
- [ ] 验证不同 Provider 的状态互不干扰.
|
||||
|
||||
- [ ] **Phase 6: Caching Logic Abstraction (缓存逻辑抽象 - 智能客户端)**
|
||||
- [ ] 将 `PersistenceClient` 迁移至 `services/common-contracts/src/persistence_client.rs`(或新建 `service-sdk` 库),消除重复代码。
|
||||
- [ ] 在共享客户端中实现高层方法 `should_fetch_data(symbol, provider, ttl)`。
|
||||
- [ ] 重构所有 Provider Service 以使用共享的 `PersistenceClient`。
|
||||
- [ ] 验证所有 Provider 的缓存逻辑是否一致且无需手动实现。
|
||||
@ -1,128 +0,0 @@
|
||||
# 报告生成优化与 UI 状态反馈改进设计文档
|
||||
|
||||
**状态**: Draft
|
||||
**日期**: 2025-11-19
|
||||
**涉及模块**: Report Generator Service (Backend), Frontend (UI)
|
||||
|
||||
## 1. 背景与问题分析
|
||||
|
||||
当前系统的报告生成流程存在两个主要痛点,导致用户体验不佳且生成内容质量低下:
|
||||
|
||||
1. **数据注入缺失 (Data Injection Gap)**:
|
||||
* 后端在执行 Prompt 渲染时,`financial_data` 被硬编码为 `"..."`。
|
||||
* 大模型(LLM)缺乏上下文输入,导致输出“幻觉”内容(如自我介绍、复读指令)或通用废话。
|
||||
* 依赖链条虽然在拓扑排序上是正确的,但由于上游(如“基本面分析”)输出无效内容,下游(如“最终结论”)的输入也随之失效。
|
||||
|
||||
2. **UI 状态反馈缺失 (UI/UX Gap)**:
|
||||
* 前端仅有简单的“有数据/无数据”判断。
|
||||
* 点击“重新生成”时,UI 往往显示旧的缓存数据,缺乏“生成中”或“进度更新”的实时反馈。
|
||||
* 用户无法区分“旧报告”和“正在生成的新报告”。
|
||||
|
||||
## 2. 后端优化设计 (Report Generator Service)
|
||||
|
||||
### 2.1 数据注入逻辑修复 (Fixing Financial Data Injection)
|
||||
|
||||
我们将把当前的“基本面数据获取”视为一个**内置的基础工具(Native Tool)**。
|
||||
|
||||
* **当前逻辑**: 直接透传数据库 Raw Data。
|
||||
* **改进逻辑**: 在 `worker.rs` 中实现一个数据格式化器,将 `Vec<TimeSeriesFinancialDto>` 转换为 LLM 易读的 Markdown 表格或结构化文本。
|
||||
|
||||
**实现细节**:
|
||||
1. **格式化函数**: 实现 `format_financials_to_markdown(financials: &[TimeSeriesFinancialDto]) -> String`(草图见本节末尾)。
|
||||
* 按年份/季度降序排列。
|
||||
* 提取关键指标(营收、净利润、ROE、毛利率等)。
|
||||
* 生成 Markdown Table。
|
||||
2. **注入 Context**:
|
||||
* 在 `Tera` 模板渲染前,调用上述函数。
|
||||
* 替换占位符: `context.insert("financial_data", &formatted_data);`。
|
||||
3. **上游依赖注入 (保持不变)**:
|
||||
* 继续保留现有的 `generated_results` 注入逻辑,确保上游模块(如 `market_analysis`)的输出能正确传递给下游(如 `final_conclusion`)。
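格式化函数的一个草图如下(`TimeSeriesFinancialDto` 的字段为假设,实际以 `common-contracts` 中的 DTO 为准):

```rust
// 示意:把时间序列财务数据转成 LLM 易读的 Markdown 表格
pub struct TimeSeriesFinancialDto {
    pub period: String,        // 例如 "2024Q3"
    pub revenue: Option<f64>,
    pub net_income: Option<f64>,
    pub roe: Option<f64>,
    pub gross_margin: Option<f64>,
}

pub fn format_financials_to_markdown(financials: &[TimeSeriesFinancialDto]) -> String {
    // 按报告期降序排列(对 "2024Q3" 这类字符串格式近似有效,仅为示意)
    let mut rows: Vec<&TimeSeriesFinancialDto> = financials.iter().collect();
    rows.sort_by(|a, b| b.period.cmp(&a.period));

    // 缺失指标以 "-" 占位,避免 LLM 把空值当成 0
    let fmt = |v: &Option<f64>| v.map(|x| format!("{x:.2}")).unwrap_or_else(|| "-".into());

    let mut out = String::from("| 报告期 | 营收 | 净利润 | ROE | 毛利率 |\n|---|---|---|---|---|\n");
    for r in rows {
        out.push_str(&format!(
            "| {} | {} | {} | {} | {} |\n",
            r.period,
            fmt(&r.revenue),
            fmt(&r.net_income),
            fmt(&r.roe),
            fmt(&r.gross_margin),
        ));
    }
    out
}
```

渲染前注入:`context.insert("financial_data", &format_financials_to_markdown(&financials));`。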
|
||||
|
||||
### 2.2 执行状态管理 (Execution Status Management)
|
||||
|
||||
为了支持前端的“实时状态”,后端需要能够区分“排队中”、“生成中”和“已完成”。
|
||||
|
||||
* **现状**: 只有生成完成后才写入 `analysis_results` 表。
|
||||
* **改进**: 引入任务状态流转。
|
||||
|
||||
**方案 A (基于数据库 - 推荐 MVP)**:
|
||||
利用现有的 `analysis_results` 表或新建 `analysis_tasks` 表。
|
||||
1. **任务开始时**:
|
||||
* Worker 开始处理某个 `module_id` 时,立即写入/更新一条记录。
|
||||
* `status`: `PROCESSING`
|
||||
* `content`: 空或 "Analysis in progress..."
|
||||
2. **任务完成时**:
|
||||
* 更新记录。
|
||||
* `status`: `COMPLETED`
|
||||
* `content`: 实际生成的 Markdown。
|
||||
3. **任务失败时**:
|
||||
* `status`: `FAILED`
|
||||
* `content`: 错误信息。
|
||||
|
||||
### 2.3 未来扩展性:工具模块 (Future Tool Module)
|
||||
|
||||
* 当前设计中,`financial_data` 是硬编码注入的。
|
||||
* **未来规划**: 在 Prompt 模板配置中,增加 `tools` 字段。
|
||||
```json
|
||||
"tools": ["financial_aggregator", "news_search", "calculator"]
|
||||
```
|
||||
* Worker 在渲染 Prompt 前,先解析 `tools` 配置,并行执行对应的工具函数(如 Python 数据清洗脚本),获取输出后注入 Context。当前修复的 `financial_data` 本质上就是 `financial_aggregator` 工具的默认实现。
|
||||
|
||||
## 3. 前端优化设计 (Frontend)
|
||||
|
||||
### 3.1 状态感知与交互
|
||||
|
||||
**目标**: 让用户清晰感知到“正在生成”。
|
||||
|
||||
1. **重新生成按钮行为**:
|
||||
* 点击“重新生成”后,**立即**将当前模块的 UI 状态置为 `GENERATING`。
|
||||
* **视觉反馈**:
|
||||
* 方案一(简单):清空旧内容,显示 Skeleton(骨架屏)+ 进度条/Spinner。
|
||||
* 方案二(平滑):保留旧内容,但在上方覆盖一层半透明遮罩,并显示“正在更新分析...”。(推荐方案二,避免内容跳动)。
|
||||
|
||||
2. **状态轮询 (Polling)**:
|
||||
* 由于后端暂未实现 SSE (Server-Sent Events),前端需采用轮询机制。
|
||||
* 当状态为 `GENERATING` 时,每隔 2-3 秒调用一次 API 检查该 `module_id` 的状态。
|
||||
* 当后端返回状态变更为 `COMPLETED` 时,停止轮询,刷新显示内容。
|
||||
|
||||
### 3.2 组件结构调整
|
||||
|
||||
修改 `AnalysisContent.tsx` 组件:
|
||||
|
||||
```typescript
|
||||
interface AnalysisState {
|
||||
status: 'idle' | 'loading' | 'success' | 'error';
|
||||
data: string | null; // Markdown content
|
||||
isStale: boolean; // 标记当前显示的是否为旧缓存
|
||||
}
|
||||
```
|
||||
|
||||
* **Idle**: 初始状态。
|
||||
* **Loading**: 点击生成后,显示加载动画。
|
||||
* **Success**: 获取到新数据。
|
||||
* **IsStale**: 点击重新生成瞬间,将 `isStale` 设为 true。UI 上可以给旧文本加灰色滤镜,直到新数据到来。
|
||||
|
||||
## 4. 实施计划 (Action Plan)
|
||||
|
||||
### Phase 1: 后端数据修正 (Backend Core)
|
||||
- [ ] 修改 `services/report-generator-service/src/worker.rs`。
|
||||
- [ ] 实现 `format_financial_data` 辅助函数。
|
||||
- [ ] 将格式化后的数据注入 Tera Context。
|
||||
- [ ] 验证大模型输出不再包含“幻觉”文本。
|
||||
|
||||
### Phase 2: 后端状态透出 (Backend API)
|
||||
- [ ] 确认 `NewAnalysisResult` 或相关 DTO 是否支持状态字段。
|
||||
- [ ] 在 Worker 开始处理模块时,写入 `PROCESSING` 状态到数据库。
|
||||
- [ ] 确保 API 查询接口能返回 `status` 字段。
|
||||
|
||||
### Phase 3: 前端体验升级 (Frontend UI)
|
||||
- [ ] 修改 `AnalysisContent.tsx`,增加对 `status` 字段的处理。
|
||||
- [ ] 实现“重新生成”时的 UI 遮罩或 Loading 状态,不再单纯依赖 `useQuery` 的缓存。
|
||||
- [ ] 优化 Markdown 渲染区的用户体验。
|
||||
|
||||
## 5. 验收标准 (Acceptance Criteria)
|
||||
|
||||
1. **内容质量**: 市场分析、基本面分析报告中包含具体的财务数字(如营收、利润),且引用正确,不再出现“请提供数据”的字样。
|
||||
2. **流程闭环**: 点击“重新生成”,UI 显示加载状态 -> 后端处理 -> UI 自动刷新为新内容。
|
||||
3. **无闪烁**: 页面不会因为轮询而频繁闪烁,状态切换平滑。
|
||||
|
||||
@ -1,225 +0,0 @@
|
||||
# 架构重构设计文档:引入 Workflow Orchestrator
|
||||
|
||||
## 1. 背景与目标
|
||||
当前系统存在 `api-gateway` 职责过载、业务逻辑分散、状态机隐式且脆弱、前后端状态不同步等核心问题。为了彻底解决这些架构痛点,本设计提出引入 **Workflow Orchestrator Service**,作为系统的“大脑”,负责集中管理业务流程、状态流转与事件协调。
|
||||
|
||||
### 核心目标
|
||||
1. **解耦 (Decoupling)**: 将业务协调逻辑从 `api-gateway` 剥离,Gateway 回归纯粹的流量入口和连接管理职责。
|
||||
2. **状态一致性 (Consistency)**: 建立单一事实来源 (Single Source of Truth),所有业务状态由 Orchestrator 统一维护并广播。
|
||||
3. **细粒度任务编排 (Fine-Grained Orchestration)**: 废除粗粒度的“阶段”概念,转向基于 DAG (有向无环图) 的任务编排。后端只负责执行任务和广播每个任务的状态,前端根据任务状态自由决定呈现逻辑。
|
||||
|
||||
## 2. 架构全景图 (Architecture Overview)
|
||||
|
||||
### 2.1 服务角色重定义
|
||||
|
||||
| 服务 | 现有职责 | **新职责** |
|
||||
| :--- | :--- | :--- |
|
||||
| **API Gateway** | 路由, 鉴权, 注册发现, 业务聚合, 流程触发 | 路由, 鉴权, 注册发现, **SSE/WS 代理 (Frontend Proxy)** |
|
||||
| **Workflow Orchestrator** | *(新服务)* | **DAG 调度**, **任务依赖管理**, **事件广播**, **状态快照** |
|
||||
| **Data Providers** | 数据抓取, 存库, 发 NATS 消息 | (保持不变) 接收指令 -> 干活 -> 发结果事件 |
|
||||
| **Report Generator** | 报告生成, 发 NATS 消息 | (保持不变) 接收指令 -> 干活 -> 发进度/结果事件 |
|
||||
| **Data Processors** | *(新服务类型)* | **数据清洗/转换** (接收上下文 -> 转换 -> 更新上下文) |
|
||||
|
||||
### 2.2 数据流向 (Data Flow)
|
||||
|
||||
1. **启动**: 前端 -> Gateway (`POST /start`) -> **Orchestrator** (NATS: `StartWorkflow`)
|
||||
2. **调度**: **Orchestrator** 解析模板构建 DAG -> NATS: 触发无依赖的 Tasks (如 Data Fetching)
|
||||
3. **反馈**: Executors (Providers/ReportGen/Processors) -> NATS: `TaskCompleted` -> **Orchestrator**
|
||||
4. **流转**: **Orchestrator** 检查依赖 -> NATS: 触发下一层 Tasks
|
||||
5. **广播**: **Orchestrator** -> NATS: `WorkflowEvent` (Task Status Updates) -> Gateway -> 前端 (SSE)
|
||||
|
||||
## 3. 接口与协议定义 (Contracts & Schemas)
|
||||
|
||||
需在 `services/common-contracts` 中进行以下调整:
|
||||
|
||||
### 3.1 新增 Commands (NATS Subject: `workflow.commands.*`)
|
||||
|
||||
```rust
|
||||
// Topic: workflow.commands.start
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct StartWorkflowCommand {
|
||||
pub request_id: Uuid,
|
||||
pub symbol: CanonicalSymbol,
|
||||
pub market: String,
|
||||
pub template_id: String,
|
||||
}
|
||||
|
||||
// 新增:用于手动请求状态对齐 (Reconnect Scenario)
|
||||
// Topic: workflow.commands.sync_state
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct SyncStateCommand {
|
||||
pub request_id: Uuid,
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 新增 Events (NATS Subject: `events.workflow.{request_id}`)
|
||||
|
||||
这是前端唯一需要订阅的流。
|
||||
|
||||
```rust
|
||||
// Topic: events.workflow.{request_id}
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "type", content = "payload")]
|
||||
pub enum WorkflowEvent {
|
||||
// 1. 流程初始化 (携带完整的任务依赖图)
|
||||
WorkflowStarted {
|
||||
timestamp: i64,
|
||||
// 定义所有任务及其依赖关系,前端可据此绘制流程图或进度条
|
||||
task_graph: WorkflowDag
|
||||
},
|
||||
|
||||
// 2. 任务状态变更 (核心事件)
|
||||
TaskStateChanged {
|
||||
task_id: String, // e.g., "fetch:tushare", "process:clean_financials", "module:swot_analysis"
|
||||
task_type: TaskType, // DataFetch | DataProcessing | Analysis
|
||||
status: TaskStatus, // Pending, Scheduled, Running, Completed, Failed, Skipped
|
||||
message: Option<String>,
|
||||
timestamp: i64
|
||||
},
|
||||
|
||||
// 3. 任务流式输出 (用于 LLM 打字机效果)
|
||||
TaskStreamUpdate {
|
||||
task_id: String,
|
||||
content_delta: String,
|
||||
index: u32
|
||||
},
|
||||
|
||||
// 4. 流程整体结束
|
||||
WorkflowCompleted {
|
||||
result_summary: serde_json::Value,
|
||||
end_timestamp: i64
|
||||
},
|
||||
|
||||
WorkflowFailed {
|
||||
reason: String,
|
||||
is_fatal: bool,
|
||||
end_timestamp: i64
|
||||
},
|
||||
|
||||
// 5. 状态快照 (用于重连/丢包恢复)
|
||||
// 当前端重连或显式发送 SyncStateCommand 时,Orchestrator 发送此事件
|
||||
WorkflowStateSnapshot {
|
||||
timestamp: i64,
|
||||
task_graph: WorkflowDag,
|
||||
tasks_status: HashMap<String, TaskStatus>, // 当前所有任务的最新状态
|
||||
tasks_output: HashMap<String, Option<String>> // (可选) 已完成任务的关键输出摘要
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct WorkflowDag {
|
||||
pub nodes: Vec<TaskNode>,
|
||||
pub edges: Vec<TaskDependency> // from -> to
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TaskNode {
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
#[serde(rename = "type")] // `type` 为 Rust 关键字,字段改名但保留序列化名
pub task_type: TaskType,
|
||||
pub initial_status: TaskStatus
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq)]
|
||||
pub enum TaskType {
|
||||
DataFetch, // 创造原始上下文
|
||||
DataProcessing, // 消耗并转换上下文 (New)
|
||||
Analysis // 读取上下文生成新内容
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq)]
|
||||
pub enum TaskStatus {
|
||||
Pending, // 等待依赖
|
||||
Scheduled, // 依赖满足,已下发给 Worker
|
||||
Running, // Worker 正在执行
|
||||
Completed, // 执行成功
|
||||
Failed, // 执行失败
|
||||
Skipped // 因上游失败或策略原因被跳过
|
||||
}
|
||||
```
|
||||
|
||||
### 3.3 调整现有 Messages
|
||||
|
||||
* **`FetchCompanyDataCommand`**: Publisher 变更为 `Workflow Orchestrator`。
|
||||
* **`GenerateReportCommand`**: Publisher 变更为 `Workflow Orchestrator`。
|
||||
|
||||
## 4. Workflow Orchestrator 内部设计
|
||||
|
||||
### 4.1 DAG 调度器 (DAG Scheduler)
|
||||
每个 `request_id` 对应一个 DAG 实例。
|
||||
|
||||
1. **初始化**: 根据 `TemplateID` 读取配置。
|
||||
* 创建 Data Fetch Tasks (作为 DAG 的 Root Nodes)。
|
||||
* 创建 Analysis Module Tasks (根据 `dependencies` 配置连接边)。
|
||||
2. **依赖检查**(核心流转逻辑示意见本节末尾):
|
||||
* 监听 Task 状态变更。
|
||||
* 当 Task A 变成 `Completed` -> 检查依赖 A 的 Task B。
|
||||
* 如果 Task B 的所有依赖都 `Completed` -> 触发 Task B。
|
||||
* 如果 Task A `Failed` -> 将依赖 A 的 Task B 标记为 `Skipped` (除非有容错策略)。
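核心流转逻辑的最小示意如下(`TaskStatus` 为保持自包含在此重复声明,实际应复用上文契约中的定义;容错策略与事件发布从略):

```rust
use std::collections::{HashMap, HashSet};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum TaskStatus { Pending, Scheduled, Running, Completed, Failed, Skipped }

pub struct DagScheduler {
    // task_id -> 它所依赖的任务集合
    pub deps: HashMap<String, HashSet<String>>,
    pub status: HashMap<String, TaskStatus>,
}

impl DagScheduler {
    // 某任务到达终态时调用:更新状态,把失败任务的下游标记为 Skipped,
    // 并返回依赖已全部 Completed、可以下发的任务列表
    pub fn on_task_finished(&mut self, finished: &str, finished_status: TaskStatus) -> Vec<String> {
        self.status.insert(finished.to_string(), finished_status);
        let mut to_skip = Vec::new();
        let mut to_schedule = Vec::new();

        for (candidate, deps) in &self.deps {
            if self.status.get(candidate).copied() != Some(TaskStatus::Pending) {
                continue; // 只处理仍在等待依赖的任务
            }
            if finished_status == TaskStatus::Failed && deps.contains(finished) {
                to_skip.push(candidate.clone()); // 上游失败,默认跳过下游
            } else if deps.iter().all(|d| self.status.get(d).copied() == Some(TaskStatus::Completed)) {
                to_schedule.push(candidate.clone()); // 依赖全部满足,可以下发
            }
        }
        for id in &to_skip {
            self.status.insert(id.clone(), TaskStatus::Skipped);
        }
        for id in &to_schedule {
            self.status.insert(id.clone(), TaskStatus::Scheduled);
        }
        to_schedule
    }
}
```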
|
||||
|
||||
### 4.2 状态对齐机制 (State Alignment / Snapshot)
|
||||
为了解决前端刷新或网络丢包导致的状态不一致:
|
||||
|
||||
1. **主动推送快照 (On Connect)**:
|
||||
* Gateway 在前端建立 SSE 连接时,向 Orchestrator 发送 `SyncStateCommand`。
|
||||
* Orchestrator 收到命令后,将当前内存中的完整 DAG 状态打包成 `WorkflowStateSnapshot` 事件发送。
|
||||
2. **前端合并逻辑**:
|
||||
* 前端收到 Snapshot 后,全量替换本地的任务状态树。
|
||||
* 如果 Snapshot 显示某任务 `Running`,前端恢复 Loading 动画。
|
||||
* 如果 Snapshot 显示某任务 `Completed`,前端渲染结果。
|
||||
|
||||
### 4.3 容错策略 (Policy)
|
||||
Orchestrator 需要内置策略来处理非二元结果。
|
||||
* **Data Fetch Policy**: 并非所有 Data Fetch 必须成功。可以配置 "At least one data source" 策略。如果满足策略,Orchestrator 将下游的 Analysis Task 依赖视为满足。
|
||||
|
||||
## 5. 实施步骤 (Implementation Checklist)
|
||||
|
||||
### Phase 1: Contract & Interface
|
||||
- [x] **Update common-contracts**:
|
||||
- [x] Add `StartWorkflowCommand` and `SyncStateCommand`.
|
||||
- [x] Add `WorkflowEvent` enum (incl. Started, StateChanged, StreamUpdate, Completed, Failed, Snapshot).
|
||||
- [x] Add `WorkflowDag`, `TaskNode`, `TaskType`, `TaskStatus` structs.
|
||||
- [x] Update publishers for `FetchCompanyDataCommand` and `GenerateReportCommand`.
|
||||
- [x] Bump version and publish crate.
|
||||
|
||||
### Phase 2: Workflow Orchestrator Service (New)
|
||||
- [x] **Scaffold Service**:
|
||||
- [x] Create new Rust service `services/workflow-orchestrator-service`.
|
||||
- [x] Setup `Dockerfile`, `Cargo.toml`, and `main.rs`.
|
||||
- [x] Implement NATS connection and multi-topic subscription.
|
||||
- [x] **Core Logic - State Machine**:
|
||||
- [x] Implement `WorkflowState` struct (InMemory + Redis/DB persistence optional for MVP).
|
||||
- [x] Implement `DagScheduler`: Logic to parse template and build dependency graph.
|
||||
- [x] **Core Logic - Handlers**:
|
||||
- [x] Handle `StartWorkflowCommand`: Init DAG, fire initial tasks.
|
||||
- [x] Handle `TaskCompleted` events (from Providers/ReportGen): Update DAG, trigger next tasks.
|
||||
- [x] Handle `SyncStateCommand`: Serialize current state and emit `WorkflowStateSnapshot`.
|
||||
- [x] **Policy Engine**:
|
||||
- [x] Implement "At least one provider" policy for data fetching.
|
||||
|
||||
### Phase 3: API Gateway Refactoring
|
||||
- [x] **Remove Legacy Logic**:
|
||||
- [x] Delete `aggregator.rs` completely.
|
||||
- [x] Remove `trigger_data_fetch` aggregation logic.
|
||||
- [x] Remove `/api/tasks` polling endpoint.
|
||||
- [x] **Implement Proxy Logic**:
|
||||
- [x] Add `POST /api/v2/workflow/start` -> Publishes `StartWorkflowCommand`.
|
||||
- [x] Add `GET /api/v2/workflow/events/{id}` -> Subscribes to NATS, sends `SyncStateCommand` on open, proxies events to SSE.
|
||||
|
||||
### Phase 4: Integration & Frontend
|
||||
- [x] **Docker Compose**: Add `workflow-orchestrator-service` to stack.
|
||||
- [x] **Frontend Adapter**:
|
||||
- [x] **Type Definitions**: Define `WorkflowEvent`, `WorkflowDag`, `TaskStatus` in `src/types/workflow.ts`.
|
||||
- [x] **API Proxy**: Implement Next.js Route Handlers for `POST /workflow/start` and `GET /workflow/events/{id}` (SSE).
|
||||
- [x] **Core Logic (`useWorkflow`)**:
|
||||
- [x] Implement SSE connection management with auto-reconnect.
|
||||
- [x] Handle `WorkflowStarted`, `TaskStreamUpdate`, `WorkflowCompleted`.
|
||||
- [x] Implement state restoration via `WorkflowStateSnapshot`.
|
||||
- [x] **UI Components**:
|
||||
- [x] `WorkflowVisualizer`: Task list and status tracking.
|
||||
- [x] `TaskOutputViewer`: Markdown-rendered stream output.
|
||||
- [x] `WorkflowReportLayout`: Integrated analysis page layout.
|
||||
- [x] **Page Integration**: Refactor `app/report/[symbol]/page.tsx` to use the new workflow engine.
|
||||
|
||||
---
|
||||
*Updated: 2025-11-20 - Added Implementation Checklist*
|
||||
@ -1,175 +0,0 @@
|
||||
# 架构修订:基于会话的数据快照与分层存储 (Session-Based Data Snapshotting)
|
||||
|
||||
## 1. 核心理念修订 (Core Philosophy Refinement)
|
||||
|
||||
基于您的反馈,我们修正了架构的核心逻辑,将数据明确划分为两类,并采取不同的存储策略。
|
||||
|
||||
### 1.1 数据分类 (Data Classification)
|
||||
|
||||
1. **客观历史数据 (Objective History / Time-Series)**
|
||||
* **定义**: 股价、成交量、K线图等交易数据。
|
||||
* **特性**: "出现即历史",不可篡改,全球唯一。
|
||||
* **存储策略**: **全局共享存储**。不需要按 Session 隔离,不需要存多份。
|
||||
* **表**: 现有的 `daily_market_data` (TimescaleDB) 保持不变。
|
||||
|
||||
2. **观测型数据 (Observational Data / Fundamentals)**
|
||||
* **定义**: 财务报表、公司简介、以及 Provider 返回的原始非结构化或半结构化信息。
|
||||
* **特性**: 不同来源(Providers)说法不一;可能随时间修正(Restatement);分析依赖于“当时”获取的版本。
|
||||
* **存储策略**: **基于 Session 的快照存储**。每一次 Session 都必须保存一份当时获取的原始数据的完整副本。
|
||||
* **表**: 新增 `session_raw_data` 表。
|
||||
|
||||
### 1.2 解决的问题
|
||||
* **会话隔离**: 新的 Session 拥有自己独立的一套基础面数据,不受历史 Session 干扰,也不污染未来 Session。
|
||||
* **历史回溯**: 即使 Provider 变了,查看历史 Report 时,依然能看到当时是基于什么数据得出的结论。
|
||||
* **数据清洗解耦**: 我们现在只负责“收集并快照”,不负责“清洗和聚合”。复杂的清洗逻辑(WASM/AI)留待后续模块处理。
|
||||
|
||||
---
|
||||
|
||||
## 2. 数据库架构设计 (Schema Design)
|
||||
|
||||
### 2.1 新增:会话原始数据表 (`session_raw_data`)
|
||||
|
||||
这是本次架构调整的核心。我们不再试图把财务数据强行塞进一个全局唯一的标准表,而是忠实记录每个 Provider 在该 Session 中返回的内容。
|
||||
|
||||
```sql
|
||||
CREATE TABLE session_raw_data (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
request_id UUID NOT NULL, -- 关联的 Session ID
|
||||
symbol VARCHAR(32) NOT NULL,
|
||||
provider VARCHAR(64) NOT NULL, -- e.g., 'tushare', 'alphavantage'
|
||||
data_type VARCHAR(32) NOT NULL, -- e.g., 'financial_statements', 'company_profile'
|
||||
|
||||
-- 核心:直接存储 Provider 返回的(或稍微标准化的)完整 JSON
|
||||
data_payload JSONB NOT NULL,
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
-- 索引:为了快速查询某次 Session 的数据
|
||||
CONSTRAINT fk_request_id FOREIGN KEY (request_id) REFERENCES requests(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE INDEX idx_session_data_req ON session_raw_data(request_id);
|
||||
```
|
||||
|
||||
### 2.2 新增:供应商缓存表 (`provider_response_cache`)
|
||||
|
||||
为了优化性能和节省 API 调用次数,我们在全局层引入缓存。但请注意:**缓存仅作为读取源,不作为 Session 的存储地。**
|
||||
|
||||
```sql
|
||||
CREATE TABLE provider_response_cache (
|
||||
cache_key VARCHAR(255) PRIMARY KEY, -- e.g., "tushare:AAPL:financials"
|
||||
data_payload JSONB NOT NULL,
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
expires_at TIMESTAMPTZ NOT NULL
|
||||
);
|
||||
```
|
||||
|
||||
### 2.3 保持不变:市场数据表 (`daily_market_data`)
|
||||
* 继续使用 TimescaleDB 存储 `open`, `high`, `low`, `close`, `volume`。
|
||||
* 所有 Session 共享读取此表。
|
||||
|
||||
---
|
||||
|
||||
## 3. 数据流转逻辑 (Data Lifecycle)
|
||||
|
||||
### Phase 1: Session 启动与数据获取 (Acquisition)
|
||||
|
||||
1. **Start**: API Gateway 生成 `request_id`。
|
||||
2. **Fetch & Cache Logic (在 Provider Service 中执行)**(完整流程示意见本节末尾):
|
||||
* Provider 收到任务 (Symbol: AAPL)。
|
||||
* **Check Cache**: 查询 `provider_response_cache`。
|
||||
* *Hit*: 拿出现成的 JSON。
|
||||
* *Miss*: 调用外部 API,获得 JSON,写入 Cache (设置过期时间如 24h)。
|
||||
3. **Snapshot (关键步骤)**:
|
||||
* Provider 将拿到的 JSON (无论来自 Cache 还是 API),作为一条**新记录**写入 `session_raw_data`。
|
||||
* 字段: `request_id=UUID`, `provider=tushare`, `data=JSON`。
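Provider 侧流程的一个草图如下(`PersistenceClient` 的 `get_cache` / `set_cache` / `save_session_data` 即 Phase 2 中计划新增的方法,这里以 stub 形式代替;`call_tushare_api` 为假设的外部调用):

```rust
use serde_json::Value;
use uuid::Uuid;

// 最小 stub,仅为使示意自包含;真实 PersistenceClient 以 common-contracts 为准
pub struct PersistenceClient;

impl PersistenceClient {
    pub async fn get_cache(&self, _key: &str) -> anyhow::Result<Option<Value>> { Ok(None) }
    pub async fn set_cache(&self, _key: &str, _val: &Value, _ttl_secs: u64) -> anyhow::Result<()> { Ok(()) }
    pub async fn save_session_data(
        &self,
        _request_id: Uuid,
        _symbol: &str,
        _provider: &str,
        _data_type: &str,
        _payload: &Value,
    ) -> anyhow::Result<()> { Ok(()) }
}

// 假设的外部调用,代表真实的 Tushare API 请求
async fn call_tushare_api(_symbol: &str) -> anyhow::Result<Value> {
    Ok(serde_json::json!({ "demo": true }))
}

// 示意:先查缓存(Hit 直接复用 / Miss 抓取并回写),再为本次 Session 写入快照
pub async fn fetch_with_cache_and_snapshot(
    client: &PersistenceClient,
    request_id: Uuid,
    symbol: &str,
) -> anyhow::Result<()> {
    let cache_key = format!("tushare:{symbol}:financials");

    let payload: Value = match client.get_cache(&cache_key).await? {
        Some(hit) => hit,
        None => {
            let fresh = call_tushare_api(symbol).await?;
            client.set_cache(&cache_key, &fresh, 24 * 3600).await?;
            fresh
        }
    };

    // 无论数据来自 Cache 还是外部 API,都作为新记录写入 session_raw_data
    client
        .save_session_data(request_id, symbol, "tushare", "financial_statements", &payload)
        .await?;
    Ok(())
}
```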
|
||||
|
||||
### Phase 2: 展示与分析 (Consumption)
|
||||
|
||||
1. **Frontend Raw View (UI)**:
|
||||
* 前端调用 `GET /api/v1/session/{request_id}/raw-data`。
|
||||
* 后端 `SELECT * FROM session_raw_data WHERE request_id = ...`。
|
||||
* UI 依然可以使用之前的 Accordion 结构,展示 "Tushare: Financials", "AlphaVantage: Profile"。这就是用户看到的“本次调查的原始底稿”。
|
||||
|
||||
2. **Analysis (LLM)**:
|
||||
* Report Generator 获取 `request_id` 对应的所有 raw data。
|
||||
* 将这些 Raw Data 作为 Context 喂给 LLM。
|
||||
* (未来扩展): 在这一步之前,插入一个 "Data Cleaning Agent/Wasm",读取 raw data,输出 clean data,再喂给 LLM。
|
||||
|
||||
### Phase 3: 归档与清理 (Cleanup)
|
||||
|
||||
* **Session Deletion**: 当我们需要清理某个历史 Session 时,只需 `DELETE FROM session_raw_data WHERE request_id = ...`。
|
||||
* **副作用**: 零。因为 `daily_market_data` 是共享的(留着也没事),而 Session 独享的 `raw_data` 被彻底删除了。
|
||||
|
||||
---
|
||||
|
||||
## 4. 实施路线图 (Implementation Roadmap)
|
||||
|
||||
1. **Database Migration**:
|
||||
* 创建 `session_raw_data` 表。
|
||||
* 创建 `provider_response_cache` 表。
|
||||
* (清理旧表): 废弃 `time_series_financials` 表(原计划用于存标准化的财务指标,现在确认不需要。我们只存 `session_raw_data` 中的原始基本面数据,财务报表由原始数据动态推导)。
|
||||
* **保留** `daily_market_data` 表(存储股价、K线等客观时间序列数据,保持全局共享)。
|
||||
|
||||
2. **Provider Services**:
|
||||
* 引入 Cache 检查逻辑。
|
||||
* 修改输出逻辑:不再尝试 Upsert 全局表,而是 Insert `session_raw_data`。
|
||||
|
||||
3. **Frontend Refactor**:
|
||||
* 修改 `RawDataViewer` 的数据源,从读取“最后一次更新”改为读取“当前 Session 的 Raw Data”。
|
||||
* 这完美解决了“刷新页面看到旧数据”的问题——如果是一个新 Session ID,它的 `session_raw_data` 一开始是空的,UI 就会显示为空/Loading,直到新的 Snapshot 写入。
|
||||
|
||||
4. **Future Extensibility (Aggregation)**:
|
||||
* 当前架构下,Frontend 直接展示 Raw Data。
|
||||
* 未来:新增 `DataProcessorService`。它监听 "Data Fetched" 事件,读取 `session_raw_data`,执行聚合逻辑,将结果写入 `session_clean_data` (假想表),供 UI 显示“完美报表”。
|
||||
|
||||
---
|
||||
|
||||
## 5. Step-by-Step Task List
|
||||
|
||||
### Phase 1: Data Persistence Service & Database (Foundation)
|
||||
- [x] **Task 1.1**: Create new SQL migration file.
|
||||
- Define `session_raw_data` table (Columns: `id`, `request_id`, `symbol`, `provider`, `data_type`, `data_payload`, `created_at`).
|
||||
- Define `provider_response_cache` table (Columns: `cache_key`, `data_payload`, `updated_at`, `expires_at`).
|
||||
- (Optional) Rename `time_series_financials` to `_deprecated_time_series_financials` to prevent accidental usage.
|
||||
- [x] **Task 1.2**: Run SQL migration (`sqlx migrate run`).
|
||||
- [x] **Task 1.3**: Implement `db/session_data.rs` in Data Persistence Service.
|
||||
- Function: `insert_session_data(pool, request_id, provider, data_type, payload)`.
|
||||
- Function: `get_session_data(pool, request_id)`.
|
||||
- [x] **Task 1.4**: Implement `db/provider_cache.rs` in Data Persistence Service.
|
||||
- Function: `get_cache(pool, key) -> Option<Payload>`.
|
||||
- Function: `set_cache(pool, key, payload, ttl)`.
|
||||
- [x] **Task 1.5**: Expose new API endpoints in `api/`.
|
||||
- `POST /api/v1/session-data` (Internal use by Providers).
|
||||
- `GET /api/v1/session-data/:request_id` (Used by ReportGen & Frontend).
|
||||
- `GET/POST /api/v1/provider-cache` (Internal use by Providers).
|
||||
|
||||
### Phase 2: Common Contracts & SDK (Glue Code)
|
||||
- [x] **Task 2.1**: Update `common-contracts`.
|
||||
- Add DTOs for `SessionData` and `CacheEntry`.
|
||||
- Update `PersistenceClient` struct to include methods for calling new endpoints (`save_session_data`, `get_cache`, `set_cache`).
|
||||
|
||||
### Phase 3: Provider Services (Logic Update)
|
||||
- [x] **Task 3.1**: Refactor `tushare-provider-service`.
|
||||
- Update Worker to check Cache first.
|
||||
- On Cache Miss: Call Tushare API -> Save to Cache.
|
||||
- **Final Step**: Post data to `POST /api/v1/session-data` (instead of old batch insert).
|
||||
- Ensure `request_id` is propagated correctly.
|
||||
- [x] **Task 3.2**: Refactor `alphavantage-provider-service` (same logic).
|
||||
- [x] **Task 3.3**: Refactor `yfinance-provider-service` (same logic).
|
||||
- [x] **Task 3.4**: Verify `FinancialsPersistedEvent` is still emitted (or similar event) to trigger Gateway aggregation.
|
||||
|
||||
### Phase 4: API Gateway & Report Generator (Consumption)
|
||||
- [x] **Task 4.1**: Update `api-gateway` routing.
|
||||
- Proxy `GET /api/v1/session-data/:request_id` for Frontend.
|
||||
- [x] **Task 4.2**: Update `report-generator-service`.
|
||||
- In `worker.rs`, change data fetching logic.
|
||||
- Instead of `get_financials_by_symbol`, call `get_session_data(request_id)`.
|
||||
- Pass the raw JSON list to the LLM Context Builder.
|
||||
|
||||
### Phase 5: Frontend (UI Update)
|
||||
- [x] **Task 5.1**: Update `useReportEngine.ts`.
|
||||
- Change polling/fetching logic to request `GET /api/v1/session-data/${requestId}`.
|
||||
- [x] **Task 5.2**: Update `RawDataViewer.tsx`.
|
||||
- Adapt to new data structure (List of `{ provider, data_type, payload }`).
|
||||
- Ensure the UI correctly groups these raw snapshots by Provider.
|
||||
@ -1,110 +0,0 @@
|
||||
# 动态服务注册与发现机制设计方案 (Dynamic Service Registration & Discovery Proposal)
|
||||
|
||||
## 1. 问题陈述 (Problem Statement)
|
||||
目前的 **API Gateway** 依赖于静态配置(环境变量中的 `provider_services` 映射表)来获知可用的数据提供商服务 (Data Provider Services)。
|
||||
* **脆弱性 (Brittleness)**: 增加或迁移 Provider 需要修改 Gateway 配置并重启。
|
||||
* **缺乏健康感知 (Lack of Health Awareness)**: Gateway 会盲目地尝试连接配置的 URL。如果某个服务挂了(但配置还在),请求会遭遇超时或连接错误。
|
||||
* **运维复杂 (Operational Complexity)**: 手动管理 URL 既机械又容易出错。
|
||||
|
||||
## 2. 解决方案:动态注册系统 (Dynamic Registration System)
|
||||
我们将实施**服务注册 (Service Registry)** 模式,由 API Gateway 充当注册中心。
|
||||
|
||||
### 2.1. "注册" 生命周期
|
||||
1. **启动 (Startup)**: 当一个 Provider Service (例如 Tushare) 启动时,它向 API Gateway 发送 `POST /v1/registry/register` 请求。
|
||||
* 载荷包括:服务 ID、基础 URL、能力标识(如 "tushare")。
|
||||
2. **存活心跳 (Liveness/Heartbeat)**: Provider Service 运行一个后台任务,每隔 **N 秒** (建议 **10秒**) 发送一次 `POST /v1/registry/heartbeat`。
|
||||
* **注意**: 由于我们主要在本地容器网络运行,网络开销极低,我们可以使用较短的心跳周期(如 10秒)来实现快速的故障检测。
|
||||
3. **发现 (Discovery)**: API Gateway 在内存中维护活跃服务列表。
|
||||
* 如果超过 **2 * N 秒** (如 20秒) 未收到心跳,该服务将被标记为“不健康”或被移除。
|
||||
4. **关闭 (Shutdown)**: 在优雅退出 (Graceful Shutdown, SIGTERM/SIGINT) 时,Provider 发送 `POST /v1/registry/deregister`。
|
||||
|
||||
### 2.2. 架构变更
|
||||
|
||||
#### A. 共享契约 (`common-contracts`)
|
||||
定义注册所需的数据结构。
|
||||
|
||||
```rust
|
||||
// services/common-contracts/src/registry.rs
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct ServiceRegistration {
|
||||
pub service_id: String, // 唯一ID, 例如 "tushare-provider-1"
|
||||
pub service_name: String, // 类型, 例如 "tushare"
|
||||
pub base_url: String, // 例如 "http://10.0.1.5:8000"
|
||||
pub health_check_url: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct Heartbeat {
|
||||
pub service_id: String,
|
||||
pub status: ServiceStatus, // Active, Degraded
|
||||
}
|
||||
```
|
||||
|
||||
#### B. API Gateway (`api-gateway`)
|
||||
* **新组件**: `ServiceRegistry`(带 TTL 的线程安全 Map,数据结构示意见本小节末尾)。
|
||||
* **新接口**:
|
||||
* `POST /v1/registry/register`: 添加/更新条目。
|
||||
* `POST /v1/registry/heartbeat`: 刷新 TTL。
|
||||
* `POST /v1/registry/deregister`: 移除条目。
|
||||
* **逻辑变更**: `get_task_progress` 和 `trigger_data_fetch` 将不再读取静态配置,而是查询动态的 `ServiceRegistry`。
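注册表数据结构的一个示意如下(`ServiceRegistration` 即上文 `registry.rs` 中定义的结构;TTL 取 2 倍心跳周期即 20 秒):

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};

// 示意:Gateway 内存中的注册表,service_id -> (注册信息, 最近心跳时间)
#[derive(Clone, Default)]
pub struct ServiceRegistry {
    inner: Arc<RwLock<HashMap<String, (ServiceRegistration, Instant)>>>,
}

impl ServiceRegistry {
    const TTL: Duration = Duration::from_secs(20); // 2 * 心跳周期

    pub fn upsert(&self, reg: ServiceRegistration) {
        self.inner.write().unwrap().insert(reg.service_id.clone(), (reg, Instant::now()));
    }

    pub fn heartbeat(&self, service_id: &str) {
        if let Some(entry) = self.inner.write().unwrap().get_mut(service_id) {
            entry.1 = Instant::now(); // 刷新 TTL
        }
    }

    pub fn deregister(&self, service_id: &str) {
        self.inner.write().unwrap().remove(service_id);
    }

    // 按能力名(如 "tushare")查找仍在 TTL 内的服务实例
    pub fn healthy_by_name(&self, service_name: &str) -> Vec<ServiceRegistration> {
        let now = Instant::now();
        self.inner
            .read()
            .unwrap()
            .values()
            .filter(|(reg, seen)| reg.service_name == service_name && now.duration_since(*seen) < Self::TTL)
            .map(|(reg, _)| reg.clone())
            .collect()
    }
}
```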
|
||||
|
||||
#### C. Provider Services (`*-provider-service`)
|
||||
我们需要一个统一的机制来处理这个生命周期。
|
||||
建议在 `common-contracts` 中引入一个标准的生命周期处理模块。
|
||||
|
||||
**建议的 Trait / 辅助结构体:**
|
||||
|
||||
```rust
|
||||
// services/common-contracts/src/lifecycle.rs (New)
|
||||
|
||||
pub struct ServiceRegistrar {
|
||||
gateway_url: String,
|
||||
registration: ServiceRegistration,
|
||||
// ...
|
||||
}
|
||||
|
||||
impl ServiceRegistrar {
|
||||
/// 注册服务 (重试直到成功)
|
||||
pub async fn register(&self) -> Result<()>;
|
||||
/// 启动后台心跳循环 (10s 间隔)
|
||||
pub async fn start_heartbeat_loop(&self);
|
||||
/// 注销服务
|
||||
pub async fn deregister(&self) -> Result<()>;
|
||||
}
|
||||
```
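心跳循环的一个最小示意如下(写成独立函数便于在 `main.rs` 中 `tokio::spawn`;URL 与 payload 结构以最终契约为准):

```rust
use std::time::Duration;
use tokio::time::{interval, MissedTickBehavior};

// 示意:start_heartbeat_loop 内部可复用的 10 秒心跳循环。
// gateway_url / service_id 对应上面 ServiceRegistrar 持有的字段;
// 心跳失败只记录日志,由 Gateway 侧的 TTL 过期机制兜底。
pub async fn run_heartbeat_loop(gateway_url: String, service_id: String) {
    let client = reqwest::Client::new();
    let url = format!("{gateway_url}/v1/registry/heartbeat");
    let mut ticker = interval(Duration::from_secs(10));
    ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);

    loop {
        ticker.tick().await;
        let body = serde_json::json!({
            "service_id": service_id,
            "status": "Active",
        });
        if let Err(e) = client.post(&url).json(&body).send().await {
            tracing::warn!("heartbeat to gateway failed: {e}");
        }
    }
}
```

服务启动时挂起即可:`tokio::spawn(run_heartbeat_loop(cfg.gateway_url.clone(), service_id.clone()));`。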
|
||||
|
||||
## 3. 实施计划 (TODO List)
|
||||
|
||||
### Phase 1: 基础建设 (Infrastructure)
|
||||
* [ ] **Task 1.1 (Contracts)**: 在 `services/common-contracts` 中创建 `registry.rs`,定义 `ServiceRegistration` 和 `Heartbeat` 结构体。
|
||||
* [ ] **Task 1.2 (Library)**: 在 `services/common-contracts` 中实现 `ServiceRegistrar` 逻辑。
|
||||
* 包含重试机制的 `register`。
|
||||
* 包含 `tokio::time::interval` (10s) 的 `start_heartbeat_loop`。
|
||||
* 确保能从环境变量 (如 `API_GATEWAY_URL`) 获取 Gateway 地址。
|
||||
* [ ] **Task 1.3 (Gateway Core)**: 在 `api-gateway` 中实现 `ServiceRegistry` 状态管理(使用 `Arc<RwLock<HashMap<...>>>`)。
|
||||
* [ ] **Task 1.4 (Gateway API)**: 在 `api-gateway` 中添加 `/v1/registry/*` 路由并挂载 Handler。
|
||||
|
||||
### Phase 2: Provider 改造 (Provider Migration)
|
||||
*由于所有 Provider 架构一致,以下步骤需在 `tushare`, `finnhub`, `alphavantage`, `yfinance` 四个服务中重复执行:*
|
||||
|
||||
* [ ] **Task 2.1 (Config)**: 更新 `AppConfig`,增加 `gateway_url` 配置项。
|
||||
* [ ] **Task 2.2 (Main Loop)**: 修改 `main.rs`。
|
||||
* 初始化 `ServiceRegistrar`。
|
||||
* 在 HTTP Server 启动前(或同时)调用 `registrar.register().await`。
|
||||
* 使用 `tokio::spawn` 启动 `registrar.start_heartbeat_loop()`。
|
||||
* [ ] **Task 2.3 (Shutdown)**: 添加 Graceful Shutdown 钩子,确保在收到 Ctrl+C 时调用 `registrar.deregister()`。
|
||||
|
||||
### Phase 3: 消费端适配 (Gateway Consumption)
|
||||
* [ ] **Task 3.1**: 修改 `api-gateway` 的 `test_data_source_config`,不再查 Config,改为查 Registry。
|
||||
* [ ] **Task 3.2**: 修改 `api-gateway` 的 `trigger_data_fetch`,根据 `service_name` (如 "tushare") 从 Registry 查找可用的 `base_url`。
|
||||
* 如果找到多个同名服务,可以做简单的 Load Balance(轮询)。
|
||||
* [ ] **Task 3.3**: 修改 `api-gateway` 的 `get_task_progress`,遍历 Registry 中的所有服务来聚合状态。
|
||||
|
||||
### Phase 4: 清理 (Cleanup)
|
||||
* [ ] **Task 4.1**: 移除 `api-gateway` 中关于 `provider_services` 的静态配置代码和环境变量。
|
||||
|
||||
## 4. 预期收益
|
||||
* **即插即用 (Plug-and-Play)**: 启动一个新的 Provider 实例,它会自动出现在系统中。
|
||||
* **自愈 (Self-Healing)**: 如果 Provider 崩溃,它会从注册表中消失(TTL 过期),Gateway 不会再向其发送请求,避免了无意义的等待和超时。
|
||||
* **零配置 (Zero-Config)**: 扩容或迁移 Provider 时无需修改 Gateway 环境变量。
|
||||
@ -1,45 +0,0 @@
|
||||
# 前端架构重构计划:状态管理与工作流控制权移交
|
||||
|
||||
## 1. 背景与现状
|
||||
当前的 `fundamental-analysis` 前端项目源自一个 POC (Proof of Concept) 原型。在快速迭代过程中,遗留了大量“为了跑通流程而写”的临时逻辑。核心问题在于**前端承担了过多的业务控制逻辑**,导致前后端状态不一致、错误处理困难、用户体验割裂。
|
||||
|
||||
### 核心痛点
|
||||
1. **“自嗨式”状态流转**:前端自行判断何时从“数据获取”切换到“分析报告”阶段(基于轮询结果推断),而非响应后端的明确指令。
|
||||
2. **脆弱的 Polling + SSE 混合模式**:前端先轮询 HTTP 接口查询进度,之后再停止轮询、改为连接 SSE 流。两者之间存在状态断层,且严重依赖 HTTP 接口的实时性(而该接口又是后端实时聚合下游得来的,极易超时)。
|
||||
3. **缺乏统一的状态源 (Source of Truth)**:前端维护了一套复杂的 `ReportState`,后端也有一套状态,两者通过不稳定的网络请求同步,经常出现“前端显示完成,后端还在跑”或“后端报错,前端还在转圈”的情况。
|
||||
|
||||
## 2. 重构目标
|
||||
**原则:前端归前端(UI展示),后端归后端(业务逻辑与流转控制)。**
|
||||
|
||||
1. **控制权移交**:所有涉及业务流程流转(Phase Transition)的逻辑,必须由后端通过事件或状态字段明确驱动。前端只负责渲染当前状态。
|
||||
2. **单一数据流 (Single Stream)**:废除“HTTP Polling -> SSE”的混合模式,建立统一的 WebSocket 或 SSE 通道。从发请求那一刻起,所有状态变更(包括数据获取进度、分析进度、报错)全由服务端推送。
|
||||
3. **简化状态机**:前端 `useReportEngine` 钩子应简化为单纯的“状态订阅者”,不再包含复杂的判断逻辑(如 `if (tasks.every(t => t.success)) switchPhase()`)。
|
||||
|
||||
## 3. 实施方案 (Tasks)
|
||||
|
||||
### Phase 1: 后端基础设施准备 (Backend Readiness)
|
||||
- [ ] **统一事件流接口**:在 `api-gateway` 实现一个统一的 SSE/WebSocket 端点(如 `/v2/workflow/events`)。
|
||||
- 该端点应聚合:`DataFetchProgress` (NATS), `WorkflowStart` (NATS), `ModuleProgress` (ReportGenerator), `WorkflowComplete`。
|
||||
- [ ] **Gateway 状态缓存**:`api-gateway` 需要维护一个轻量级的 Request 状态缓存(Redis 或 内存),不再实时透传查询请求给下游 Provider,而是直接返回缓存的最新状态。
|
||||
- [ ] **定义统一状态协议**:制定前后端通用的状态枚举(`PENDING`, `DATA_FETCHING`, `ANALYZING`, `COMPLETED`, `FAILED`),枚举示意见下方代码。
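状态枚举的一个示意如下(序列化为 SCREAMING_SNAKE_CASE,与前端收到的 `STATUS_CHANGED: DATA_FETCHING` 等字面量保持一致;最终定义应放入 `common-contracts`):

```rust
use serde::{Deserialize, Serialize};

// 示意:前后端共用的工作流状态枚举
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum WorkflowStatus {
    Pending,
    DataFetching,
    Analyzing,
    Completed,
    Failed,
}
```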
|
||||
|
||||
### Phase 2: 前端逻辑剥离 (Frontend Refactoring)
|
||||
- [ ] **废除 useReportEngine 里的推断逻辑**:删除所有 `useEffect` 里关于状态切换的 `if/else` 判断代码。
|
||||
- [ ] **实现 Event-Driven Hook**:重写 `useReportEngine`,使其核心逻辑变为:连接流 -> 收到事件 -> 更新 State。
|
||||
- 收到 `STATUS_CHANGED: DATA_FETCHING` -> 显示数据加载 UI。
|
||||
- 收到 `STATUS_CHANGED: ANALYZING` -> 自动切换到分析 UI(无需前端判断数据是否齐备)。
|
||||
- 收到 `ERROR` -> 显示错误 UI。
|
||||
- [ ] **清理旧代码**:移除对 `/api/tasks` 轮询的依赖代码。
|
||||
|
||||
### Phase 3: 验证与兜底
|
||||
- [ ] **断线重连机制**:实现 SSE/WS 的自动重连,并能从后端获取“当前快照”来恢复状态,防止刷新页面丢失进度。
|
||||
- [ ] **超时兜底**:仅保留最基本的网络超时提示(如“服务器连接中断”),不再处理业务逻辑超时。
|
||||
|
||||
## 4. 复杂度评估与建议
|
||||
- **复杂度**:中等偏高 (Medium-High)。涉及前后端协议变更和核心 Hook 重写。
|
||||
- **风险**:高。这是系统的心脏部位,重构期间可能会导致整个分析流程暂时不可用。
|
||||
- **建议**:**单独开一个线程(Branch/Session)进行**。不要在当前修复 Bug 的线程中混合进行。这需要系统性的设计和逐步替换,无法通过简单的 Patch 完成。
|
||||
|
||||
---
|
||||
*Created: 2025-11-20*