chore: massive update covering recent refactoring and bug fixes

- fix: infinite message loop in workflow orchestrator
- feat: restore realtime LLM streaming from report generator to frontend
- refactor: major update to provider services (generic workers, workflow adapters)
- refactor: common contracts and message definitions updated
- feat: enhanced logging and observability in orchestrator
- docs: update project management tasks and status
- chore: dependency updates and config adjustments
Lv, Qi 2025-11-30 19:17:02 +08:00
parent 5dc13fa735
commit e9e4d0c1b3
72 changed files with 9400 additions and 1524 deletions

211
Cargo.lock generated
View File

@ -40,6 +40,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"async-trait",
"axum",
"chrono",
"common-contracts",
@ -448,6 +449,50 @@ dependencies = [
"generic-array",
]
[[package]]
name = "bollard"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97ccca1260af6a459d75994ad5acc1651bcabcbdbc41467cc9786519ab854c30"
dependencies = [
"base64 0.22.1",
"bollard-stubs",
"bytes",
"futures-core",
"futures-util",
"hex",
"http",
"http-body-util",
"hyper",
"hyper-named-pipe",
"hyper-util",
"hyperlocal",
"log",
"pin-project-lite",
"serde",
"serde_derive",
"serde_json",
"serde_repr",
"serde_urlencoded",
"thiserror 2.0.17",
"tokio",
"tokio-util",
"tower-service",
"url",
"winapi",
]
[[package]]
name = "bollard-stubs"
version = "1.47.1-rc.27.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f179cfbddb6e77a5472703d4b30436bff32929c0aa8a9008ecf23d1d3cdd0da"
dependencies = [
"serde",
"serde_repr",
"serde_with 3.16.1",
]
[[package]]
name = "borsh"
version = "1.6.0"
@ -681,6 +726,19 @@ dependencies = [
"yaml-rust2",
]
[[package]]
name = "console"
version = "0.15.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
"windows-sys 0.59.0",
]
[[package]]
name = "const-oid"
version = "0.9.6"
@ -1152,6 +1210,12 @@ dependencies = [
"serde",
]
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -1161,6 +1225,28 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "end-to-end"
version = "0.1.0"
dependencies = [
"anyhow",
"bollard",
"chrono",
"common-contracts",
"console",
"eventsource-stream",
"futures",
"indicatif",
"rand 0.9.2",
"reqwest",
"serde",
"serde_json",
"tokio",
"tracing",
"tracing-subscriber",
"uuid",
]
[[package]]
name = "equivalent"
version = "1.0.2"
@ -1245,6 +1331,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-nats",
"async-trait",
"axum",
"chrono",
"common-contracts",
@ -1699,6 +1786,21 @@ dependencies = [
"want",
]
[[package]]
name = "hyper-named-pipe"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278"
dependencies = [
"hex",
"hyper",
"hyper-util",
"pin-project-lite",
"tokio",
"tower-service",
"winapi",
]
[[package]]
name = "hyper-rustls"
version = "0.27.7"
@ -1759,6 +1861,21 @@ dependencies = [
"windows-registry",
]
[[package]]
name = "hyperlocal"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7"
dependencies = [
"hex",
"http-body-util",
"hyper",
"hyper-util",
"pin-project-lite",
"tokio",
"tower-service",
]
[[package]]
name = "iana-time-zone"
version = "0.1.64"
@ -1930,6 +2047,19 @@ dependencies = [
"serde_core",
]
[[package]]
name = "indicatif"
version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
dependencies = [
"console",
"number_prefix",
"portable-atomic",
"unicode-width",
"web-time",
]
[[package]]
name = "instant"
version = "0.1.13"
@ -2358,6 +2488,12 @@ dependencies = [
"libm",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "oas"
version = "0.2.1"
@ -2367,7 +2503,7 @@ dependencies = [
"either",
"serde",
"serde_json",
"serde_with",
"serde_with 2.3.3",
]
[[package]]
@ -3109,7 +3245,7 @@ dependencies = [
"rand 0.9.2",
"reqwest",
"rmcp-macros",
"schemars",
"schemars 1.1.0",
"serde",
"serde_json",
"sse-stream",
@ -3376,6 +3512,18 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "schemars"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
dependencies = [
"dyn-clone",
"ref-cast",
"serde",
"serde_json",
]
[[package]]
name = "schemars"
version = "1.1.0"
@ -3610,6 +3758,24 @@ dependencies = [
"time",
]
[[package]]
name = "serde_with"
version = "3.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7"
dependencies = [
"base64 0.22.1",
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.12.1",
"schemars 0.9.0",
"schemars 1.1.0",
"serde_core",
"serde_json",
"time",
]
[[package]]
name = "serde_with_macros"
version = "2.3.3"
@ -3677,7 +3843,7 @@ dependencies = [
"quote",
"reqwest",
"rmcp",
"schemars",
"schemars 1.1.0",
"serde",
"serde_json",
"serde_urlencoded",
@ -3702,7 +3868,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"schemars",
"schemars 1.1.0",
"serde",
"serde_json",
"serde_urlencoded",
@ -4672,6 +4838,12 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unsafe-libyaml"
version = "0.2.11"
@ -4947,6 +5119,22 @@ dependencies = [
"wasite",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.11"
@ -4956,6 +5144,12 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.62.2"
@ -5044,6 +5238,15 @@ dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.60.2"

View File

@ -12,6 +12,7 @@ members = [
"services/workflow-orchestrator-service",
"services/yfinance-provider-service",
"crates/workflow-context",
"tests/end-to-end",
]
[workspace.package]

5553
assets/tushare.json Normal file

File diff suppressed because it is too large

View File

@ -98,7 +98,7 @@ services:
environment:
SERVER_PORT: 4000
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
REPORT_GENERATOR_SERVICE_URL: http://report-generator-service:8004
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
@ -109,6 +109,8 @@ services:
condition: service_healthy
alphavantage-provider-service:
condition: service_started
mock-provider-service:
condition: service_started
tushare-provider-service:
condition: service_started
finnhub-provider-service:
@ -129,6 +131,38 @@ services:
- cargo-target:/app/target
- cargo-cache:/usr/local/cargo
mock-provider-service:
build:
context: .
dockerfile: docker/Dockerfile.dev
container_name: mock-provider-service
working_dir: /app/services/mock-provider-service
command: ["cargo", "watch", "-x", "run"]
volumes:
- workflow_data:/mnt/workflow_data
- ./:/app
- cargo-target:/app/target
- cargo-cache:/usr/local/cargo
environment:
SERVER_PORT: 8006
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
API_GATEWAY_URL: http://api-gateway:4000
WORKFLOW_DATA_PATH: /mnt/workflow_data
SERVICE_HOST: mock-provider-service
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
depends_on:
- nats
- data-persistence-service
networks:
- app-network
healthcheck:
test: ["CMD-SHELL", "curl -fsS http://localhost:8006/health >/dev/null || exit 1"]
interval: 5s
timeout: 5s
retries: 12
alphavantage-provider-service:
build:
context: .
@ -144,7 +178,7 @@ services:
environment:
SERVER_PORT: 8000
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
API_GATEWAY_URL: http://api-gateway:4000
WORKFLOW_DATA_PATH: /mnt/workflow_data
SERVICE_HOST: alphavantage-provider-service
@ -176,7 +210,7 @@ services:
environment:
SERVER_PORT: 8001
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
TUSHARE_API_URL: http://api.waditu.com
API_GATEWAY_URL: http://api-gateway:4000
WORKFLOW_DATA_PATH: /mnt/workflow_data
@ -209,7 +243,7 @@ services:
environment:
SERVER_PORT: 8002
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
FINNHUB_API_URL: https://finnhub.io/api/v1
API_GATEWAY_URL: http://api-gateway:4000
WORKFLOW_DATA_PATH: /mnt/workflow_data
@ -242,7 +276,7 @@ services:
environment:
SERVER_PORT: 8003
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
API_GATEWAY_URL: http://api-gateway:4000
WORKFLOW_DATA_PATH: /mnt/workflow_data
SERVICE_HOST: yfinance-provider-service
@ -277,7 +311,7 @@ services:
environment:
SERVER_PORT: 8004
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
WORKFLOW_DATA_PATH: /mnt/workflow_data
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
@ -307,7 +341,7 @@ services:
environment:
SERVER_PORT: 8005
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
WORKFLOW_DATA_PATH: /mnt/workflow_data
RUST_LOG: info
RUST_BACKTRACE: "1"

View File

@ -0,0 +1,170 @@
# Tushare Report Presentation Refactor
## 1. Background and Problems
Tushare data is currently stored and displayed as a long table of `metric_name` + `period_date` rows.
Problems:
- **Poor readability**: different metrics at the same point in time cannot be compared at a glance, nor can the historical trend of a single metric.
- **No semantics**: raw field names such as `c_pay_acq_const_fiolta` are opaque to ordinary users.
- **Excessive precision**: numbers like `1420023169.52` are hard to read quickly.
## 2. Design Goals
Produce a structured, human-readable Markdown report that **contains all of the original data**, lossless and clear.
### 2.1 Core Transformation Logic (Pivot)
- **Full retention**: every `metric_name` present in the raw data must appear in the report; nothing may be dropped.
- **Rows**: financial metrics, mapped to Chinese display names and grouped logically (income statement, balance sheet, ...). Metrics missing from the dictionary are shown under their raw key.
- **Columns**: report periods (`period_date`), sorted newest first.
- **Values**: formatted numbers (e.g. 亿, 万, percentages).
### 2.2 Presentation Structure (Sectioning)
Use a nested **"group by year" + "group by metric category"** structure, indexed with Markdown heading levels.
- **The Data Provider only formats data; it does not generate LLM summaries.**
- **Pure data presentation**: no natural-language commentary or summaries.
**Structure example:**
```markdown
# 600521.SS 财务数据明细
## 2025年度
### 关键指标
| 指标 | 2025-09-30 | ... |
| :--- | :--- | :--- |
| 总市值 | 14.20 亿 | ... |
### 资产负债表
...
## 2024年度
### 关键指标
| 指标 | 2024-12-31 | 2024-09-30 | 2024-06-30 | 2024-03-31 |
| :--- | :--- | :--- | :--- | :--- |
| ... | ... | ... | ... | ... |
### 资产负债表
...
```
*Sectioning strategy: split at the top level by **year**. Each year contains every report period disclosed in that year. This gives the clearest index, and each table has a small, fixed number of columns (usually at most 4), which reads well.*
## 3. Implementation Plan
### Step 1: Data Dictionary
- Complete the definitions under `docs/5_data_dictionary/`.
- Build the full `metric_name` -> `Display Name (CN)` mapping table.
### Step 2: Transformation Logic
- **Grouping algorithm**:
  - Parse `period_date` and extract the year.
  - Group the data into one `YearBlock` per year.
  - Sort years in descending order (2025 -> 2024 -> ...).
  - Within a year, sort periods in descending order (12-31 -> 09-30 -> ...).
- **Row classification**: classify metrics into `Snapshot`, `Income`, `Balance`, `CashFlow`, `Ratios`, `Misc`.
- **Formatting** (see the sketch after this list):
  - Amounts: divide by 10^8, keep 2 decimals (e.g. "14.20 亿").
  - Ratios: multiply by 100, keep 2 decimals (e.g. "15.30%").
  - Headcounts / holder counts: integers, or in units of 万.
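A minimal Rust sketch of these formatting rules; the enum, function name, and exact rounding behaviour are illustrative assumptions, not the provider's actual code:
```rust
/// Unit policies mirroring the mapping tables in section 4.1 (illustrative names).
enum UnitPolicy {
    HundredMillion, // 亿: divide by 1e8, 2 decimals
    TenThousand,    // 万: divide by 1e4, 2 decimals
    Percent,        // %: multiply by 100, 2 decimals
    Integer,        // plain integer
    Raw,            // keep the original value
}

fn format_value(raw: f64, policy: &UnitPolicy) -> String {
    match policy {
        UnitPolicy::HundredMillion => format!("{:.2} 亿", raw / 1e8),
        UnitPolicy::TenThousand => format!("{:.2} 万", raw / 1e4),
        UnitPolicy::Percent => format!("{:.2}%", raw * 100.0),
        UnitPolicy::Integer => format!("{:.0}", raw),
        UnitPolicy::Raw => raw.to_string(),
    }
}

fn main() {
    // The example from section 1: 1420023169.52 -> "14.20 亿"
    println!("{}", format_value(1_420_023_169.52, &UnitPolicy::HundredMillion));
}
```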
### Step 3: Markdown Rendering
- Implement layered rendering:
  1. Level 2 title: `## 2024年度`
  2. Level 3 title: `### 资产负债表`
  3. Table: render that year's metrics of that category.
## 4. Detailed Design and Mapping
### 4.1 Field Mapping Dictionary
**A. Snapshot & Market**
| Metric Key | Display Name (CN) | Unit Policy |
| :--- | :--- | :--- |
| `total_mv` | 总市值 | 亿 (2 decimals) |
| `employees` | 员工人数 | integer |
| `holder_num` | 股东户数 | 万 (2 decimals) |
| `close` | 收盘价 | raw value |
| `pe` | 市盈率 | raw value |
| `pb` | 市净率 | raw value |
**B. Income Statement**
| Metric Key | Display Name (CN) | Unit Policy |
| :--- | :--- | :--- |
| `revenue` | 营业收入 | 亿 |
| `n_income` | 净利润 | 亿 |
| `rd_exp` | 研发费用 | 亿 |
| `sell_exp` | 销售费用 | 亿 |
| `admin_exp` | 管理费用 | 亿 |
| `fin_exp` | 财务费用 | 亿 |
| `total_cogs` | 营业成本 | 亿 |
| `tax_to_ebt` | 实际税率 | % |
| `__tax_rate` | 所得税率(计算) | % |
**C. Balance Sheet**
| Metric Key | Display Name (CN) | Unit Policy |
| :--- | :--- | :--- |
| `total_assets` | 总资产 | 亿 |
| `fix_assets` | 固定资产 | 亿 |
| `inventories` | 存货 | 亿 |
| `accounts_receiv`| 应收账款 | 亿 |
| `accounts_pay` | 应付账款 | 亿 |
| `prepayment` | 预付款项 | 亿 |
| `adv_receipts` | 预收款项 | 亿 |
| `contract_liab` | 合同负债 | 亿 |
| `money_cap` | 货币资金 | 亿 |
| `lt_eqt_invest` | 长期股权投资 | 亿 |
| `goodwill` | 商誉 | 亿 |
| `st_borr` | 短期借款 | 亿 |
| `lt_borr` | 长期借款 | 亿 |
**D. Cash Flow**
| Metric Key | Display Name (CN) | Unit Policy |
| :--- | :--- | :--- |
| `n_cashflow_act` | 经营净现金流 | 亿 |
| `c_paid_to_for_empl` | 支付职工现金 | 亿 |
| `c_pay_acq_const_fiolta` | 购建资产支付 | 亿 |
| `dividend_amount`| 分红总额 | 亿 |
**E. Operations & Ratios**
| Metric Key | Display Name (CN) | Unit Policy |
| :--- | :--- | :--- |
| `arturn_days` | 应收周转天数 | 天 |
| `invturn_days` | 存货周转天数 | 天 |
| `__gross_margin` | 毛利率 | % |
| `__net_margin` | 净利率 | % |
| `__money_cap_ratio` | 现金占比 | % |
| `__fix_assets_ratio` | 固定资产占比 | % |
| `__lt_invest_ratio` | 长投占比 | % |
| `__goodwill_ratio` | 商誉占比 | % |
| `__ar_ratio` | 应收占比 | % |
| `__ap_ratio` | 应付占比 | % |
| `__st_borr_ratio` | 短贷占比 | % |
| `__lt_borr_ratio` | 长贷占比 | % |
| `__rd_rate` | 研发费率 | % |
| `__sell_rate` | 销售费率 | % |
| `__admin_rate` | 管理费率 | % |
**F. Misc**
*Any key not matched above is displayed under its raw key, with its original numeric precision.*
### 4.2 Markdown Structure Preview
```markdown
# Tushare 财务数据明细
## 2025年度
### 关键指标
| 指标 | 2025-09-30 | ... |
| :--- | :--- | :--- |
| 总市值 | 14.20 亿 | ... |
### 利润表
| 指标 | 2025-09-30 | ... |
| :--- | :--- | ... |
| 营业收入 | 64.09 亿 | ... |
... (资产负债、现金流) ...
## 2024年度
### 关键指标
...
```

View File

@ -0,0 +1,142 @@
# High Reliability Task Scheduling Protocol
**Date**: 2025-11-29
**Status**: Implemented (Testing Pending)
**Priority**: High
**Background**: the workflow currently uses a "Fire-and-Forget" model, so when a Provider is degraded, partitioned from the network, or down, the Orchestrator has no way to notice. Tasks silently hang and never recover on their own.
## 1. Core Design Philosophy
Promote the Orchestrator from a simple dispatcher to a task supervisor that manages the full task lifecycle, by introducing **handshake**, **heartbeat**, and **circuit-breaking** mechanisms.
## 2. Protocol Flows
### 2.1 Normal Dispatch (Happy Path)
```mermaid
sequenceDiagram
participant O as Orchestrator
participant P as Provider (Worker)
O->>P: Dispatch Task (Request)
activate O
P->>O: Acknowledgement (Accepted)
deactivate O
Note over O: Task Status: Running
loop Execution
P->>P: Processing...
P->>O: Heartbeat (Progress Update)
Note over O: Reset Watchdog Timer
end
P->>O: TaskCompleted (Result)
Note over O: Task Status: Completed
```
### 2.2 Rejected Dispatch (Provider Degraded)
In degraded mode the Provider no longer blocks with a sleep; it stays connected and rejects tasks immediately.
```mermaid
sequenceDiagram
participant O as Orchestrator
participant P as Provider (Degraded)
O->>P: Dispatch Task (Request)
activate O
P->>O: Acknowledgement (Rejected: "API Key Missing")
deactivate O
Note over O: Task Status: Failed/Skipped
O->>O: Trigger Failure Handling
```
### 2.3 Dispatch Timeout
The Provider is down or the network connection is broken.
```mermaid
sequenceDiagram
participant O as Orchestrator
participant P as Provider (Dead)
O->>P: Dispatch Task (Request)
activate O
Note right of O: Wait 5s...
O->>O: Timeout Error
deactivate O
Note over O: Task Status: Failed (Dispatch Error)
```
### 2.4 Execution Timeout / Lost Heartbeat (Execution Watchdog)
The task has started executing, then the worker dies unexpectedly.
```mermaid
sequenceDiagram
participant O as Orchestrator (Monitor Loop)
participant P as Provider
P->>O: Heartbeat (T=0)
P->>P: Crash! 💥
loop Every 1s
O->>O: Check Active Tasks
end
Note over O: Now > LastHeartbeat + 30s
O->>O: Mark Task Failed (Zombie)
```
## 3. Implementation Tasks
### Phase 1: Protocol Definitions (Common Contracts)
- [x] **Define `TaskAcknowledgement` DTO** (see the sketch after this list):
  - `Accepted`
  - `Rejected { reason: String }`
- [x] **Define `TaskHeartbeat`**:
  - Reuse `WorkflowEvent::TaskStateChanged`; any message with `status=Running` counts as a heartbeat.
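A minimal sketch of the acknowledgement DTO in `common-contracts`; the derive set and serde representation are assumptions:
```rust
use serde::{Deserialize, Serialize};

/// Reply sent by a Provider immediately after receiving a dispatch request.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum TaskAcknowledgement {
    /// The provider accepted the task and will execute it asynchronously.
    Accepted,
    /// The provider refused the task, e.g. because it runs in degraded mode.
    Rejected { reason: String },
}
```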
### Phase 2: Orchestrator Changes
- [x] **Refactor `dispatch_task`**:
  - Switch from `nats.publish` to `nats.request`.
  - Apply a strict timeout (e.g. 5s).
  - Handle a `Rejected` response by marking the task Failed immediately.
- [x] **Implement `TaskMonitor`** (see the sketch after this list):
  - Add a background Tokio task.
  - Maintain a `RunningTasks` list with `last_heartbeat_at` and `started_at`.
  - Logic:
    - `if (now - started_at > max_timeout) -> Fail (Timeout)`
    - `if (now - last_heartbeat_at > heartbeat_timeout) -> Fail (Zombie)`
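A sketch of the watchdog loop described above, assuming the running tasks live in a shared map; the struct name and timeout constants are illustrative:
```rust
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{interval, Duration, Instant};

struct RunningTask {
    started_at: Instant,
    last_heartbeat_at: Instant,
}

const MAX_TIMEOUT: Duration = Duration::from_secs(300);      // illustrative
const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(30); // matches the 30s watchdog above

/// Background loop: every second, drop tasks that exceeded either timeout.
async fn task_monitor(tasks: Arc<Mutex<HashMap<String, RunningTask>>>) {
    let mut tick = interval(Duration::from_secs(1));
    loop {
        tick.tick().await;
        let now = Instant::now();
        let mut tasks = tasks.lock().await;
        tasks.retain(|task_id, t| {
            if now.duration_since(t.started_at) > MAX_TIMEOUT {
                tracing::warn!(task_id = %task_id, "task failed: execution timeout");
                false // removed here; marking the task Failed happens elsewhere
            } else if now.duration_since(t.last_heartbeat_at) > HEARTBEAT_TIMEOUT {
                tracing::warn!(task_id = %task_id, "task failed: heartbeat lost (zombie)");
                false
            } else {
                true
            }
        });
    }
}
```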
### Phase 3: Provider Changes (Provider Services)
`tushare-provider` is the first service to migrate; the other Providers follow later.
- [x] **Remove blocking sleep**:
  - Connect to NATS even in the `Degraded` state.
- [x] **Implement ack logic**:
  - On receiving a command, first check the service's own state.
  - If `Degraded`, reply `Rejected`.
  - Otherwise reply `Accepted` and start executing asynchronously.
- [x] **Implement heartbeat**:
  - During long operations (e.g. Fetch), send periodic progress/heartbeat events.
  - Implemented once at the `WorkflowNodeRunner` level, shared by all providers.
### Phase 4: Validation and Testing
- [ ] **Unit tests (Contracts/Orchestrator)**:
  - Verify Ack serialization/deserialization.
  - Verify the TaskMonitor timeout logic (mock the clock/task).
- [ ] **Component test (Mock Provider)**:
  - Create a Mock Provider that simulates:
    - Normal ACK + completion.
    - Normal ACK + mid-flight crash (tests heartbeat loss).
    - Rejecting ACK (Degraded).
    - Never ACKing at all (Timeout).
  - Verify that the Orchestrator's state transitions are correct in each scenario.
## 4. Expected Outcomes
1. **Fail fast**: a configuration error (e.g. a missing API key) no longer hangs the system for 5 minutes; it fails within milliseconds.
2. **Self-healing**: when packets are lost due to network jitter, the Orchestrator notices and can (in the future) trigger a retry.
3. **Observability**: dispatch failures, rejected executions, and execution timeouts become three clearly distinguishable error modes.

View File

@ -0,0 +1,75 @@
# Task: Realtime Logs Data Flow Optimization (Buffering & Replay) [Completed]
## Goal
Fix the frontend `Realtime Logs` panel losing log lines on page refresh, reconnect, or slow initial loads.
The goal is to make first-hand server-side logs reach the frontend reliably instead of depending on NATS delivery being ephemeral.
## Implementation Plan
### 1. Backend: Enhance `SyncStateCommand` (Orchestrator)
Modify the `handle_sync_state` logic so that, besides sending the state snapshot, it **also reads the current temporary log file** and ships the historical logs to the frontend as events.
* **Modify `workflow.rs` -> `handle_sync_state`**:
  * Call `log_manager.read_current_logs(req_id)` (a new, non-destructive read method).
  * The log content may be a very large string. To avoid blocking NATS messages it can be sent in chunks, or carried inside the `WorkflowStateSnapshot` if the size allows.
  * **Option**: send a new event type `WorkflowLogHistory`, or reuse `TaskLog` (sent in batches).
    * Since the frontend `handleEvent` handles `TaskLog` by appending, `TaskLog` events could simply be sent in a loop.
  * **Preferred option**: add a `logs: Vec<String>` field to `WorkflowStateSnapshot`, so the frontend restores everything in one pass when it rehydrates the snapshot.
### 2. Data Structure Changes
* **`common-contracts/src/messages.rs`**:
  * Extend `WorkflowStateSnapshot` with `logs: Vec<String>` (see the sketch below).
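For reference, a sketch of the extended snapshot payload, inferred from the regenerated frontend schema later in this commit; the Rust-side types for the DAG and metadata are placeholders:
```rust
use std::collections::HashMap;
use serde::{Deserialize, Serialize};

// Placeholder aliases; the real types live in common-contracts.
type WorkflowDag = serde_json::Value;
type TaskMetadata = serde_json::Value;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowStateSnapshot {
    pub task_graph: WorkflowDag,
    pub tasks_metadata: HashMap<String, TaskMetadata>,
    pub tasks_output: HashMap<String, Option<String>>,
    pub logs: Vec<String>, // NEW: buffered log lines replayed on reconnect
}
```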
### 3. Extend `LogBufferManager`
* **`logging.rs`**:
  * Add `read_current_logs(&self, request_id: &str) -> Result<Vec<String>>`.
  * Read the file, split it into lines, and return `Vec<String>`.
### 4. Frontend Adaptation
* **`useWorkflowStore.ts`**:
  * In the `handleEvent` -> `WorkflowStateSnapshot` branch, handle `event.payload.logs`.
  * Merge these logs into `state.logs` (global logs), or parse them and distribute to `state.tasks` if the format carries a task ID.
  * The current format is `[ISO Time] [Level] Message`, which does not necessarily contain a task ID, so they are mainly shown as global logs.
### 5. Flow Walkthrough
1. **Frontend start / refresh**:
   * Calls `SSE /events/{id}`.
   * The API Gateway accepts the connection, subscribes to NATS, and sends a `SyncStateCommand` to the Orchestrator.
2. **Orchestrator**:
   * Receives `SyncStateCommand`.
   * Builds the DAG snapshot.
   * **Reads `temp_logs/{id}.log`**.
   * Builds a `WorkflowStateSnapshot` (including logs).
   * Publishes it to NATS.
3. **Frontend receives**:
   * Gets the snapshot.
   * Restores the DAG state.
   * Restores the Logs panel content.
4. **Subsequent realtime logs**:
   * The Orchestrator keeps running; the tracing layer keeps writing to the file.
   * **Key point**: `publish_log` was removed earlier, so **realtime push** must be restored, but not via manual calls.
   * **Question**: besides writing the file, should `FileRequestLogLayer` also have a mechanism to push logs to NATS?
   * **Answer**: yes. The earlier refactor removed the push, so the frontend **stopped receiving realtime updates**.
   * **Fix**: `FileRequestLogLayer` should be responsible for both:
     1. Writing the file (persistent buffer).
     2. Pushing to NATS (realtime display).
   * **Technical difficulty**: the layer is synchronous while NATS is asynchronous.
   * **Solution**: use a `tokio::sync::broadcast` or `mpsc` channel. The layer sends lines into the channel, and a background task receives them and pushes them to NATS.
## Revised Backend Task List
1. **Restore the realtime push channel** (see the sketch after this list):
   * Add a `log_broadcast_tx` (sender) to `AppState`.
   * `FileRequestLogLayer` holds this sender.
   * Start a background task in `main.rs` that listens on the receiver, wraps each line as `WorkflowEvent::TaskLog`, and pushes it to NATS.
2. **Implement history replay (Snapshot)**:
   * Extend `WorkflowStateSnapshot` with a `logs` field.
   * Add the read method to `LogBufferManager`.
   * Fill in `logs` in `handle_sync_state`.
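A minimal sketch of the channel wiring in item 1. The NATS subject and the plain-string payload are illustrative; the real code wraps each line as a serialized `WorkflowEvent::TaskLog`:
```rust
use tokio::sync::mpsc;

/// A log line captured by the tracing layer, tagged with its request id.
struct LogLine {
    request_id: String,
    line: String,
}

// In main.rs: create the channel, hand the sender to FileRequestLogLayer,
// and spawn a background task that forwards lines to NATS.
fn spawn_log_forwarder(nats: async_nats::Client, mut rx: mpsc::Receiver<LogLine>) {
    tokio::spawn(async move {
        while let Some(log) = rx.recv().await {
            // Illustrative subject; the real subject/payload follow the event contracts.
            let subject = format!("workflow.events.{}", log.request_id);
            let _ = nats.publish(subject, log.line.into()).await;
        }
    });
}
```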
## Frontend Task List
1. Update the `WorkflowStateSnapshot` type definition.
2. Handle the logs carried by the snapshot in the store.
This approach covers both realtime delivery and reliability (reconnection after a dropped connection).

View File

@ -0,0 +1,58 @@
# Task: Refactor Workflow Logging (File-Buffered Persistence) [Completed]
## Context
`workflow-orchestrator-service` currently sends logs to NATS manually via `publish_log`. This is non-idiomatic, causes "double logging" (Rust's standard logs vs. the frontend's NATS logs), and the logs cannot be persisted.
We need a scheme that automatically captures Rust's standard logs (`tracing`), buffers them, and archives them into the global context (VGCS) when the workflow finishes. Because logs can be very long, and to avoid OOM while keeping data safe, the temporary buffer will be the **file system** rather than memory.
## Objectives
1. **Cleanup**: remove all manual `publish_log` / `WorkflowEvent::TaskLog` sending logic from `workflow-orchestrator-service`.
2. **Capture**: implement a custom `tracing` Layer that recognizes the current `request_id` and appends log lines in real time to a temporary file on disk.
3. **Persistence**: when the workflow ends (success or failure), read the corresponding temporary log file, store it in the VGCS repository (`workflow.log`), then delete the temp file.
## Implementation Plan
### 1. Log Manager (`LogBufferManager`)
Create a new `LogBufferManager` struct to manage temporary log files.
* **Directory**: create a `temp_logs/` directory under the service root.
* **Path strategy**: one file per request, e.g. `temp_logs/{request_id}.log`.
* **API** (see the sketch after this list):
  * `append(request_id, message)`: open (or create) the file in append mode and write the line. For performance we could cache handles for active files (`DashMap<String, File>`), or simply reopen each time; OS-level caching of append writes is usually good enough. Given the modest concurrency, **reopening on every append** is the most robust, stateless, and least bug-prone choice.
  * `finalize(request_id)`: read the full file content, return it as a `String` or `Vec<u8>`, then **delete** the file.
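A minimal sketch of this open-append-close strategy; paths and error handling are assumptions:
```rust
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::PathBuf;

pub struct LogBufferManager {
    base_dir: PathBuf, // e.g. "temp_logs/"
}

impl LogBufferManager {
    fn path_for(&self, request_id: &str) -> PathBuf {
        self.base_dir.join(format!("{request_id}.log"))
    }

    /// Open in append mode on every call: stateless and safe under concurrency.
    pub fn append(&self, request_id: &str, line: &str) -> std::io::Result<()> {
        let mut file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(self.path_for(request_id))?;
        writeln!(file, "{line}")
    }

    /// Read the full buffered log, then delete the temp file.
    pub fn finalize(&self, request_id: &str) -> std::io::Result<String> {
        let path = self.path_for(request_id);
        let content = fs::read_to_string(&path)?;
        fs::remove_file(&path)?;
        Ok(content)
    }
}
```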
### 2. Custom Tracing Layer (`FileRequestLogLayer`)
Add a new `logging` module to `workflow-orchestrator-service`.
* Implement `tracing_subscriber::Layer`.
* **Logic (`on_event`)** (see the sketch after this list):
  1. Try to get the `request_id` from the span's extensions (or from the event's own fields).
  2. If a `request_id` is found:
     * Format the log line (e.g. `[2025-11-30T10:00:00Z INFO] Message...`).
     * Call `LogBufferManager::append(request_id, line)`.
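A sketch of the layer's `on_event`, assuming the simpler of the two options above: the `request_id` is recorded as a field on the event itself (reading it from span extensions needs extra plumbing). It reuses the `LogBufferManager` sketch above:
```rust
use std::sync::Arc;
use tracing::field::{Field, Visit};
use tracing::{Event, Subscriber};
use tracing_subscriber::layer::{Context, Layer};

pub struct FileRequestLogLayer {
    manager: Arc<LogBufferManager>, // from the sketch in section 1
}

#[derive(Default)]
struct FieldCollector {
    request_id: Option<String>,
    message: String,
}

impl Visit for FieldCollector {
    fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
        match field.name() {
            "request_id" => {
                self.request_id = Some(format!("{value:?}").trim_matches('"').to_string())
            }
            "message" => self.message = format!("{value:?}"),
            _ => {}
        }
    }
}

impl<S: Subscriber> Layer<S> for FileRequestLogLayer {
    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
        let mut fields = FieldCollector::default();
        event.record(&mut fields);
        if let Some(req_id) = fields.request_id {
            // Format roughly as "[<timestamp> <level>] <message>" and buffer it.
            let line = format!(
                "[{} {}] {}",
                chrono::Utc::now().to_rfc3339(),
                event.metadata().level(),
                fields.message
            );
            let _ = self.manager.append(&req_id, &line);
        }
    }
}
```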
### 3. Update `AppState` and `main.rs`
* Add `pub log_manager: Arc<LogBufferManager>` to `AppState`.
* Initialize `LogBufferManager` in `main.rs` (make sure the `temp_logs` directory exists).
* Register `FileRequestLogLayer` with `tracing-subscriber`. Note: the layer needs access to the `LogBufferManager`, either via a global static or by passing an `Arc` into the layer's constructor.
### 4. Refactor `WorkflowEngine` (`workflow.rs`)
* **Remove old code**: delete the `publish_log` method and all its call sites.
* **Context propagation**: make sure all handling logic runs inside a span carrying the `request_id`.
  * For example: `let _span = tracing::info_span!("workflow", request_id = %req_id).entered();`
  * Or use `#[tracing::instrument(fields(request_id = %cmd.request_id))]`.
* **Archiving logic**:
  * In `try_finish_workflow` (or wherever completion/failure is handled):
    * Call `self.state.log_manager.finalize(req_id)` to obtain the full log.
    * Write the log content to `workflow.log` (or `_execution.log`) via `self.state.vgcs`.
    * Commit to VGCS.
## Expected Results
* **Stability**: even logs of several hundred MB only consume disk space, not service memory.
* **Isolation**: each request writes to its own file, so requests do not interfere with one another and high concurrency is supported.
* **Observability**: the History API can later expose the complete execution log, including system-level information.
## Constraints and Risks
* **Disk space**: zombie files left in `temp_logs` after a crash need periodic cleanup (at service startup or via cron). For now we log a warning or do a simple cleanup at startup.
* **Performance**: frequent open/close in append mode is normally fine on SSDs, but extremely high-frequency logging (thousands of lines per second) could add overhead. Our workload is analysis tasks, so the rate is manageable.

View File

@ -214,6 +214,38 @@ export type WorkflowHistoryDto = {
};
export type Value = unknown;
export const DataSourceProvider = z.enum([
"Tushare",
"Finnhub",
"Alphavantage",
"Yfinance",
]);
export const DataSourceConfig = z.object({
api_key: z.union([z.string(), z.null()]).optional(),
api_url: z.union([z.string(), z.null()]).optional(),
enabled: z.boolean(),
provider: DataSourceProvider,
});
export const DataSourcesConfig =
z.record(DataSourceConfig);
export const TestLlmConfigRequest = z.object({
api_base_url: z.string(),
api_key: z.string(),
model_id: z.string(),
});
export const LlmModel = z.object({
is_active: z.boolean(),
model_id: z.string(),
name: z.union([z.string(), z.null()]).optional(),
});
export const LlmProvider = z.object({
api_base_url: z.string(),
api_key: z.string(),
models: z.array(LlmModel),
name: z.string(),
});
export const LlmProvidersConfig = z.record(LlmProvider);
export const AnalysisTemplateSummary = z.object({ id: z.string(), name: z.string() });
export const LlmConfig = z
.object({
max_tokens: z.union([z.number(), z.null()]),
@ -258,39 +290,6 @@ export const AnalysisTemplateSet = z.object({
modules: z.record(AnalysisModuleConfig),
name: z.string(),
});
export const AnalysisTemplateSets =
z.record(AnalysisTemplateSet);
export const DataSourceProvider = z.enum([
"Tushare",
"Finnhub",
"Alphavantage",
"Yfinance",
]);
export const DataSourceConfig = z.object({
api_key: z.union([z.string(), z.null()]).optional(),
api_url: z.union([z.string(), z.null()]).optional(),
enabled: z.boolean(),
provider: DataSourceProvider,
});
export const DataSourcesConfig =
z.record(DataSourceConfig);
export const TestLlmConfigRequest = z.object({
api_base_url: z.string(),
api_key: z.string(),
model_id: z.string(),
});
export const LlmModel = z.object({
is_active: z.boolean(),
model_id: z.string(),
name: z.union([z.string(), z.null()]).optional(),
});
export const LlmProvider = z.object({
api_base_url: z.string(),
api_key: z.string(),
models: z.array(LlmModel),
name: z.string(),
});
export const LlmProvidersConfig = z.record(LlmProvider);
export const TestConfigRequest = z.object({ data: z.unknown(), type: z.string() });
export const TestConnectionResponse = z.object({
message: z.string(),
@ -384,6 +383,8 @@ export const TaskProgress = z.object({
status: ObservabilityTaskStatus,
task_name: z.string(),
});
export const AnalysisTemplateSets =
z.record(AnalysisTemplateSet);
export const CanonicalSymbol = z.string();
export const ServiceStatus = z.enum(["Ok", "Degraded", "Unhealthy"]);
export const HealthStatus = z.object({
@ -506,6 +507,7 @@ export const WorkflowEvent = z.union([
.object({
payload: z
.object({
logs: z.array(z.string()),
task_graph: WorkflowDag,
tasks_metadata: z.record(TaskMetadata),
tasks_output: z.record(z.union([z.string(), z.null()])),
@ -519,12 +521,6 @@ export const WorkflowEvent = z.union([
]);
export const schemas = {
LlmConfig,
SelectionMode,
ContextSelectorConfig,
AnalysisModuleConfig,
AnalysisTemplateSet,
AnalysisTemplateSets,
DataSourceProvider,
DataSourceConfig,
DataSourcesConfig,
@ -532,6 +528,12 @@ export const schemas = {
LlmModel,
LlmProvider,
LlmProvidersConfig,
AnalysisTemplateSummary,
LlmConfig,
SelectionMode,
ContextSelectorConfig,
AnalysisModuleConfig,
AnalysisTemplateSet,
TestConfigRequest,
TestConnectionResponse,
DiscoverPreviewRequest,
@ -548,6 +550,7 @@ export const schemas = {
RequestAcceptedResponse,
ObservabilityTaskStatus,
TaskProgress,
AnalysisTemplateSets,
CanonicalSymbol,
ServiceStatus,
HealthStatus,
@ -562,27 +565,6 @@ export const schemas = {
};
export const endpoints = makeApi([
{
method: "get",
path: "/api/v1/configs/analysis_template_sets",
alias: "get_analysis_template_sets",
requestFormat: "json",
response: z.record(AnalysisTemplateSet),
},
{
method: "put",
path: "/api/v1/configs/analysis_template_sets",
alias: "update_analysis_template_sets",
requestFormat: "json",
parameters: [
{
name: "body",
type: "Body",
schema: z.record(AnalysisTemplateSet),
},
],
response: z.record(AnalysisTemplateSet),
},
{
method: "get",
path: "/api/v1/configs/data_sources",
@ -639,6 +621,81 @@ export const endpoints = makeApi([
],
response: z.void(),
},
{
method: "get",
path: "/api/v1/configs/templates",
alias: "get_templates",
requestFormat: "json",
response: z.array(AnalysisTemplateSummary),
},
{
method: "get",
path: "/api/v1/configs/templates/:id",
alias: "get_template_by_id",
requestFormat: "json",
parameters: [
{
name: "id",
type: "Path",
schema: z.string(),
},
],
response: AnalysisTemplateSet,
errors: [
{
status: 404,
description: `Template not found`,
schema: z.void(),
},
],
},
{
method: "put",
path: "/api/v1/configs/templates/:id",
alias: "update_template",
requestFormat: "json",
parameters: [
{
name: "body",
type: "Body",
schema: AnalysisTemplateSet,
},
{
name: "id",
type: "Path",
schema: z.string(),
},
],
response: AnalysisTemplateSet,
errors: [
{
status: 404,
description: `Template not found`,
schema: z.void(),
},
],
},
{
method: "delete",
path: "/api/v1/configs/templates/:id",
alias: "delete_template",
requestFormat: "json",
parameters: [
{
name: "id",
type: "Path",
schema: z.string(),
},
],
response: z.void(),
errors: [
{
status: 404,
description: `Template not found`,
schema: z.void(),
},
],
},
{
method: "post",
path: "/api/v1/configs/test",

View File

@ -154,11 +154,18 @@ export const ContextExplorer: React.FC<ContextExplorerProps> = ({
const res = await fetch(`/api/context/${reqId}/tree/${commitHash}?path=`);
if (res.ok) {
const data = await res.json();
if (Array.isArray(data)) {
data.sort((a: DirEntry, b: DirEntry) => {
if (a.kind === b.kind) return a.name.localeCompare(b.name);
return a.kind === 'Dir' ? -1 : 1;
});
setRootEntries(data);
} else {
console.error("ContextExplorer: Expected array from tree API, got:", data);
setRootEntries([]);
}
} else {
console.error("ContextExplorer: Fetch failed", res.status, res.statusText);
}
} catch (e) {
console.error(e);

View File

@ -0,0 +1,76 @@
import { useState } from 'react';
import { Terminal, ChevronUp, ChevronDown } from 'lucide-react';
import { Card } from "@/components/ui/card";
import { Button } from "@/components/ui/button";
import { useAutoScroll } from '@/hooks/useAutoScroll';
import { cn } from "@/lib/utils";
export interface LogEntry {
log: string;
timestamp?: number; // Added optional timestamp for potential sorting if needed
}
interface RealtimeLogsProps {
logs: string[];
className?: string;
}
export function RealtimeLogs({ logs, className }: RealtimeLogsProps) {
// Collapsed by default to stay unobtrusive; the header still previews the latest log line.
const [isExpanded, setIsExpanded] = useState(false);
const logsViewportRef = useAutoScroll(logs.length);
const toggleExpand = () => {
setIsExpanded(!isExpanded);
};
return (
<Card className={cn("flex flex-col shadow-sm transition-all duration-300 ease-in-out border-l-4 border-l-primary py-0 gap-0 bg-background overflow-hidden", className, isExpanded ? "h-[300px]" : "h-8")}>
<div
className="flex items-center justify-between px-2 py-1 cursor-pointer hover:bg-muted/50 transition-colors h-8 shrink-0"
onClick={toggleExpand}
>
<div className="flex items-center gap-2 overflow-hidden flex-1">
<Terminal className="h-3 w-3 text-muted-foreground shrink-0" />
<span className="text-[10px] font-medium text-muted-foreground whitespace-nowrap shrink-0 mr-2">Real-time Logs</span>
{/* Preview last log when collapsed */}
{!isExpanded && logs.length > 0 && (
<div className="flex-1 flex items-center gap-2 overflow-hidden text-[10px] font-mono text-muted-foreground/80">
<span className="truncate">{logs[logs.length - 1]}</span>
</div>
)}
{!isExpanded && logs.length === 0 && (
<span className="text-[10px] italic text-muted-foreground/50">Waiting for logs...</span>
)}
</div>
<Button variant="ghost" size="icon" className="h-4 w-4 text-muted-foreground hover:text-foreground shrink-0 ml-2">
{isExpanded ? <ChevronUp className="h-3 w-3" /> : <ChevronDown className="h-3 w-3" />}
</Button>
</div>
{/* Expanded Content */}
<div
className={cn(
"flex-1 bg-muted/10 border-t transition-all duration-300 min-h-0",
isExpanded ? "opacity-100 visible" : "opacity-0 invisible h-0 overflow-hidden"
)}
>
<div ref={logsViewportRef} className="h-full overflow-y-auto p-3 font-mono text-[10px] leading-relaxed scrollbar-thin scrollbar-thumb-border scrollbar-track-transparent">
<div className="space-y-1">
{logs.length === 0 && <span className="text-muted-foreground italic">Waiting for logs...</span>}
{logs.map((entry, i) => (
<div key={i} className="break-all flex gap-2">
<span className="text-foreground/90">{entry}</span>
</div>
))}
</div>
</div>
</div>
</Card>
);
}

View File

@ -1,5 +1,5 @@
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { LlmProvidersConfig, DataSourcesConfig, AnalysisTemplateSets, TestConfigRequest, TestLlmConfigRequest } from '../types/config';
import { LlmProvidersConfig, DataSourcesConfig, TestConfigRequest, TestLlmConfigRequest } from '../types/config';
import { client } from '../api/client';
// --- Hooks ---
@ -59,16 +59,40 @@ export function useAnalysisTemplates() {
return useQuery({
queryKey: ['analysis-templates'],
queryFn: async () => {
return await client.get_analysis_template_sets();
return await client.get_templates();
}
});
}
export function useUpdateAnalysisTemplates() {
export function useAnalysisTemplate(id: string | null) {
return useQuery({
queryKey: ['analysis-template', id],
queryFn: async () => {
if (!id) return null;
return await client.get_template_by_id({ params: { id } });
},
enabled: !!id
});
}
export function useSaveAnalysisTemplate() {
const queryClient = useQueryClient();
return useMutation({
mutationFn: async (config: AnalysisTemplateSets) => {
return await client.update_analysis_template_sets(config);
mutationFn: async ({ id, template }: { id: string, template: AnalysisTemplateSet }) => {
return await client.update_template(template, { params: { id } });
},
onSuccess: (data, variables) => {
queryClient.invalidateQueries({ queryKey: ['analysis-templates'] });
queryClient.invalidateQueries({ queryKey: ['analysis-template', variables.id] });
}
});
}
export function useDeleteAnalysisTemplate() {
const queryClient = useQueryClient();
return useMutation({
mutationFn: async (id: string) => {
return await client.delete_template(undefined, { params: { id } });
},
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: ['analysis-templates'] });
@ -76,6 +100,9 @@ export function useUpdateAnalysisTemplates() {
});
}
export type AnalysisTemplateSet = import('../api/schema.gen').AnalysisTemplateSet;
export function useDiscoverModels() {
const queryClient = useQueryClient();
return useMutation({

View File

@ -7,7 +7,7 @@ import { Input } from "@/components/ui/input"
import { Label } from "@/components/ui/label"
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"
import { BarChart3, Search, Sparkles, Loader2, AlertCircle } from "lucide-react"
import { useAnalysisTemplates, useLlmProviders } from "@/hooks/useConfig"
import { useAnalysisTemplates, useAnalysisTemplate, useLlmProviders } from "@/hooks/useConfig"
import { client } from '@/api/client';
import { DataRequest } from '@/api/schema.gen';
import { z } from 'zod';
@ -24,25 +24,25 @@ export function Dashboard() {
const [templateId, setTemplateId] = useState("");
const { data: templates, isLoading: isTemplatesLoading } = useAnalysisTemplates();
const { data: selectedTemplate } = useAnalysisTemplate(templateId || null);
const { data: llmProviders } = useLlmProviders();
const [validationError, setValidationError] = useState<string | null>(null);
// Auto-select first template when loaded
useEffect(() => {
if (templates && Object.keys(templates).length > 0 && !templateId) {
setTemplateId(Object.keys(templates)[0]);
if (templates && templates.length > 0 && !templateId) {
setTemplateId(templates[0].id);
}
}, [templates, templateId]);
// Validate template against providers
useEffect(() => {
if (!templateId || !templates || !templates[templateId] || !llmProviders) {
if (!selectedTemplate || !llmProviders) {
setValidationError(null);
return;
}
const selectedTemplate = templates[templateId];
const missingConfigs: string[] = [];
Object.values(selectedTemplate.modules).forEach(module => {
@ -63,7 +63,7 @@ export function Dashboard() {
setValidationError(null);
}
}, [templateId, templates, llmProviders]);
}, [selectedTemplate, llmProviders]);
const startWorkflowMutation = useMutation({
mutationFn: async (payload: DataRequestDTO) => {
@ -155,9 +155,9 @@ export function Dashboard() {
<SelectValue placeholder={isTemplatesLoading ? "Loading templates..." : "Select a template"} />
</SelectTrigger>
<SelectContent>
{templates && Object.keys(templates).length > 0 ? (
Object.entries(templates).map(([id, t]) => (
<SelectItem key={id} value={id}>
{templates && templates.length > 0 ? (
templates.map((t) => (
<SelectItem key={t.id} value={t.id}>
{t.name}
</SelectItem>
))

View File

@ -117,7 +117,7 @@ export function HistoricalReportPage() {
const tabNodes = dag?.nodes || [];
return (
<div className="container py-4 space-y-4 min-h-[calc(100vh-4rem)] flex flex-col">
<div className="w-full px-6 py-4 space-y-4 min-h-[calc(100vh-4rem)] flex flex-col">
{/* Header Area */}
<div className="flex items-center justify-between shrink-0">
<div className="space-y-1">
@ -362,8 +362,8 @@ function TaskDetailView({ task, requestId, mode: _mode }: { task?: TaskState, re
<div className="relative h-full flex flex-col overflow-hidden">
{/* Main Report View */}
<div className="flex-1 overflow-auto p-8 bg-background">
<div className="max-w-4xl mx-auto">
<div className="prose dark:prose-invert max-w-none prose-p:text-foreground prose-headings:text-foreground prose-li:text-foreground prose-strong:text-foreground prose-span:text-foreground">
<div className="w-full">
<div className="prose dark:prose-invert max-w-none prose-p:text-foreground prose-headings:text-foreground prose-li:text-foreground prose-strong:text-foreground prose-span:text-foreground prose-td:text-foreground prose-th:text-foreground">
{task?.content ? (
<ReactMarkdown remarkPlugins={[remarkGfm]}>
{task.content || ''}
@ -396,9 +396,9 @@ function TaskDetailView({ task, requestId, mode: _mode }: { task?: TaskState, re
</Button>
</div>
{/* Inspector Panel (Right Side Sheet) */}
{/* Inspector Panel (Overlaid on Content) */}
<div className={cn(
"absolute top-0 right-0 h-full w-[600px] bg-background border-l shadow-2xl transition-transform duration-300 transform z-20 flex flex-col",
"absolute top-0 right-0 h-full w-full bg-background border-l shadow-2xl transition-transform duration-300 transform z-20 flex flex-col",
isInspectorOpen ? "translate-x-0" : "translate-x-full"
)}>
<div className="flex items-center justify-between p-4 border-b shrink-0">
@ -411,55 +411,20 @@ function TaskDetailView({ task, requestId, mode: _mode }: { task?: TaskState, re
</Button>
</div>
<Tabs defaultValue="logs" className="flex-1 flex flex-col min-h-0">
<div className="px-4 border-b shrink-0">
<TabsList className="w-full justify-start h-10 p-0 bg-transparent">
<TabsTrigger value="logs" className="rounded-none border-b-2 border-transparent data-[state=active]:border-primary px-4 py-2">Logs</TabsTrigger>
{hasContext && (
<TabsTrigger value="context" className="rounded-none border-b-2 border-transparent data-[state=active]:border-primary px-4 py-2">Context</TabsTrigger>
)}
<TabsTrigger value="raw" className="rounded-none border-b-2 border-transparent data-[state=active]:border-primary px-4 py-2">Metadata</TabsTrigger>
</TabsList>
</div>
<TabsContent value="logs" className="flex-1 m-0 p-0 overflow-auto bg-muted/30">
{task?.logs && task.logs.length > 0 ? (
<div className="p-4 space-y-1 font-mono text-xs">
{task.logs.map((log, i) => (
<div key={i} className="break-words whitespace-pre-wrap">{log}</div>
))}
</div>
) : (
<div className="flex items-center justify-center h-full text-muted-foreground text-sm">
No logs available
</div>
)}
{/* TODO: Add support for loading _execution.md in historical mode */}
</TabsContent>
<TabsContent value="context" className="flex-1 m-0 p-0 min-h-0">
{requestId && (task?.inputCommit || task?.outputCommit) && (
<div className="flex-1 flex flex-col min-h-0">
{hasContext && requestId && (task?.inputCommit || task?.outputCommit) ? (
<ContextExplorer
reqId={requestId}
commitHash={task.outputCommit || task.inputCommit!}
diffTargetHash={task.outputCommit ? task.inputCommit : undefined}
className="h-full p-4"
/>
) : (
<div className="flex items-center justify-center h-full text-muted-foreground text-sm">
No context available
</div>
)}
</TabsContent>
<TabsContent value="raw" className="flex-1 m-0 p-4 overflow-auto">
<pre className="text-xs font-mono bg-muted p-4 rounded-lg overflow-auto">
{JSON.stringify({
status: task?.status,
progress: task?.progress,
message: task?.message,
inputCommit: task?.inputCommit,
outputCommit: task?.outputCommit
}, null, 2)}
</pre>
</TabsContent>
</Tabs>
</div>
</div>
</div>
);

View File

@ -17,6 +17,8 @@ import { WorkflowStatus, ConnectionStatus, TaskState } from '@/types/workflow';
import { Progress } from "@/components/ui/progress"
import { cn, formatNodeName } from '@/lib/utils';
import { RealtimeLogs } from '@/components/workflow/RealtimeLogs';
export function ReportPage() {
const { id } = useParams();
const [searchParams] = useSearchParams();
@ -43,7 +45,8 @@ export function ReportPage() {
tasks,
dag,
activeTab,
setActiveTab
setActiveTab,
logs: globalLogs
} = useWorkflowStore();
const { data: templates } = useAnalysisTemplates();
@ -60,9 +63,11 @@ export function ReportPage() {
// If the workflow is already finished, SSE might close immediately or 404.
const loadSnapshot = async () => {
try {
console.log(`[ReportPage] Fetching snapshot for ${id}...`);
const res = await fetch(`/api/v1/workflow/snapshot/${id}`);
if (res.ok) {
const snapshot = await res.json();
console.log(`[ReportPage] Snapshot loaded successfully for ${id}`, snapshot);
// Handle tagged enum wrapper (type/payload) if present
let rawPayload = snapshot.data_payload;
@ -71,9 +76,11 @@ export function ReportPage() {
}
loadFromSnapshot(rawPayload);
} else {
console.warn(`[ReportPage] Snapshot fetch failed: ${res.status} ${res.statusText}`);
}
} catch (e) {
console.warn("Snapshot load failed (normal for new tasks):", e);
console.warn("[ReportPage] Snapshot load exception (normal for new tasks):", e);
}
};
@ -81,25 +88,39 @@ export function ReportPage() {
// 2. Connect to Real-time Stream
try {
console.log(`[ReportPage] Initializing EventSource for ${id}...`);
eventSource = new EventSource(`/api/v1/workflow/events/${id}`);
eventSource.onopen = () => {
console.log(`[ReportPage] SSE Connection Opened for ${id}`);
};
eventSource.onmessage = (event) => {
try {
// console.log(`[ReportPage] SSE Message received:`, event.data);
const parsedEvent = JSON.parse(event.data);
if (parsedEvent.type === 'WorkflowStateSnapshot') {
console.log(`[ReportPage] !!! Received WorkflowStateSnapshot !!!`, parsedEvent);
} else if (parsedEvent.type !== 'TaskStreamUpdate' && parsedEvent.type !== 'TaskLog') {
// Suppress high-frequency logs to prevent browser lag
console.log(`[ReportPage] SSE Event: ${parsedEvent.type}`, parsedEvent);
}
handleEvent(parsedEvent);
} catch (e) {
console.error("Failed to parse SSE event:", e);
console.error("[ReportPage] Failed to parse SSE event:", e);
}
};
eventSource.onerror = (err) => {
// Standard behavior: if connection closes, it might be finished or failed.
// We rely on Snapshot for history if SSE fails.
console.warn("SSE Connection Closed/Error", err);
console.warn("[ReportPage] SSE Connection Closed/Error", err);
eventSource?.close();
};
} catch (e) {
console.error("Failed to init SSE:", e);
console.error("[ReportPage] Failed to init SSE:", e);
}
return () => {
@ -110,8 +131,16 @@ export function ReportPage() {
// Include ALL nodes in tabs to allow debugging context for DataFetch tasks
const tabNodes = dag?.nodes || [];
// Use global raw logs directly
// const { tasks, logs: globalLogs } = useWorkflowStore();
return (
<div className="container py-4 space-y-4 min-h-[calc(100vh-4rem)] flex flex-col">
<div className="w-full px-6 py-4 space-y-4 min-h-[calc(100vh-4rem)] flex flex-col">
{/* Realtime Logs - Only in realtime mode */}
{mode === 'realtime' && (
<RealtimeLogs logs={globalLogs} className="fixed bottom-0 left-0 right-0 z-50 w-full border-l-0 border-t-4 border-t-primary rounded-none shadow-[0_-4px_12px_rgba(0,0,0,0.1)]" />
)}
{/* Header Area */}
<div className="flex items-center justify-between shrink-0">
<div className="space-y-1">
@ -283,7 +312,15 @@ function OverviewTabContent({ status, tasks, totalTasks, completedTasks }: {
totalTasks: number,
completedTasks: number
}) {
const progress = totalTasks > 0 ? (completedTasks / totalTasks) * 100 : 0;
// Count ALL tasks that have reached a terminal state (Completed, Skipped, Failed)
// This is more accurate for "progress" than just successful completions.
const processedCount = Object.values(tasks).filter(t =>
t.status === schemas.TaskStatus.enum.Completed ||
t.status === schemas.TaskStatus.enum.Skipped ||
t.status === schemas.TaskStatus.enum.Failed
).length;
const progress = totalTasks > 0 ? (processedCount / totalTasks) * 100 : 0;
// Find errors
const failedTasks = Object.entries(tasks).filter(([_, t]) => t.status === schemas.TaskStatus.enum.Failed);
@ -312,7 +349,7 @@ function OverviewTabContent({ status, tasks, totalTasks, completedTasks }: {
<div className="space-y-2">
<div className="flex justify-between text-sm text-muted-foreground">
<span>Overall Progress</span>
<span>{Math.round(progress)}% ({completedTasks}/{totalTasks} tasks)</span>
<span>{Math.round(progress)}% ({processedCount}/{totalTasks} tasks)</span>
</div>
<Progress value={progress} className="h-2" />
</div>
@ -442,8 +479,8 @@ function TaskDetailView({ taskId, task, requestId, mode }: { taskId: string, tas
<div className="relative h-full flex flex-col overflow-hidden">
{/* Main Report View */}
<div className="flex-1 overflow-auto p-8 bg-background">
<div className="max-w-4xl mx-auto">
<div className="prose dark:prose-invert max-w-none prose-p:text-foreground prose-headings:text-foreground prose-li:text-foreground prose-strong:text-foreground prose-span:text-foreground">
<div className="w-full">
<div className="prose dark:prose-invert max-w-none prose-p:text-foreground prose-headings:text-foreground prose-li:text-foreground prose-strong:text-foreground prose-span:text-foreground prose-td:text-foreground prose-th:text-foreground">
{task?.content ? (
<ReactMarkdown remarkPlugins={[remarkGfm]}>
{task.content || ''}
@ -476,9 +513,9 @@ function TaskDetailView({ taskId, task, requestId, mode }: { taskId: string, tas
</Button>
</div>
{/* Inspector Panel (Right Side Sheet) */}
{/* Inspector Panel (Overlaid on Content) */}
<div className={cn(
"absolute top-0 right-0 h-full w-[600px] bg-background border-l shadow-2xl transition-transform duration-300 transform z-20 flex flex-col",
"absolute top-0 right-0 h-full w-full bg-background border-l shadow-2xl transition-transform duration-300 transform z-20 flex flex-col",
isInspectorOpen ? "translate-x-0" : "translate-x-full"
)}>
<div className="flex items-center justify-between p-4 border-b shrink-0">
@ -491,55 +528,20 @@ function TaskDetailView({ taskId, task, requestId, mode }: { taskId: string, tas
</Button>
</div>
<Tabs defaultValue="logs" className="flex-1 flex flex-col min-h-0">
<div className="px-4 border-b shrink-0">
<TabsList className="w-full justify-start h-10 p-0 bg-transparent">
<TabsTrigger value="logs" className="rounded-none border-b-2 border-transparent data-[state=active]:border-primary px-4 py-2">Logs</TabsTrigger>
{hasContext && (
<TabsTrigger value="context" className="rounded-none border-b-2 border-transparent data-[state=active]:border-primary px-4 py-2">Context</TabsTrigger>
)}
<TabsTrigger value="raw" className="rounded-none border-b-2 border-transparent data-[state=active]:border-primary px-4 py-2">Metadata</TabsTrigger>
</TabsList>
</div>
<TabsContent value="logs" className="flex-1 m-0 p-0 overflow-auto bg-muted/30">
{task?.logs && task.logs.length > 0 ? (
<div className="p-4 space-y-1 font-mono text-xs">
{task.logs.map((log, i) => (
<div key={i} className="break-words whitespace-pre-wrap">{log}</div>
))}
</div>
) : (
<div className="flex items-center justify-center h-full text-muted-foreground text-sm">
No logs available
</div>
)}
{/* TODO: Add support for loading _execution.md in historical mode */}
</TabsContent>
<TabsContent value="context" className="flex-1 m-0 p-0 min-h-0">
{requestId && (task?.inputCommit || task?.outputCommit) && (
<div className="flex-1 flex flex-col min-h-0">
{hasContext && requestId && (task?.inputCommit || task?.outputCommit) ? (
<ContextExplorer
reqId={requestId}
commitHash={task.outputCommit || task.inputCommit!}
diffTargetHash={task.outputCommit ? task.inputCommit : undefined}
className="h-full p-4"
/>
) : (
<div className="flex items-center justify-center h-full text-muted-foreground text-sm">
No context available
</div>
)}
</TabsContent>
<TabsContent value="raw" className="flex-1 m-0 p-4 overflow-auto">
<pre className="text-xs font-mono bg-muted p-4 rounded-lg overflow-auto">
{JSON.stringify({
status: task?.status,
progress: task?.progress,
message: task?.message,
inputCommit: task?.inputCommit,
outputCommit: task?.outputCommit
}, null, 2)}
</pre>
</TabsContent>
</Tabs>
</div>
</div>
</div>
);

View File

@ -1,5 +1,11 @@
import { useState, useEffect, useMemo } from "react"
import { useAnalysisTemplates, useUpdateAnalysisTemplates, useLlmProviders } from "@/hooks/useConfig"
import {
useAnalysisTemplates,
useAnalysisTemplate,
useSaveAnalysisTemplate,
useDeleteAnalysisTemplate,
useLlmProviders
} from "@/hooks/useConfig"
import { AnalysisTemplateSet, AnalysisModuleConfig } from "@/types/config"
import { schemas } from "@/api/schema.gen"
import { z } from "zod"
@ -16,29 +22,28 @@ import { useToast } from "@/hooks/use-toast"
export function TemplateTab() {
const { data: templates, isLoading } = useAnalysisTemplates();
const updateTemplates = useUpdateAnalysisTemplates();
const saveTemplate = useSaveAnalysisTemplate();
const deleteTemplate = useDeleteAnalysisTemplate();
const { toast } = useToast();
const [selectedId, setSelectedId] = useState<string | null>(null);
// Auto select first if none selected
useEffect(() => {
if (templates && !selectedId && Object.keys(templates).length > 0) {
setSelectedId(Object.keys(templates)[0]);
if (templates && !selectedId && templates.length > 0) {
setSelectedId(templates[0].id);
}
}, [templates, selectedId]);
if (isLoading) return <div>Loading templates...</div>;
const handleCreateTemplate = () => {
if (!templates) return;
const newId = crypto.randomUUID();
const newTemplate: AnalysisTemplateSet = {
name: "New Template",
modules: {}
};
const newTemplates = { ...templates, [newId]: newTemplate };
updateTemplates.mutate(newTemplates, {
saveTemplate.mutate({ id: newId, template: newTemplate }, {
onSuccess: () => {
toast({ title: "Success", description: "Template created" });
setSelectedId(newId);
@ -47,20 +52,8 @@ export function TemplateTab() {
});
}
const handleUpdateTemplate = (id: string, updatedTemplate: AnalysisTemplateSet) => {
if (!templates) return;
const newTemplates = { ...templates, [id]: updatedTemplate };
updateTemplates.mutate(newTemplates, {
onSuccess: () => toast({ title: "Success", description: "Template saved" }),
onError: () => toast({ title: "Error", description: "Failed to save template", type: "error" })
});
}
const handleDeleteTemplate = (id: string) => {
if (!templates) return;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { [id]: removed, ...rest } = templates;
updateTemplates.mutate(rest, {
deleteTemplate.mutate(id, {
onSuccess: () => {
toast({ title: "Success", description: "Template deleted" });
if (selectedId === id) setSelectedId(null);
@ -69,8 +62,6 @@ export function TemplateTab() {
});
}
const activeTemplate = (templates && selectedId) ? (templates as Record<string, AnalysisTemplateSet>)[selectedId] : null;
return (
<div className="flex h-[600px] border rounded-md overflow-hidden">
{/* Sidebar List */}
@ -81,20 +72,20 @@ export function TemplateTab() {
</div>
<ScrollArea className="flex-1">
<div className="p-2 space-y-1">
{templates && Object.entries(templates).map(([id, t]) => (
<div key={id} className="group relative flex items-center">
{templates && templates.map((t) => (
<div key={t.id} className="group relative flex items-center">
<button
onClick={() => setSelectedId(id)}
onClick={() => setSelectedId(t.id)}
className={`w-full text-left px-3 py-2 rounded-md text-sm transition-colors flex items-center justify-between ${
selectedId === id ? "bg-accent text-accent-foreground font-medium" : "hover:bg-muted"
selectedId === t.id ? "bg-accent text-accent-foreground font-medium" : "hover:bg-muted"
}`}
>
<span className="truncate pr-6">{(t as AnalysisTemplateSet).name}</span>
{selectedId === id && <ArrowRight className="h-3 w-3 opacity-50" />}
<span className="truncate pr-6">{t.name}</span>
{selectedId === t.id && <ArrowRight className="h-3 w-3 opacity-50" />}
</button>
{/* Delete button visible on hover */}
<button
onClick={(e) => { e.stopPropagation(); handleDeleteTemplate(id); }}
onClick={(e) => { e.stopPropagation(); handleDeleteTemplate(t.id); }}
className="absolute right-2 top-2 hidden group-hover:block text-muted-foreground hover:text-destructive"
>
<Trash2 className="h-3 w-3" />
@ -112,16 +103,14 @@ export function TemplateTab() {
{/* Main Content */}
<div className="flex-1 bg-background flex flex-col">
{activeTemplate && selectedId ? (
<TemplateDetailView
key={selectedId} // Force remount on ID change
template={activeTemplate}
onSave={(t) => handleUpdateTemplate(selectedId, t)}
isSaving={updateTemplates.isPending}
{selectedId ? (
<TemplateDetailWrapper
key={selectedId}
templateId={selectedId}
/>
) : (
<div className="flex-1 flex items-center justify-center text-muted-foreground">
{templates && Object.keys(templates).length === 0 ? "No templates found. Create one." : "Select a template"}
{templates && templates.length === 0 ? "No templates found. Create one." : "Select a template"}
</div>
)}
</div>
@ -129,6 +118,30 @@ export function TemplateTab() {
)
}
function TemplateDetailWrapper({ templateId }: { templateId: string }) {
const { data: template, isLoading, isError } = useAnalysisTemplate(templateId);
const saveTemplate = useSaveAnalysisTemplate();
const { toast } = useToast();
if (isLoading) return <div className="flex items-center justify-center h-full">Loading details...</div>;
if (isError || !template) return <div className="flex items-center justify-center h-full text-destructive">Error loading template</div>;
const handleSave = (updatedTemplate: AnalysisTemplateSet) => {
saveTemplate.mutate({ id: templateId, template: updatedTemplate }, {
onSuccess: () => toast({ title: "Success", description: "Template saved" }),
onError: () => toast({ title: "Error", description: "Failed to save template", type: "error" })
});
};
return (
<TemplateDetailView
template={template}
onSave={handleSave}
isSaving={saveTemplate.isPending}
/>
);
}
function TemplateDetailView({ template, onSave, isSaving }: { template: AnalysisTemplateSet, onSave: (t: AnalysisTemplateSet) => void, isSaving: boolean }) {
const [localTemplate, setLocalTemplate] = useState(template);
const [isDirty, setIsDirty] = useState(false);

View File

@ -8,6 +8,7 @@ interface WorkflowStoreState {
mode: 'realtime' | 'historical';
dag: WorkflowDag | null;
tasks: Record<string, TaskState>;
logs: string[]; // Global realtime logs
error: string | null;
activeTab: string; // For UI linking
@ -19,6 +20,7 @@ interface WorkflowStoreState {
updateTaskContent: (taskId: string, delta: string) => void; // Stream content (append)
setTaskContent: (taskId: string, content: string) => void; // Set full content
appendTaskLog: (taskId: string, log: string) => void;
appendGlobalLog: (log: string) => void; // New action for raw global logs
setActiveTab: (tabId: string) => void;
completeWorkflow: (result: unknown) => void;
failWorkflow: (reason: string) => void;
@ -33,6 +35,7 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
mode: 'realtime',
dag: null,
tasks: {},
logs: [],
error: null,
activeTab: 'overview',
@ -42,6 +45,7 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
mode: 'realtime',
error: null,
tasks: {},
logs: [],
activeTab: 'overview'
}),
@ -155,6 +159,12 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
});
},
appendGlobalLog: (log) => {
set(state => ({
logs: [...state.logs, log]
}));
},
setActiveTab: (tabId) => set({ activeTab: tabId }),
completeWorkflow: (_result) => set({ status: schemas.TaskStatus.enum.Completed }),
@ -162,7 +172,10 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
handleEvent: (event: WorkflowEvent) => {
const state = get();
// console.log('Handling Event:', event.type, event);
// Enhanced Logging (Filtered)
if (event.type !== 'TaskStreamUpdate' && event.type !== 'TaskLog') {
console.log(`[Store] Handling Event: ${event.type}`, event);
}
switch (event.type) {
case 'WorkflowStarted':
@ -170,6 +183,7 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
break;
case 'TaskStateChanged': {
const p = event.payload;
console.log(`[Store] Task Update: ${p.task_id} -> ${p.status}`);
// @ts-ignore
state.updateTaskStatus(
p.task_id,
@ -191,51 +205,96 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
const p = event.payload;
const time = new Date(p.timestamp).toLocaleTimeString();
const log = `[${time}] [${p.level}] ${p.message}`;
// Update Task-specific logs
state.appendTaskLog(p.task_id, log);
// Update Global Raw Logs
const globalLog = `[${time}] [${p.task_id}] [${p.level}] ${p.message}`;
state.appendGlobalLog(globalLog);
break;
}
case 'WorkflowCompleted': {
console.log("[Store] Workflow Completed");
state.completeWorkflow(event.payload.result_summary);
break;
}
case 'WorkflowFailed': {
console.log("[Store] Workflow Failed:", event.payload.reason);
state.failWorkflow(event.payload.reason);
break;
}
case 'WorkflowStateSnapshot': {
// Used for real-time rehydration (e.g. page refresh)
console.log("[Store] Processing WorkflowStateSnapshot...", event.payload);
// First, restore DAG if present
if (event.payload.task_graph) {
// WARNING: setDag resets tasks to initial state!
// We must be careful not to lose existing state if we are just updating.
// But usually Snapshot means "replace everything".
state.setDag(event.payload.task_graph);
}
const currentTasks = get().tasks;
const currentTasks = get().tasks; // These are now reset if setDag was called
const newTasks = { ...currentTasks };
if (event.payload.tasks_status) {
Object.entries(event.payload.tasks_status).forEach(([taskId, status]) => {
const payload = event.payload as any;
// NEW: Handle task_states (Comprehensive Snapshot)
if (payload.task_states) {
Object.entries(payload.task_states).forEach(([taskId, stateSnapshot]: [string, any]) => {
// Merge or Create
const existing = newTasks[taskId] || {
status: schemas.TaskStatus.enum.Pending,
logs: [],
progress: 0,
content: ''
};
newTasks[taskId] = {
...existing,
status: stateSnapshot.status,
// Prefer snapshot logs when available; they carry the full history
logs: (stateSnapshot.logs && stateSnapshot.logs.length > 0) ? stateSnapshot.logs : existing.logs,
// Prefer snapshot content if available
content: stateSnapshot.content !== undefined && stateSnapshot.content !== null ? stateSnapshot.content : existing.content,
inputCommit: stateSnapshot.input_commit,
outputCommit: stateSnapshot.output_commit,
metadata: stateSnapshot.metadata
};
});
} else {
// Fallback / Compatibility
if (payload.tasks_status) {
Object.entries(payload.tasks_status).forEach(([taskId, status]) => {
if (newTasks[taskId] && status) {
newTasks[taskId] = { ...newTasks[taskId], status: status as TaskStatus };
}
});
}
if (event.payload.tasks_output) {
Object.entries(event.payload.tasks_output).forEach(([taskId, outputCommit]) => {
if (payload.tasks_output) {
Object.entries(payload.tasks_output).forEach(([taskId, outputCommit]) => {
if (newTasks[taskId] && outputCommit) {
newTasks[taskId] = { ...newTasks[taskId], outputCommit: outputCommit as string };
}
});
}
if (event.payload.tasks_metadata) {
Object.entries(event.payload.tasks_metadata).forEach(([taskId, metadata]) => {
if (payload.tasks_metadata) {
Object.entries(payload.tasks_metadata).forEach(([taskId, metadata]) => {
if (newTasks[taskId] && metadata) {
// Note: The generated client types define metadata as TaskMetadata which includes optional paths.
// We store it directly as it matches our TaskState.metadata shape partially.
newTasks[taskId] = { ...newTasks[taskId], metadata: metadata };
}
});
}
}
// Handle global log replay
// @ts-ignore
if (payload.logs && Array.isArray(payload.logs)) {
set({ logs: payload.logs });
}
set({ tasks: newTasks });
break;
@ -244,7 +303,11 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
},
loadFromSnapshot: (payload: any) => {
// Used for loading completed/archived sessions
const dag = payload.task_graph;
// Check if we have the new `task_states` format in the snapshot
const taskStates = payload.task_states;
const tasks_status = payload.tasks_status;
const tasks_output = payload.tasks_output;
const tasks_metadata = payload.tasks_metadata;
@ -253,14 +316,28 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
if (dag) {
dag.nodes.forEach((node: any) => {
if (taskStates && taskStates[node.id]) {
// Use new format
const s = taskStates[node.id];
newTasks[node.id] = {
status: s.status,
logs: s.logs || [],
progress: s.status === 'Completed' ? 100 : 0,
content: s.content || '',
outputCommit: s.output_commit,
metadata: s.metadata
};
} else {
// Legacy fallback
newTasks[node.id] = {
status: tasks_status?.[node.id] || node.initial_status,
logs: [],
progress: 100,
content: '', // Content is not in snapshot, needs on-demand loading
content: '', // Content is not in legacy snapshot
outputCommit: tasks_output?.[node.id],
metadata: tasks_metadata?.[node.id]
};
}
});
}
@ -271,6 +348,10 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
mode: 'historical',
error: null
});
if (payload.logs) {
set({ logs: payload.logs });
}
},
reset: () => set({
@ -279,6 +360,7 @@ export const useWorkflowStore = create<WorkflowStoreState>((set, get) => ({
mode: 'realtime',
dag: null,
tasks: {},
logs: [],
error: null,
activeTab: 'overview'
})

View File

@ -9,56 +9,6 @@
"version": "0.1.0"
},
"paths": {
"/api/v1/configs/analysis_template_sets": {
"get": {
"tags": [
"api"
],
"summary": "[GET /api/v1/configs/analysis_template_sets]",
"operationId": "get_analysis_template_sets",
"responses": {
"200": {
"description": "Analysis template sets configuration",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AnalysisTemplateSets"
}
}
}
}
}
},
"put": {
"tags": [
"api"
],
"summary": "[PUT /api/v1/configs/analysis_template_sets]",
"operationId": "update_analysis_template_sets",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AnalysisTemplateSets"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Updated analysis template sets configuration",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AnalysisTemplateSets"
}
}
}
}
}
}
},
"/api/v1/configs/data_sources": {
"get": {
"tags": [
@ -183,6 +133,134 @@
}
}
},
"/api/v1/configs/templates": {
"get": {
"tags": [
"api"
],
"summary": "[GET /api/v1/configs/templates]",
"operationId": "get_templates",
"responses": {
"200": {
"description": "List of analysis templates",
"content": {
"application/json": {
"schema": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AnalysisTemplateSummary"
}
}
}
}
}
}
}
},
"/api/v1/configs/templates/{id}": {
"get": {
"tags": [
"api"
],
"summary": "[GET /api/v1/configs/templates/{id}]",
"operationId": "get_template_by_id",
"parameters": [
{
"name": "id",
"in": "path",
"description": "Template ID",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Analysis template details",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AnalysisTemplateSet"
}
}
}
},
"404": {
"description": "Template not found"
}
}
},
"put": {
"tags": [
"api"
],
"summary": "[PUT /api/v1/configs/templates/{id}]",
"operationId": "update_template",
"parameters": [
{
"name": "id",
"in": "path",
"description": "Template ID",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AnalysisTemplateSet"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Updated analysis template",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AnalysisTemplateSet"
}
}
}
},
"404": {
"description": "Template not found"
}
}
},
"delete": {
"tags": [
"api"
],
"summary": "[DELETE /api/v1/configs/templates/{id}]",
"operationId": "delete_template",
"parameters": [
{
"name": "id",
"in": "path",
"description": "Template ID",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"204": {
"description": "Template deleted"
},
"404": {
"description": "Template not found"
}
}
}
},
"/api/v1/configs/test": {
"post": {
"tags": [
@ -576,6 +654,23 @@
"type": "string"
}
},
"AnalysisTemplateSummary": {
"type": "object",
"description": "Summary of an analysis template (for listing purposes).",
"required": [
"id",
"name"
],
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string"
}
},
"additionalProperties": false
},
"CanonicalSymbol": {
"type": "string",
"description": "CanonicalSymbol 是系统内部唯一的股票代码标识符类型\n它封装了一个标准化的字符串遵循 Yahoo Finance 格式)\n使用 newtype 模式防止与普通 String 混淆",

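A minimal client-side sketch of the template endpoints above (not part of this diff). It assumes the gateway listens on the compose port 4000, that a template with id `default` exists, and that reqwest's `json` feature is enabled; only the paths and payload schemas come from this spec.

```rust
use serde_json::{json, Value};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let base = "http://localhost:4000"; // assumed api-gateway address
    let client = reqwest::Client::new();

    // List template summaries: [{"id": "...", "name": "..."}, ...]
    let summaries: Value = client
        .get(format!("{base}/api/v1/configs/templates"))
        .send().await?.error_for_status()?
        .json().await?;
    println!("templates: {summaries}");

    // Fetch one full template, tweak it, and write it back.
    let id = "default"; // hypothetical template id
    let mut tpl: Value = client
        .get(format!("{base}/api/v1/configs/templates/{id}"))
        .send().await?.error_for_status()?
        .json().await?;
    tpl["name"] = json!("Default (edited)");
    client
        .put(format!("{base}/api/v1/configs/templates/{id}"))
        .json(&tpl)
        .send().await?.error_for_status()?;

    // DELETE /api/v1/configs/templates/{id} returns 204 No Content on success.
    Ok(())
}
```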
View File

@ -145,7 +145,7 @@ services:
environment:
SERVER_PORT: 4000
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
PROVIDER_SERVICES: '["http://alphavantage-provider-service:8000", "http://tushare-provider-service:8001", "http://finnhub-provider-service:8002", "http://yfinance-provider-service:8003"]'
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
@ -171,7 +171,7 @@ services:
environment:
SERVER_PORT: 8000
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
depends_on:
@ -192,7 +192,7 @@ services:
environment:
SERVER_PORT: 8001
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
TUSHARE_API_URL: http://api.waditu.com
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
@ -214,7 +214,7 @@ services:
environment:
SERVER_PORT: 8002
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
FINNHUB_API_URL: https://finnhub.io/api/v1
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
@ -236,7 +236,7 @@ services:
environment:
SERVER_PORT: 8003
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
depends_on:
@ -260,7 +260,7 @@ services:
environment:
SERVER_PORT: 8004
NATS_ADDR: nats://nats:4222
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000
RUST_LOG: info,axum=info
RUST_BACKTRACE: "1"
depends_on:

View File

@ -4,7 +4,7 @@ set -e
# Configuration
COMPOSE_FILE="docker-compose.test.yml"
export NATS_ADDR="nats://localhost:4223"
export DATA_PERSISTENCE_SERVICE_URL="http://localhost:3001/api/v1"
export DATA_PERSISTENCE_SERVICE_URL="http://localhost:3005"
# For services that might need direct DB access (e.g. persistence tests)
export DATABASE_URL="postgresql://postgres:postgres@localhost:5433/fundamental_test"
@ -47,7 +47,7 @@ function start_env() {
# Simple wait loop for persistence service
local max_retries=30
local count=0
while ! curl -s http://localhost:3001/health > /dev/null; do
while ! curl -s http://localhost:3005/health > /dev/null; do
sleep 2
count=$((count+1))
if [ $count -ge $max_retries ]; then
@ -102,7 +102,7 @@ function run_tests() {
}
function check_env_ready() {
if curl -s http://localhost:3001/health > /dev/null; then
if curl -s http://localhost:3005/health > /dev/null; then
return 0
else
return 1

View File

@ -44,3 +44,4 @@ anyhow = "1.0"
chrono = { version = "0.4", features = ["serde"] }
sse-stream = "0.2"
futures = "0.3"
async-trait = "0.1.89"

View File

@ -25,7 +25,7 @@ async fn poll_and_update_config(state: &AppState) -> Result<()> {
info!("Polling for data source configurations...");
let client = reqwest::Client::new();
let url = format!(
"{}/configs/data_sources",
"{}/api/v1/configs/data_sources",
state.config.data_persistence_service_url
);

View File

@ -0,0 +1,13 @@
use anyhow::Result;
use common_contracts::workflow_types::WorkflowTaskCommand;
use crate::state::AppState;
use crate::workflow_adapter::AlphavantageNode;
use common_contracts::workflow_runner::WorkflowNodeRunner;
use std::sync::Arc;
pub async fn handle_workflow_command(state: AppState, nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> {
let node = Arc::new(AlphavantageNode::new(state));
let runner = WorkflowNodeRunner::new(nats);
runner.run(node, cmd).await
}

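The handler above is the entire provider-specific entry point. A hedged sketch of the reusable shape, assuming the runner accepts any `Arc`-wrapped `WorkflowNode` as in the call above:

```rust
use std::sync::Arc;
use anyhow::Result;
use common_contracts::workflow_node::WorkflowNode;
use common_contracts::workflow_runner::WorkflowNodeRunner;
use common_contracts::workflow_types::WorkflowTaskCommand;

/// Any provider wires itself in the same way: build its node, hand it to the runner.
/// The only per-service pieces are the node implementation and the NATS routing key
/// its consumer subscribes to.
pub async fn handle_with<N: WorkflowNode + 'static>(
    node: N,
    nats: async_nats::Client,
    cmd: WorkflowTaskCommand,
) -> Result<()> {
    WorkflowNodeRunner::new(nats).run(Arc::new(node), cmd).await
}
```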
View File

@ -6,7 +6,8 @@ mod mapping;
mod message_consumer;
// mod persistence; // Removed
mod state;
mod worker;
mod workflow_adapter;
mod generic_worker;
mod av_client;
mod config_poller;
mod transport;

View File

@ -1,7 +1,6 @@
use crate::error::Result;
use crate::state::{AppState, ServiceOperationalStatus};
use common_contracts::messages::FetchCompanyDataCommand;
use common_contracts::subjects::NatsSubject;
use common_contracts::workflow_types::WorkflowTaskCommand;
use futures_util::StreamExt;
use std::time::Duration;
use tracing::{error, info, warn};
@ -24,7 +23,7 @@ pub async fn run(state: AppState) -> Result<()> {
match async_nats::connect(&state.config.nats_addr).await {
Ok(client) => {
info!("Successfully connected to NATS.");
if let Err(e) = subscribe_and_process(state.clone(), client).await {
if let Err(e) = subscribe_workflow(state.clone(), client).await {
error!("NATS subscription error: {}. Reconnecting in 10s...", e);
}
}
@ -36,45 +35,54 @@ pub async fn run(state: AppState) -> Result<()> {
}
}
async fn subscribe_and_process(state: AppState, client: async_nats::Client) -> Result<()> {
let subject = NatsSubject::DataFetchCommands.to_string();
use common_contracts::ack::TaskAcknowledgement;
async fn subscribe_workflow(state: AppState, client: async_nats::Client) -> Result<()> {
// Alphavantage routing key: provider.alphavantage
let subject = "workflow.cmd.provider.alphavantage".to_string();
let mut subscriber = client.subscribe(subject.clone()).await?;
info!(
"Consumer started, waiting for messages on subject '{}'",
subject
);
info!("Workflow Consumer started on '{}'", subject);
while let Some(message) = subscriber.next().await {
// Check status
let current_status = state.status.read().await.clone();
if matches!(current_status, ServiceOperationalStatus::Degraded {..}) {
warn!("Service became degraded. Disconnecting from NATS and pausing consumption.");
warn!("Service became degraded. Disconnecting from NATS.");
// Reject if degraded
if let Some(reply_to) = message.reply {
let ack = TaskAcknowledgement::Rejected { reason: "Service degraded".to_string() };
if let Ok(payload) = serde_json::to_vec(&ack) {
let _ = client.publish(reply_to, payload.into()).await;
}
}
subscriber.unsubscribe().await?;
return Ok(());
}
info!("Received NATS message.");
let state_clone = state.clone();
let publisher_clone = client.clone();
// Accept
if let Some(reply_to) = message.reply.clone() {
let ack = TaskAcknowledgement::Accepted;
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Acceptance Ack: {}", e);
}
}
}
let state = state.clone();
let client = client.clone();
tokio::spawn(async move {
match serde_json::from_slice::<FetchCompanyDataCommand>(&message.payload) {
Ok(command) => {
let request_id = command.request_id;
info!("Deserialized command for symbol: {}", command.symbol);
if let Err(e) =
crate::worker::handle_fetch_command(state_clone.clone(), command, publisher_clone)
.await
{
error!("Error handling fetch command: {:?}", e);
if let Some(mut task) = state_clone.tasks.get_mut(&request_id) {
task.status = common_contracts::observability::ObservabilityTaskStatus::Failed;
task.details = format!("Worker failed: {}", e);
}
}
}
Err(e) => {
error!("Failed to deserialize message: {}", e);
match serde_json::from_slice::<WorkflowTaskCommand>(&message.payload) {
Ok(cmd) => {
info!("Received workflow command for task: {}", cmd.task_id);
if let Err(e) = crate::generic_worker::handle_workflow_command(state, client, cmd).await {
error!("Generic worker handler failed: {}", e);
}
},
Err(e) => error!("Failed to parse WorkflowTaskCommand: {}", e),
}
});
}

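For context, a hedged sketch of the dispatcher side of this accept/reject handshake (not part of this diff): the orchestrator publishes the command as a NATS request and interprets the first reply as a `TaskAcknowledgement`. The subject mirrors the routing key above; the rest is an assumption about the caller.

```rust
use anyhow::{bail, Result};
use common_contracts::ack::TaskAcknowledgement;
use common_contracts::workflow_types::WorkflowTaskCommand;

pub async fn dispatch(nats: &async_nats::Client, cmd: &WorkflowTaskCommand) -> Result<()> {
    let subject = "workflow.cmd.provider.alphavantage".to_string();
    let payload = serde_json::to_vec(cmd)?;

    // `request` publishes with a reply inbox and waits for the first response,
    // which is the Accepted/Rejected ack sent by the consumer above.
    let reply = nats.request(subject, payload.into()).await?;

    match serde_json::from_slice::<TaskAcknowledgement>(&reply.payload)? {
        TaskAcknowledgement::Accepted => Ok(()),
        TaskAcknowledgement::Rejected { reason } => bail!("provider rejected task: {reason}"),
    }
}
```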
View File

@ -1,432 +0,0 @@
use crate::error::{Result, AppError};
use crate::mapping::{CombinedFinancials, parse_company_profile, parse_financials, parse_realtime_quote};
use common_contracts::persistence_client::PersistenceClient;
use common_contracts::dtos::{ProviderCacheDto, SessionDataDto};
use crate::state::{AppState, TaskStore};
use chrono::{Utc, Datelike, Duration};
use common_contracts::messages::{FetchCompanyDataCommand, FinancialsPersistedEvent, DataFetchFailedEvent};
use common_contracts::observability::{TaskProgress, ObservabilityTaskStatus};
use tracing::{error, info, instrument, warn};
use uuid::Uuid;
use serde_json::Value;
#[instrument(skip(state, command, publisher), fields(request_id = %command.request_id, symbol = %command.symbol))]
pub async fn handle_fetch_command(
state: AppState,
command: FetchCompanyDataCommand,
publisher: async_nats::Client,
) -> Result<()> {
match handle_fetch_command_inner(state.clone(), &command, &publisher).await {
Ok(_) => Ok(()),
Err(e) => {
error!("AlphaVantage workflow failed: {}", e);
// Publish failure event
let event = DataFetchFailedEvent {
request_id: command.request_id,
symbol: command.symbol.clone(),
error: e.to_string(),
provider_id: Some("alphavantage".to_string()),
};
let _ = publisher
.publish(
"events.data.fetch_failed".to_string(),
serde_json::to_vec(&event).unwrap().into(),
)
.await;
// Update task status
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.status = ObservabilityTaskStatus::Failed;
task.details = format!("Failed: {}", e);
} else {
// If task doesn't exist (e.g. failed at insert), create a failed task
let task = TaskProgress {
request_id: command.request_id,
task_name: format!("alphavantage:{}", command.symbol),
status: ObservabilityTaskStatus::Failed,
progress_percent: 0,
details: format!("Failed: {}", e),
started_at: Utc::now(),
};
state.tasks.insert(command.request_id, task);
}
Err(e)
}
}
}
async fn handle_fetch_command_inner(
state: AppState,
command: &FetchCompanyDataCommand,
publisher: &async_nats::Client,
) -> Result<()> {
info!("Handling fetch data command.");
let task = TaskProgress {
request_id: command.request_id,
task_name: format!("alphavantage:{}", command.symbol),
status: ObservabilityTaskStatus::InProgress,
progress_percent: 0,
details: "Initializing...".to_string(),
started_at: Utc::now(),
};
state.tasks.insert(command.request_id, task);
let client = match state.get_provider().await {
Some(p) => p,
None => {
let reason = "Execution failed: Alphavantage provider is not available (misconfigured).".to_string();
return Err(AppError::ProviderNotAvailable(reason));
}
};
let persistence_client =
PersistenceClient::new(state.config.data_persistence_service_url.clone());
let symbol = command.symbol.clone();
// Symbol conversion using shared logic
let av_symbol = symbol.to_alphavantage();
info!("Using symbol for AlphaVantage: {}", av_symbol);
update_task_progress(
&state.tasks,
command.request_id,
10,
"Checking cache...",
None,
)
.await;
// --- 1. Check Cache ---
let cache_key = format!("alphavantage:{}:all", av_symbol);
let (overview_json, income_json, balance_json, cashflow_json, quote_json) = match persistence_client.get_cache(&cache_key).await.map_err(|e| AppError::Internal(e.to_string()))? {
Some(cache_entry) => {
info!("Cache HIT for {}", cache_key);
// Deserialize tuple of JSONs
let data: (Value, Value, Value, Value, Value) = serde_json::from_value(cache_entry.data_payload)
.map_err(|e| AppError::Internal(format!("Failed to deserialize cache: {}", e)))?;
update_task_progress(
&state.tasks,
command.request_id,
50,
"Data retrieved from cache",
None,
).await;
data
},
None => {
info!("Cache MISS for {}", cache_key);
update_task_progress(
&state.tasks,
command.request_id,
20,
"Fetching from AlphaVantage API...",
None,
).await;
let params_overview = vec![("symbol", av_symbol.as_str())];
let params_income = vec![("symbol", av_symbol.as_str())];
let params_balance = vec![("symbol", av_symbol.as_str())];
let params_cashflow = vec![("symbol", av_symbol.as_str())];
// Add datatype=json to force JSON response if supported (or at least Python-dict like)
let params_quote = vec![("symbol", av_symbol.as_str()), ("datatype", "json")];
let overview_json = client.query("COMPANY_OVERVIEW", &params_overview).await?;
check_av_response(&overview_json)?;
tokio::time::sleep(std::time::Duration::from_secs(2)).await; // Rate limit protection
let quote_json = client.query("GLOBAL_QUOTE", &params_quote).await?;
check_av_response(&quote_json)?;
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let income_json = client.query("INCOME_STATEMENT", &params_income).await?;
check_av_response(&income_json)?;
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let balance_json = client.query("BALANCE_SHEET", &params_balance).await?;
check_av_response(&balance_json)?;
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let cashflow_json = client.query("CASH_FLOW", &params_cashflow).await?;
check_av_response(&cashflow_json)?;
let data = (
overview_json,
income_json,
balance_json,
cashflow_json,
quote_json
);
// Write to Cache
let payload = serde_json::json!(data);
persistence_client.set_cache(&ProviderCacheDto {
cache_key,
data_payload: payload,
expires_at: Utc::now() + Duration::hours(24),
updated_at: None,
}).await.map_err(|e| AppError::Internal(e.to_string()))?;
data
}
};
update_task_progress(
&state.tasks,
command.request_id,
70,
"Data fetched, processing...",
None,
)
.await;
// --- 2. Transform and Snapshot Data ---
// 2.1 Profile
if let Some(_symbol_val) = overview_json.get("Symbol") {
match parse_company_profile(overview_json) {
Ok(profile_to_persist) => {
// Update Global Profile
// REMOVED: upsert_company_profile is deprecated.
// let _ = persistence_client.upsert_company_profile(profile_to_persist.clone()).await;
// Snapshot Profile
persistence_client.insert_session_data(&SessionDataDto {
request_id: command.request_id,
symbol: command.symbol.to_string(),
provider: "alphavantage".to_string(),
data_type: "company_profile".to_string(),
data_payload: serde_json::to_value(&profile_to_persist).unwrap(),
created_at: None,
}).await.map_err(|e| AppError::Internal(e.to_string()))?;
},
Err(e) => {
warn!("Failed to parse CompanyProfile: {}", e);
}
}
} else {
// If Symbol is missing but check_av_response passed, it might be an empty object {}
warn!("COMPANY_OVERVIEW returned JSON without 'Symbol' field: {:?}", overview_json);
}
// 2.2 Financials
let mut years_updated: Vec<u16> = Vec::new();
if income_json.get("annualReports").is_some() {
let combined_financials = CombinedFinancials {
income: income_json,
balance_sheet: balance_json,
cash_flow: cashflow_json,
};
match parse_financials(combined_financials) {
Ok(financials_to_persist) => {
if !financials_to_persist.is_empty() {
years_updated = financials_to_persist
.iter()
.map(|f| f.period_date.year() as u16)
.collect();
// Snapshot Financials
persistence_client.insert_session_data(&SessionDataDto {
request_id: command.request_id,
symbol: command.symbol.to_string(),
provider: "alphavantage".to_string(),
data_type: "financial_statements".to_string(),
data_payload: serde_json::to_value(&financials_to_persist).unwrap(),
created_at: None,
}).await.map_err(|e| AppError::Internal(e.to_string()))?;
}
},
Err(e) => {
warn!("Failed to parse Financials: {}", e);
}
}
}
// 2.3 Quote
// Fix Python-dict string if necessary
let fixed_quote_json = if let Some(s) = quote_json.as_str() {
if s.trim().starts_with("{'Global Quote'") {
let fixed = s.replace("'", "\"");
match serde_json::from_str::<Value>(&fixed) {
Ok(v) => v,
Err(e) => {
warn!("Failed to fix/parse quoted JSON string: {}. Error: {}", s, e);
quote_json // fallback to original
}
}
} else {
quote_json
}
} else {
quote_json
};
// Realtime quote is global/time-series, so we still use upsert_realtime_quote
let mut summary = format!("Fetched {} years of financial data", years_updated.len());
match parse_realtime_quote(fixed_quote_json, &command.market) {
Ok(mut quote_to_persist) => {
quote_to_persist.symbol = command.symbol.to_string();
// Snapshot Realtime Quote
let _ = persistence_client.insert_session_data(&SessionDataDto {
request_id: command.request_id,
symbol: command.symbol.to_string(),
provider: "alphavantage".to_string(),
data_type: "realtime_quote".to_string(),
data_payload: serde_json::to_value(&quote_to_persist).unwrap(),
created_at: None,
}).await;
summary = format!("Parsed Realtime Quote for {}: Price={}, Volume={:?}",
quote_to_persist.symbol, quote_to_persist.price, quote_to_persist.volume);
},
Err(e) => {
warn!("Failed to parse RealtimeQuote: {}", e);
}
}
update_task_progress(
&state.tasks,
command.request_id,
90,
"Snapshot created, publishing events...",
None,
)
.await;
// --- 3. Publish events ---
let event = FinancialsPersistedEvent {
request_id: command.request_id,
symbol: command.symbol.clone(),
years_updated,
template_id: command.template_id.clone(),
provider_id: Some("alphavantage".to_string()),
data_summary: Some(summary),
};
let subject = "events.data.financials_persisted".to_string();
publisher
.publish(subject, serde_json::to_vec(&event).unwrap().into())
.await?;
// Update Provider Status
// REMOVED: update_provider_status is deprecated or missing in client.
/*
persistence_client.update_provider_status(command.symbol.as_str(), "alphavantage", common_contracts::dtos::ProviderStatusDto {
last_updated: chrono::Utc::now(),
status: TaskStatus::Completed,
data_version: None,
}).await?;
*/
update_task_progress(
&state.tasks,
command.request_id,
100,
"Task completed successfully",
Some(ObservabilityTaskStatus::Completed),
).await;
info!("AlphaVantage task completed successfully.");
Ok(())
}
fn check_av_response(v: &Value) -> Result<()> {
if let Some(note) = v.get("Note").and_then(|s| s.as_str()) {
return Err(AppError::Internal(format!("AlphaVantage Rate Limit: {}", note)));
}
if let Some(info) = v.get("Information").and_then(|s| s.as_str()) {
return Err(AppError::Internal(format!("AlphaVantage Information: {}", info)));
}
Ok(())
}
async fn update_task_progress(tasks: &TaskStore, request_id: Uuid, percent: u8, details: &str, status: Option<ObservabilityTaskStatus>) {
if let Some(mut task) = tasks.get_mut(&request_id) {
task.progress_percent = percent;
task.details = details.to_string();
if let Some(s) = status {
task.status = s;
}
info!("Task update: {}% - {} (Status: {:?})", percent, details, task.status);
}
}
#[cfg(test)]
mod integration_tests {
use super::*;
use crate::config::AppConfig;
use crate::state::AppState;
use std::time::Duration;
use common_contracts::symbol_utils::{CanonicalSymbol, Market};
#[tokio::test]
async fn test_alphavantage_fetch_flow() {
// Check if running in test environment
if std::env::var("NATS_ADDR").is_err() {
// Skip if env vars not set (e.g. running cargo test without script)
// But better to panic to alert developer
// panic!("Must run integration tests with run_component_tests.sh or set env vars");
println!("Skipping integration test (no environment)");
return;
}
// 1. Environment Variables
// Assumed set by external script, but we double check specific overrides for component test
// NATS_ADDR, DATA_PERSISTENCE_SERVICE_URL, ALPHAVANTAGE_API_KEY, ALPHAVANTAGE_MCP_URL
let api_key = std::env::var("ALPHAVANTAGE_API_KEY")
.unwrap_or_else(|_| "PUOO7UPTNXN325NN".to_string());
let mcp_url = std::env::var("ALPHAVANTAGE_MCP_URL")
.expect("ALPHAVANTAGE_MCP_URL must be set");
let config = AppConfig::load().expect("Failed to load config");
let state = AppState::new(config.clone()).expect("Failed to create state");
// 2. Manual Init Provider (Skip Config Poller)
state.update_provider(
Some(api_key),
Some(mcp_url)
).await;
// Wait for connection
let mut connected = false;
for _ in 0..10 {
if state.get_provider().await.is_some() {
connected = true;
break;
}
tokio::time::sleep(Duration::from_millis(500)).await;
}
assert!(connected, "Failed to connect to AlphaVantage MCP Provider");
// 3. Construct Command
let request_id = Uuid::new_v4();
let cmd = FetchCompanyDataCommand {
request_id,
symbol: CanonicalSymbol::new("IBM", &Market::US),
market: "US".to_string(),
template_id: Some("default".to_string()),
output_path: None,
};
// 4. NATS
let nats_client = async_nats::connect(&config.nats_addr).await
.expect("Failed to connect to NATS");
// 5. Run
let result = handle_fetch_command_inner(state.clone(), &cmd, &nats_client).await;
// 6. Assert
assert!(result.is_ok(), "Worker execution failed: {:?}", result.err());
let task = state.tasks.get(&request_id).expect("Task should exist");
assert_eq!(task.status, ObservabilityTaskStatus::Completed);
}
}

View File

@ -0,0 +1,145 @@
use async_trait::async_trait;
use anyhow::{Result, anyhow, Context};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::time::Duration;
use tokio::time::sleep;
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent, CacheKey};
use common_contracts::data_formatting;
use crate::state::AppState;
use crate::mapping;
pub struct AlphavantageNode {
state: AppState,
}
impl AlphavantageNode {
pub fn new(state: AppState) -> Self {
Self { state }
}
}
#[async_trait]
impl WorkflowNode for AlphavantageNode {
fn node_type(&self) -> &str {
"alphavantage"
}
fn get_cache_config(&self, config: &Value) -> Option<(CacheKey, Duration)> {
let symbol = config.get("symbol").and_then(|s| s.as_str())?;
let key_parts = vec![
"alphavantage",
"company_data",
symbol,
"all"
];
let cache_key = CacheKey(key_parts.join(":"));
let ttl = Duration::from_secs(86400); // 24h
Some((cache_key, ttl))
}
async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult> {
let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string();
if symbol.is_empty() {
return Err(anyhow!("Missing symbol in config"));
}
// 1. Get Provider (MCP Client)
let provider = self.state.get_provider().await
.ok_or_else(|| anyhow!("Alphavantage Provider not initialized"))?;
// 2. Fetch Data via MCP (Sequential with Rate Limit Protection)
// COMPANY_OVERVIEW
let overview_json = provider.query("COMPANY_OVERVIEW", &[("symbol", &symbol)]).await
.context("Failed to fetch COMPANY_OVERVIEW")?;
check_av_response(&overview_json)?;
sleep(Duration::from_secs(2)).await;
// GLOBAL_QUOTE
let _quote_json = provider.query("GLOBAL_QUOTE", &[("symbol", &symbol), ("datatype", "json")]).await
.context("Failed to fetch GLOBAL_QUOTE")?;
// check_av_response(&quote_json)?; // Quote not strictly required for Profile/Financials report
sleep(Duration::from_secs(2)).await;
// INCOME_STATEMENT
let income_json = provider.query("INCOME_STATEMENT", &[("symbol", &symbol)]).await
.context("Failed to fetch INCOME_STATEMENT")?;
check_av_response(&income_json)?;
sleep(Duration::from_secs(2)).await;
// BALANCE_SHEET
let balance_json = provider.query("BALANCE_SHEET", &[("symbol", &symbol)]).await
.context("Failed to fetch BALANCE_SHEET")?;
check_av_response(&balance_json)?;
sleep(Duration::from_secs(2)).await;
// CASH_FLOW
let cashflow_json = provider.query("CASH_FLOW", &[("symbol", &symbol)]).await
.context("Failed to fetch CASH_FLOW")?;
check_av_response(&cashflow_json)?;
// 3. Parse & Combine
let profile = mapping::parse_company_profile(overview_json)?;
let combined = mapping::CombinedFinancials {
income: income_json,
balance_sheet: balance_json,
cash_flow: cashflow_json,
};
let financials = mapping::parse_financials(combined)?;
// 4. Artifacts
let mut artifacts = HashMap::new();
artifacts.insert("profile.json".to_string(), json!(profile).into());
artifacts.insert("financials.json".to_string(), json!(financials).into());
Ok(NodeExecutionResult {
artifacts,
meta_summary: Some(json!({
"symbol": symbol,
"records": financials.len()
})),
})
}
fn render_report(&self, result: &NodeExecutionResult) -> Result<String> {
let profile_json = match result.artifacts.get("profile.json") {
Some(ArtifactContent::Json(v)) => v,
_ => return Err(anyhow!("Missing profile.json")),
};
let financials_json = match result.artifacts.get("financials.json") {
Some(ArtifactContent::Json(v)) => v,
_ => return Err(anyhow!("Missing financials.json")),
};
let symbol = profile_json["symbol"].as_str().unwrap_or("Unknown");
let mut report_md = String::new();
report_md.push_str(&format!("# Alphavantage Data Report: {}\n\n", symbol));
report_md.push_str("## Company Profile\n\n");
report_md.push_str(&data_formatting::format_data(profile_json));
report_md.push_str("\n\n");
report_md.push_str("## Financial Statements\n\n");
report_md.push_str(&data_formatting::format_data(financials_json));
Ok(report_md)
}
}
fn check_av_response(v: &Value) -> Result<()> {
if let Some(note) = v.get("Note").and_then(|s| s.as_str()) {
return Err(anyhow!("AlphaVantage Rate Limit: {}", note));
}
if let Some(info) = v.get("Information").and_then(|s| s.as_str()) {
return Err(anyhow!("AlphaVantage Information: {}", info));
}
Ok(())
}

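A small illustration of the cache contract above (not part of this diff): for a given task config the node derives a stable key and TTL, which the generic runner uses for its pre-execution cache check. The `AppState` value and the symbol are assumed.

```rust
use common_contracts::workflow_node::WorkflowNode;

fn cache_contract_example(state: AppState) {
    // `state` is an already-built provider AppState (assumed).
    let node = AlphavantageNode::new(state);
    let config = serde_json::json!({ "symbol": "IBM" });

    if let Some((key, ttl)) = node.get_cache_config(&config) {
        assert_eq!(key.to_string(), "alphavantage:company_data:IBM:all");
        assert_eq!(ttl.as_secs(), 86_400); // 24 hours
    }
}
```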
View File

@ -10,6 +10,7 @@ use axum::{
use common_contracts::config_models::{
AnalysisTemplateSets, DataSourceProvider,
DataSourcesConfig, LlmProvider, LlmProvidersConfig,
AnalysisTemplateSummary, AnalysisTemplateSet
};
use common_contracts::dtos::{SessionDataDto, WorkflowHistoryDto, WorkflowHistorySummaryDto};
use common_contracts::messages::GenerateReportCommand;
@ -187,9 +188,17 @@ fn create_v1_router() -> Router<AppState> {
"/configs/llm_providers",
get(get_llm_providers_config).put(update_llm_providers_config),
)
// .route(
// "/configs/analysis_template_sets",
// get(get_analysis_template_sets).put(update_analysis_template_sets),
// )
.route(
"/configs/analysis_template_sets",
get(get_analysis_template_sets).put(update_analysis_template_sets),
"/configs/templates",
get(get_templates),
)
.route(
"/configs/templates/{id}",
get(get_template_by_id).put(update_template).delete(delete_template),
)
.route(
"/configs/data_sources",
@ -241,11 +250,16 @@ struct LegacySystemConfigResponse {
async fn get_legacy_system_config(State(state): State<AppState>) -> Result<impl IntoResponse> {
let persistence = state.persistence_client.clone();
let (llm_providers, analysis_template_sets, data_sources) = try_join!(
// let (llm_providers, analysis_template_sets, data_sources) = try_join!(
// persistence.get_llm_providers_config(),
// persistence.get_analysis_template_sets(),
// persistence.get_data_sources_config()
// )?;
let (llm_providers, data_sources) = try_join!(
persistence.get_llm_providers_config(),
persistence.get_analysis_template_sets(),
persistence.get_data_sources_config()
)?;
let analysis_template_sets = AnalysisTemplateSets::default(); // Empty placeholder
let new_api = derive_primary_provider(&llm_providers);
let ds_map = project_data_sources(data_sources);
@ -441,7 +455,7 @@ async fn get_workflow_snapshot(
) -> Result<impl IntoResponse> {
// Note: The persistence service currently returns ALL session data for a request_id
// and ignores the query params. We must filter manually here until persistence service is updated.
let snapshots = state.persistence_client.get_session_data(request_id, Some("orchestrator"), Some("workflow_snapshot")).await?;
let snapshots = state.persistence_client.get_session_data(request_id).await?;
info!("get_workflow_snapshot: retrieved {} records for {}", snapshots.len(), request_id);
@ -493,11 +507,40 @@ async fn workflow_events_stream(
// 3. Convert NATS stream to SSE stream
let stream = async_stream::stream! {
while let Some(msg) = subscriber.next().await {
if let Ok(event) = serde_json::from_slice::<WorkflowEvent>(&msg.payload) {
// Warn on unusually large payloads
let payload_len = msg.payload.len();
if payload_len > 100 * 1024 { // 100KB warning
warn!("Received large NATS message: {} bytes", payload_len);
}
match serde_json::from_slice::<WorkflowEvent>(&msg.payload) {
Ok(event) => {
// Extra debug for Snapshot
if let WorkflowEvent::WorkflowStateSnapshot { .. } = &event {
info!("Forwarding WorkflowStateSnapshot to SSE client");
}
match axum::response::sse::Event::default().json_data(event) {
Ok(sse_event) => yield Ok::<_, anyhow::Error>(sse_event),
Err(e) => error!("Failed to serialize SSE event: {}", e),
}
},
Err(e) => {
// Try to parse as generic JSON to debug content
error!("Failed to deserialize WorkflowEvent from NATS payload. Error: {}", e);
if let Ok(json_val) = serde_json::from_slice::<serde_json::Value>(&msg.payload) {
// Print first 500 chars of JSON to avoid flooding logs
let json_str = json_val.to_string();
let preview = if json_str.len() > 500 {
format!("{}...", &json_str[..500])
} else {
json_str
};
error!("Payload preview: {}", preview);
} else {
error!("Payload is not valid JSON. Raw bytes len: {}", msg.payload.len());
}
}
}
}
};
@ -922,40 +965,118 @@ async fn update_llm_providers_config(
Ok(Json(updated_config))
}
/// [GET /api/v1/configs/analysis_template_sets]
// /// [GET /api/v1/configs/analysis_template_sets]
// #[utoipa::path(
// get,
// path = "/api/v1/configs/analysis_template_sets",
// responses(
// (status = 200, description = "Analysis template sets configuration", body = AnalysisTemplateSets)
// )
// )]
// async fn get_analysis_template_sets(State(state): State<AppState>) -> Result<impl IntoResponse> {
// let config = state
// .persistence_client
// .get_analysis_template_sets()
// .await?;
// Ok(Json(config))
// }
// /// [PUT /api/v1/configs/analysis_template_sets]
// #[utoipa::path(
// put,
// path = "/api/v1/configs/analysis_template_sets",
// request_body = AnalysisTemplateSets,
// responses(
// (status = 200, description = "Updated analysis template sets configuration", body = AnalysisTemplateSets)
// )
// )]
// async fn update_analysis_template_sets(
// State(state): State<AppState>,
// Json(payload): Json<AnalysisTemplateSets>,
// ) -> Result<impl IntoResponse> {
// let updated_config = state
// .persistence_client
// .update_analysis_template_sets(&payload)
// .await?;
// Ok(Json(updated_config))
// }
/// [GET /api/v1/configs/templates]
#[utoipa::path(
get,
path = "/api/v1/configs/analysis_template_sets",
path = "/api/v1/configs/templates",
responses(
(status = 200, description = "Analysis template sets configuration", body = AnalysisTemplateSets)
(status = 200, description = "List of analysis templates", body = Vec<AnalysisTemplateSummary>)
)
)]
async fn get_analysis_template_sets(State(state): State<AppState>) -> Result<impl IntoResponse> {
let config = state
.persistence_client
.get_analysis_template_sets()
.await?;
Ok(Json(config))
async fn get_templates(State(state): State<AppState>) -> Result<impl IntoResponse> {
let templates = state.persistence_client.get_templates().await?;
Ok(Json(templates))
}
/// [PUT /api/v1/configs/analysis_template_sets]
/// [GET /api/v1/configs/templates/{id}]
#[utoipa::path(
put,
path = "/api/v1/configs/analysis_template_sets",
request_body = AnalysisTemplateSets,
get,
path = "/api/v1/configs/templates/{id}",
params(
("id" = String, Path, description = "Template ID")
),
responses(
(status = 200, description = "Updated analysis template sets configuration", body = AnalysisTemplateSets)
(status = 200, description = "Analysis template details", body = AnalysisTemplateSet),
(status = 404, description = "Template not found")
)
)]
async fn update_analysis_template_sets(
async fn get_template_by_id(
State(state): State<AppState>,
Json(payload): Json<AnalysisTemplateSets>,
Path(id): Path<String>,
) -> Result<impl IntoResponse> {
let updated_config = state
let template = state.persistence_client.get_template_by_id(&id).await?;
Ok(Json(template))
}
/// [PUT /api/v1/configs/templates/{id}]
#[utoipa::path(
put,
path = "/api/v1/configs/templates/{id}",
params(
("id" = String, Path, description = "Template ID")
),
request_body = AnalysisTemplateSet,
responses(
(status = 200, description = "Updated analysis template", body = AnalysisTemplateSet),
(status = 404, description = "Template not found")
)
)]
async fn update_template(
State(state): State<AppState>,
Path(id): Path<String>,
Json(payload): Json<AnalysisTemplateSet>,
) -> Result<impl IntoResponse> {
let updated_template = state
.persistence_client
.update_analysis_template_sets(&payload)
.update_template(&id, &payload)
.await?;
Ok(Json(updated_config))
Ok(Json(updated_template))
}
/// [DELETE /api/v1/configs/templates/{id}]
#[utoipa::path(
delete,
path = "/api/v1/configs/templates/{id}",
params(
("id" = String, Path, description = "Template ID")
),
responses(
(status = 204, description = "Template deleted"),
(status = 404, description = "Template not found")
)
)]
async fn delete_template(
State(state): State<AppState>,
Path(id): Path<String>,
) -> Result<impl IntoResponse> {
state.persistence_client.delete_template(&id).await?;
Ok(StatusCode::NO_CONTENT)
}
/// [GET /api/v1/configs/data_sources]

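A hedged consumer sketch for the SSE bridge above, using the `eventsource-stream` crate (not part of this diff). The events path is a placeholder and reqwest's `stream` feature is assumed; only the `WorkflowEvent` JSON payload shape comes from this commit.

```rust
use anyhow::Result;
use common_contracts::messages::WorkflowEvent;
use eventsource_stream::Eventsource;
use futures::StreamExt;

pub async fn follow_workflow(base: &str, request_id: uuid::Uuid) -> Result<()> {
    // Placeholder path: substitute the gateway's actual events route.
    let url = format!("{base}/api/v1/workflows/{request_id}/events");

    let mut events = reqwest::get(url).await?.bytes_stream().eventsource();
    while let Some(frame) = events.next().await {
        let frame = frame?; // raw SSE frame with a JSON `data` field
        match serde_json::from_str::<WorkflowEvent>(&frame.data) {
            Ok(WorkflowEvent::WorkflowStateSnapshot { .. }) => println!("snapshot received"),
            Ok(_) => { /* feed the event into local state */ }
            Err(e) => eprintln!("undecodable event: {e}"),
        }
    }
    Ok(())
}
```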
View File

@ -1,7 +1,6 @@
mod api;
mod config;
mod error;
mod persistence;
mod state;
mod openapi;
#[cfg(test)]

View File

@ -15,8 +15,12 @@ use crate::api;
api::resolve_symbol,
api::get_llm_providers_config,
api::update_llm_providers_config,
api::get_analysis_template_sets,
api::update_analysis_template_sets,
// api::get_analysis_template_sets,
// api::update_analysis_template_sets,
api::get_templates,
api::get_template_by_id,
api::update_template,
api::delete_template,
api::get_data_sources_config,
api::update_data_sources_config,
api::test_data_source_config,

View File

@ -1,203 +0,0 @@
//!
//! Data persistence service client
//!
use crate::error::Result;
use common_contracts::config_models::{
AnalysisTemplateSets, DataSourcesConfig, LlmProvidersConfig,
};
use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto, WorkflowHistoryDto, WorkflowHistorySummaryDto};
use uuid::Uuid;
#[derive(Clone)]
pub struct PersistenceClient {
client: reqwest::Client,
base_url: String,
}
impl PersistenceClient {
pub fn new(base_url: String) -> Self {
Self {
client: reqwest::Client::new(),
base_url,
}
}
pub async fn get_company_profile(&self, symbol: &str) -> Result<CompanyProfileDto> {
let url = format!("{}/companies/{}", self.base_url, symbol);
let profile = self
.client
.get(&url)
.send()
.await?
.error_for_status()?
.json::<CompanyProfileDto>()
.await?;
Ok(profile)
}
pub async fn get_financials(&self, symbol: &str) -> Result<Vec<TimeSeriesFinancialDto>> {
let url = format!(
"{}/market-data/financial-statements/{}",
self.base_url, symbol
);
let financials = self
.client
.get(&url)
.send()
.await?
.error_for_status()?
.json::<Vec<TimeSeriesFinancialDto>>()
.await?;
Ok(financials)
}
#[allow(dead_code)]
pub async fn get_session_data(
&self,
request_id: Uuid,
provider: Option<&str>,
data_type: Option<&str>,
) -> Result<Vec<common_contracts::dtos::SessionDataDto>> {
let url = format!("{}/session-data/{}", self.base_url, request_id);
let mut req = self.client.get(&url);
if let Some(p) = provider {
req = req.query(&[("provider", p)]);
}
if let Some(d) = data_type {
req = req.query(&[("data_type", d)]);
}
let data = req
.send()
.await?
.error_for_status()?
.json::<Vec<common_contracts::dtos::SessionDataDto>>()
.await?;
Ok(data)
}
pub async fn get_workflow_histories(&self, symbol: Option<&str>, limit: Option<i64>) -> Result<Vec<WorkflowHistorySummaryDto>> {
let url = format!("{}/history", self.base_url);
let mut req = self.client.get(&url);
if let Some(s) = symbol {
req = req.query(&[("symbol", s)]);
}
if let Some(l) = limit {
req = req.query(&[("limit", l)]);
}
let resp = req.send().await?.error_for_status()?;
let results = resp.json().await?;
Ok(results)
}
pub async fn get_workflow_history_by_id(&self, request_id: Uuid) -> Result<WorkflowHistoryDto> {
let url = format!("{}/history/{}", self.base_url, request_id);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let result = resp.json().await?;
Ok(result)
}
pub async fn clear_history(&self) -> Result<()> {
let url = format!("{}/system/history", self.base_url);
self.client
.delete(&url)
.send()
.await?
.error_for_status()?;
Ok(())
}
// --- Config Methods ---
pub async fn get_llm_providers_config(&self) -> Result<LlmProvidersConfig> {
let url = format!("{}/configs/llm_providers", self.base_url);
let config = self
.client
.get(&url)
.send()
.await?
.error_for_status()?
.json::<LlmProvidersConfig>()
.await?;
Ok(config)
}
pub async fn update_llm_providers_config(
&self,
payload: &LlmProvidersConfig,
) -> Result<LlmProvidersConfig> {
let url = format!("{}/configs/llm_providers", self.base_url);
let updated_config = self
.client
.put(&url)
.json(payload)
.send()
.await?
.error_for_status()?
.json::<LlmProvidersConfig>()
.await?;
Ok(updated_config)
}
pub async fn get_analysis_template_sets(&self) -> Result<AnalysisTemplateSets> {
let url = format!("{}/configs/analysis_template_sets", self.base_url);
let config = self
.client
.get(&url)
.send()
.await?
.error_for_status()?
.json::<AnalysisTemplateSets>()
.await?;
Ok(config)
}
pub async fn update_analysis_template_sets(
&self,
payload: &AnalysisTemplateSets,
) -> Result<AnalysisTemplateSets> {
let url = format!("{}/configs/analysis_template_sets", self.base_url);
let updated_config = self
.client
.put(&url)
.json(payload)
.send()
.await?
.error_for_status()?
.json::<AnalysisTemplateSets>()
.await?;
Ok(updated_config)
}
pub async fn get_data_sources_config(&self) -> Result<DataSourcesConfig> {
let url = format!("{}/configs/data_sources", self.base_url);
let config = self
.client
.get(&url)
.send()
.await?
.error_for_status()?
.json::<DataSourcesConfig>()
.await?;
Ok(config)
}
pub async fn update_data_sources_config(
&self,
payload: &DataSourcesConfig,
) -> Result<DataSourcesConfig> {
let url = format!("{}/configs/data_sources", self.base_url);
let updated_config = self
.client
.put(&url)
.json(payload)
.send()
.await?
.error_for_status()?
.json::<DataSourcesConfig>()
.await?;
Ok(updated_config)
}
}

View File

@ -1,6 +1,6 @@
use crate::config::AppConfig;
use crate::error::Result;
use crate::persistence::PersistenceClient;
use common_contracts::persistence_client::PersistenceClient;
use async_nats::Client as NatsClient;
use common_contracts::registry::{ServiceRegistration, ServiceRole};
use std::collections::HashMap;

View File

@ -0,0 +1,53 @@
use service_kit::api_dto;
#[api_dto]
pub enum TaskAcknowledgement {
Accepted,
Rejected { reason: String },
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn test_ack_serialization() {
// 1. Test Accepted
let ack = TaskAcknowledgement::Accepted;
let json = serde_json::to_value(&ack).unwrap();
assert_eq!(json, json!("Accepted"));
// 2. Test Rejected
let ack = TaskAcknowledgement::Rejected { reason: "Bad Key".to_string() };
let json = serde_json::to_value(&ack).unwrap();
assert_eq!(json, json!({
"Rejected": {
"reason": "Bad Key"
}
}));
}
#[test]
fn test_ack_deserialization() {
// 1. Test Accepted
let json = json!("Accepted");
let ack: TaskAcknowledgement = serde_json::from_value(json).unwrap();
match ack {
TaskAcknowledgement::Accepted => (),
_ => panic!("Expected Accepted"),
}
// 2. Test Rejected
let json = json!({
"Rejected": {
"reason": "Timeout"
}
});
let ack: TaskAcknowledgement = serde_json::from_value(json).unwrap();
match ack {
TaskAcknowledgement::Rejected { reason } => assert_eq!(reason, "Timeout"),
_ => panic!("Expected Rejected"),
}
}
}

View File

@ -88,6 +88,13 @@ pub struct AnalysisTemplateSet {
pub modules: HashMap<String, AnalysisModuleConfig>,
}
/// Summary of an analysis template (for listing purposes).
#[api_dto]
pub struct AnalysisTemplateSummary {
pub id: String,
pub name: String,
}
/// Configuration for a single analysis module.
pub use crate::configs::AnalysisModuleConfig;

View File

@ -7,8 +7,7 @@ pub struct LlmConfig {
pub model_id: Option<String>,
pub temperature: Option<f32>,
pub max_tokens: Option<u32>,
#[serde(flatten)]
pub extra_params: HashMap<String, serde_json::Value>,
pub extra_params: Option<HashMap<String, serde_json::Value>>,
}
#[api_dto]
@ -26,12 +25,7 @@ pub enum SelectionMode {
},
}
#[api_dto]
#[derive(PartialEq)]
pub struct ContextSelectorConfig {
#[serde(flatten)]
pub mode: SelectionMode,
}
pub type ContextSelectorConfig = SelectionMode;
#[api_dto]
#[derive(PartialEq)]

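What the `extra_params` change above means on the wire, shown with a deliberately reduced struct (the real `LlmConfig` has more fields); this is an illustration of serde behaviour, not code from this commit.

```rust
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

#[derive(Serialize, Deserialize)]
struct ReducedLlmConfig {
    model_id: Option<String>,
    // Before: `#[serde(flatten)] extra_params: HashMap<String, Value>` meant unknown
    // keys sat at the top level of the object: {"model_id":"m","top_p":0.9}
    // After: a plain optional map, so they nest under the field:
    // {"model_id":"m","extra_params":{"top_p":0.9}}
    extra_params: Option<HashMap<String, serde_json::Value>>,
}
```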
View File

@ -17,3 +17,4 @@ pub mod configs;
pub mod data_formatting;
pub mod workflow_node;
pub mod workflow_runner;
pub mod ack;

View File

@ -95,10 +95,21 @@ pub struct TaskMetadata {
/// The execution trace log path
pub execution_log_path: Option<String>,
/// Additional arbitrary metadata
#[serde(flatten)]
pub extra: HashMap<String, serde_json::Value>,
}
/// Comprehensive snapshot state for a single task
#[api_dto]
pub struct TaskStateSnapshot {
pub task_id: String,
pub status: TaskStatus,
pub logs: Vec<String>, // Historical logs for this task
pub content: Option<String>, // Current streamed content buffer
pub input_commit: Option<String>,
pub output_commit: Option<String>,
pub metadata: Option<TaskMetadata>,
}
// Topic: events.workflow.{request_id}
/// Unified event stream for frontend consumption.
#[api_dto]
@ -158,7 +169,13 @@ pub enum WorkflowEvent {
task_graph: WorkflowDag,
tasks_status: HashMap<String, TaskStatus>, // Latest status of every task
tasks_output: HashMap<String, Option<String>>, // (Optional) key output summary (commit hash) for completed tasks
tasks_metadata: HashMap<String, TaskMetadata> // (New) key metadata for each task
tasks_metadata: HashMap<String, TaskMetadata>, // (New) key metadata for each task
/// New: Detailed state for each task including logs and content buffer
#[serde(default)]
task_states: HashMap<String, TaskStateSnapshot>,
logs: Vec<String>, // (New) Replay of this session's historical logs (global)
}
}

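A hedged sketch of how a producer might fill the new snapshot fields that the frontend store replays (not part of this diff): per-task logs and content buffers go into `task_states`, the raw session lines into `logs`. Task ids, log lines, commits, and the exact import paths are assumptions.

```rust
use std::collections::HashMap;
use common_contracts::messages::TaskStateSnapshot;
use common_contracts::workflow_types::TaskStatus;

fn collect_snapshot_state() -> (HashMap<String, TaskStateSnapshot>, Vec<String>) {
    let mut task_states = HashMap::new();
    task_states.insert(
        "fetch_data".to_string(), // hypothetical task id
        TaskStateSnapshot {
            task_id: "fetch_data".to_string(),
            status: TaskStatus::Completed,
            logs: vec!["[10:00:01] [INFO] cache hit".to_string()],
            content: Some("partial report buffer".to_string()),
            input_commit: None,
            output_commit: Some("abc123".to_string()),
            metadata: None,
        },
    );

    // Global log replay: one formatted line per TaskLog event, matching what the
    // store's `appendGlobalLog` builds incrementally in realtime mode.
    let logs = vec!["[10:00:01] [fetch_data] [INFO] cache hit".to_string()];
    (task_states, logs)
}
```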
View File

@ -4,7 +4,8 @@ use crate::dtos::{
NewWorkflowHistory, WorkflowHistoryDto, WorkflowHistorySummaryDto
};
use crate::config_models::{
DataSourcesConfig, LlmProvidersConfig, AnalysisTemplateSets
DataSourcesConfig, LlmProvidersConfig,
AnalysisTemplateSet, AnalysisTemplateSummary
};
use reqwest::{Client, StatusCode};
use uuid::Uuid;
@ -27,7 +28,7 @@ impl PersistenceClient {
// --- Workflow History (NEW) ---
pub async fn create_workflow_history(&self, dto: &NewWorkflowHistory) -> Result<WorkflowHistoryDto> {
let url = format!("{}/history", self.base_url);
let url = format!("{}/api/v1/history", self.base_url);
let resp = self.client
.post(&url)
.json(dto)
@ -39,7 +40,7 @@ impl PersistenceClient {
}
pub async fn get_workflow_histories(&self, symbol: Option<&str>, limit: Option<i64>) -> Result<Vec<WorkflowHistorySummaryDto>> {
let url = format!("{}/history", self.base_url);
let url = format!("{}/api/v1/history", self.base_url);
let mut req = self.client.get(&url);
if let Some(s) = symbol {
req = req.query(&[("symbol", s)]);
@ -53,7 +54,7 @@ impl PersistenceClient {
}
pub async fn get_workflow_history_by_id(&self, request_id: Uuid) -> Result<WorkflowHistoryDto> {
let url = format!("{}/history/{}", self.base_url, request_id);
let url = format!("{}/api/v1/history/{}", self.base_url, request_id);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let result = resp.json().await?;
Ok(result)
@ -62,7 +63,7 @@ impl PersistenceClient {
// --- Session Data ---
pub async fn insert_session_data(&self, dto: &SessionDataDto) -> Result<()> {
let url = format!("{}/session-data", self.base_url);
let url = format!("{}/api/v1/session-data", self.base_url);
self.client
.post(&url)
.json(dto)
@ -73,7 +74,7 @@ impl PersistenceClient {
}
pub async fn get_session_data(&self, request_id: Uuid) -> Result<Vec<SessionDataDto>> {
let url = format!("{}/session-data/{}", self.base_url, request_id);
let url = format!("{}/api/v1/session-data/{}", self.base_url, request_id);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let data = resp.json().await?;
Ok(data)
@ -82,7 +83,7 @@ impl PersistenceClient {
// --- Provider Cache ---
pub async fn get_cache(&self, key: &str) -> Result<Option<ProviderCacheDto>> {
let url = format!("{}/provider-cache", self.base_url);
let url = format!("{}/api/v1/provider-cache", self.base_url);
let resp = self.client
.get(&url)
.query(&[("key", key)])
@ -99,7 +100,7 @@ impl PersistenceClient {
}
pub async fn set_cache(&self, dto: &ProviderCacheDto) -> Result<()> {
let url = format!("{}/provider-cache", self.base_url);
let url = format!("{}/api/v1/provider-cache", self.base_url);
self.client
.post(&url)
.json(dto)
@ -111,8 +112,21 @@ impl PersistenceClient {
// --- Existing Methods (Ported for completeness) ---
pub async fn get_financials(&self, symbol: &str) -> Result<Vec<TimeSeriesFinancialDto>> {
let url = format!("{}/api/v1/market-data/financial-statements/{}", self.base_url, symbol);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let financials = resp.json().await?;
Ok(financials)
}
pub async fn clear_history(&self) -> Result<()> {
let url = format!("{}/api/v1/system/history", self.base_url);
self.client.delete(&url).send().await?.error_for_status()?;
Ok(())
}
pub async fn get_company_profile(&self, symbol: &str) -> Result<Option<CompanyProfileDto>> {
let url = format!("{}/companies/{}", self.base_url, symbol);
let url = format!("{}/api/v1/companies/{}", self.base_url, symbol);
let resp = self.client.get(&url).send().await?;
if resp.status() == StatusCode::NOT_FOUND {
return Ok(None);
@ -125,7 +139,7 @@ impl PersistenceClient {
if dtos.is_empty() {
return Ok(());
}
let url = format!("{}/market-data/financials/batch", self.base_url);
let url = format!("{}/api/v1/market-data/financials/batch", self.base_url);
let batch = TimeSeriesFinancialBatchDto { records: dtos };
self.client
@ -140,51 +154,80 @@ impl PersistenceClient {
// --- Configs ---
pub async fn get_data_sources_config(&self) -> Result<DataSourcesConfig> {
let url = format!("{}/configs/data_sources", self.base_url);
let url = format!("{}/api/v1/configs/data_sources", self.base_url);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let config = resp.json().await?;
Ok(config)
}
pub async fn update_data_sources_config(&self, config: &DataSourcesConfig) -> Result<DataSourcesConfig> {
let url = format!("{}/configs/data_sources", self.base_url);
let url = format!("{}/api/v1/configs/data_sources", self.base_url);
let resp = self.client.put(&url).json(config).send().await?.error_for_status()?;
let updated = resp.json().await?;
Ok(updated)
}
pub async fn get_llm_providers_config(&self) -> Result<LlmProvidersConfig> {
let url = format!("{}/configs/llm_providers", self.base_url);
let url = format!("{}/api/v1/configs/llm_providers", self.base_url);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let config = resp.json().await?;
Ok(config)
}
pub async fn update_llm_providers_config(&self, config: &LlmProvidersConfig) -> Result<LlmProvidersConfig> {
let url = format!("{}/configs/llm_providers", self.base_url);
let url = format!("{}/api/v1/configs/llm_providers", self.base_url);
let resp = self.client.put(&url).json(config).send().await?.error_for_status()?;
let updated = resp.json().await?;
Ok(updated)
}
pub async fn get_analysis_template_sets(&self) -> Result<AnalysisTemplateSets> {
let url = format!("{}/configs/analysis_template_sets", self.base_url);
// pub async fn get_analysis_template_sets(&self) -> Result<AnalysisTemplateSets> {
// let url = format!("{}/api/v1/configs/analysis_template_sets", self.base_url);
// let resp = self.client.get(&url).send().await?.error_for_status()?;
// let config = resp.json().await?;
// Ok(config)
// }
// pub async fn update_analysis_template_sets(&self, config: &AnalysisTemplateSets) -> Result<AnalysisTemplateSets> {
// let url = format!("{}/api/v1/configs/analysis_template_sets", self.base_url);
// let resp = self.client.put(&url).json(config).send().await?.error_for_status()?;
// let updated = resp.json().await?;
// Ok(updated)
// }
// --- Templates (Granular API) ---
pub async fn get_templates(&self) -> Result<Vec<AnalysisTemplateSummary>> {
let url = format!("{}/api/v1/templates", self.base_url);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let config = resp.json().await?;
Ok(config)
let summaries = resp.json().await?;
Ok(summaries)
}
pub async fn update_analysis_template_sets(&self, config: &AnalysisTemplateSets) -> Result<AnalysisTemplateSets> {
let url = format!("{}/configs/analysis_template_sets", self.base_url);
let resp = self.client.put(&url).json(config).send().await?.error_for_status()?;
pub async fn get_template_by_id(&self, id: &str) -> Result<AnalysisTemplateSet> {
let url = format!("{}/api/v1/templates/{}", self.base_url, id);
let resp = self.client.get(&url).send().await?.error_for_status()?;
let template = resp.json().await?;
Ok(template)
}
pub async fn update_template(&self, id: &str, template: &AnalysisTemplateSet) -> Result<AnalysisTemplateSet> {
let url = format!("{}/api/v1/templates/{}", self.base_url, id);
let resp = self.client.put(&url).json(template).send().await?.error_for_status()?;
let updated = resp.json().await?;
Ok(updated)
}
pub async fn delete_template(&self, id: &str) -> Result<()> {
let url = format!("{}/api/v1/templates/{}", self.base_url, id);
self.client.delete(&url).send().await?.error_for_status()?;
Ok(())
}
// --- Deprecated/Legacy Support ---
pub async fn update_provider_status(&self, symbol: &str, provider_id: &str, status: ProviderStatusDto) -> Result<()> {
let url = format!("{}/companies/{}/providers/{}/status", self.base_url, symbol, provider_id);
let url = format!("{}/api/v1/companies/{}/providers/{}/status", self.base_url, symbol, provider_id);
self.client.put(&url).json(&status).send().await?.error_for_status()?;
Ok(())
}
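// A minimal usage sketch of the granular template API above (the service URL is the default
// internal one used elsewhere in this workspace; error handling omitted):
//   let client = PersistenceClient::new("http://data-persistence-service:3000".to_string());
//   let summaries = client.get_templates().await?;
//   if let Some(first) = summaries.first() {
//       let template = client.get_template_by_id(&first.id).await?;
//       client.update_template(&first.id, &template).await?;
//   }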

View File

@ -1,6 +1,7 @@
use async_trait::async_trait;
use anyhow::Result;
use serde_json::Value;
use serde::{Serialize, Deserialize};
use std::collections::HashMap;
/// Context provided to the node execution
@ -21,6 +22,7 @@ impl NodeContext {
}
/// Content of an artifact
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ArtifactContent {
Json(Value),
Text(String),
@ -60,11 +62,29 @@ pub struct NodeExecutionResult {
pub meta_summary: Option<Value>,
}
/// New Type for Cache Key to avoid string confusion
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CacheKey(pub String);
impl std::fmt::Display for CacheKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
#[async_trait]
pub trait WorkflowNode: Send + Sync {
/// Unique identifier/type of the node (e.g., "yfinance", "analysis")
fn node_type(&self) -> &str;
/// Cache Configuration Interface
///
/// Returns `None` (default) to bypass cache.
/// Returns `Some((CacheKey, Duration))` to enable caching.
fn get_cache_config(&self, _config: &Value) -> Option<(CacheKey, std::time::Duration)> {
None
}
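// Example: a data-provider node can opt in per symbol, e.g.
//   Some((CacheKey("finnhub:company_data:AAPL:all".to_string()), std::time::Duration::from_secs(86_400)))
// while derived/analysis nodes keep the default `None` and always re-execute.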
/// Core execution logic
///
/// # Arguments

View File

@ -3,11 +3,15 @@ use anyhow::Result;
use tracing::{info, error};
use async_nats::Client;
use crate::workflow_types::{WorkflowTaskCommand, WorkflowTaskEvent, TaskStatus, TaskResult};
use crate::messages::WorkflowEvent as CommonWorkflowEvent;
use crate::workflow_node::{WorkflowNode, NodeContext};
use crate::subjects::SubjectMessage;
use workflow_context::WorkerContext;
use crate::workflow_node::{WorkflowNode, NodeContext, ArtifactContent, NodeExecutionResult};
use crate::dtos::ProviderCacheDto;
use crate::persistence_client::PersistenceClient;
use crate::workflow_types::{WorkflowTaskCommand, WorkflowTaskEvent, TaskResult, TaskStatus};
use chrono::Utc;
pub struct WorkflowNodeRunner {
nats: Client,
@ -25,21 +29,132 @@ impl WorkflowNodeRunner {
let task_id = cmd.task_id.clone();
info!("Starting node execution: type={}, task_id={}", node.node_type(), task_id);
// 1. Prepare Context
// 0. Publish Running Event
let running_event = WorkflowTaskEvent {
request_id: cmd.request_id,
task_id: task_id.clone(),
status: TaskStatus::Running,
result: None,
};
self.publish_event(running_event).await?;
// Setup Persistence Client (TODO: inject this dependency instead of constructing it ad-hoc)
// For now, read the service URL from the environment, falling back to the standard internal URL.
let persistence_url = std::env::var("DATA_PERSISTENCE_SERVICE_URL").unwrap_or_else(|_| "http://data-persistence-service:3000".to_string());
let persistence = PersistenceClient::new(persistence_url);
// 1. Cache Check (Pre-check)
let cache_config = node.get_cache_config(&cmd.config);
if let Some((cache_key, _)) = &cache_config {
let key_str = cache_key.to_string();
match persistence.get_cache(&key_str).await {
Ok(Some(cached_entry)) => {
info!("Cache HIT for key: {}", key_str);
// Deserialize artifacts
if let Ok(artifacts) = serde_json::from_value::<std::collections::HashMap<String, ArtifactContent>>(cached_entry.data_payload) {
let result = NodeExecutionResult {
artifacts,
meta_summary: Some(serde_json::json!({"source": "cache", "key": key_str})),
};
// Pre-check: Validate cache content by attempting render_report
// If it fails (e.g. missing financials.md in old cache), treat as Cache MISS
match node.render_report(&result) {
Ok(_) => {
// Skip execution, jump to report rendering & commit
return self.process_result(node, &cmd, result).await;
},
Err(e) => {
tracing::warn!("Cache HIT but validation failed: {}. Treating as MISS.", e);
// Fall through to normal execution...
}
}
} else {
error!("Failed to deserialize cached artifacts for {}", key_str);
}
},
Ok(None) => info!("Cache MISS for key: {}", key_str),
Err(e) => error!("Cache lookup failed: {}", e),
}
}
// 2. Prepare Context
let root_path = cmd.storage.root_path.clone();
let req_id = cmd.request_id.to_string();
let base_commit = cmd.context.base_commit.clone().unwrap_or_default();
let context = NodeContext::new(req_id.clone(), base_commit.clone(), root_path.clone());
// 2. Execute Node Logic (Async)
// 3. Execute Node Logic (Async) with Heartbeat
let hb_task_id = task_id.clone();
let hb_req_id = cmd.request_id;
let hb_nats = self.nats.clone();
let heartbeat_handle = tokio::spawn(async move {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(5));
loop {
interval.tick().await;
// Publish Heartbeat
let event = WorkflowTaskEvent {
request_id: hb_req_id,
task_id: hb_task_id.clone(),
status: TaskStatus::Running,
result: None,
};
let subject = event.subject().to_string();
if let Ok(payload) = serde_json::to_vec(&event) {
if let Err(e) = hb_nats.publish(subject, payload.into()).await {
error!("Failed to publish heartbeat: {}", e);
}
}
}
});
let exec_result = match node.execute(&context, &cmd.config).await {
Ok(res) => res,
Ok(res) => {
heartbeat_handle.abort();
res
},
Err(e) => {
heartbeat_handle.abort();
return self.handle_failure(&cmd, &e.to_string()).await;
}
};
// 4. Cache Write (Post-write)
if let Some((cache_key, ttl)) = cache_config {
let key_str = cache_key.to_string();
if let Ok(payload) = serde_json::to_value(&exec_result.artifacts) {
let cache_dto = ProviderCacheDto {
cache_key: key_str.clone(),
data_payload: payload,
expires_at: Utc::now() + chrono::Duration::from_std(ttl).unwrap_or(chrono::Duration::hours(24)),
updated_at: None,
};
// Fire and forget cache write
let p_client = persistence.clone();
tokio::spawn(async move {
if let Err(e) = p_client.set_cache(&cache_dto).await {
error!("Failed to write cache for {}: {}", key_str, e);
}
});
}
}
// 5. Process Result (Render, Commit, Publish)
self.process_result(node, &cmd, exec_result).await
}
// Extracted common logic for processing execution result (whether from cache or fresh execution)
async fn process_result<N>(&self, node: Arc<N>, cmd: &WorkflowTaskCommand, exec_result: NodeExecutionResult) -> Result<()>
where N: WorkflowNode + 'static
{
let task_id = cmd.task_id.clone();
let root_path = cmd.storage.root_path.clone();
let req_id = cmd.request_id.to_string();
let base_commit = cmd.context.base_commit.clone().unwrap_or_default();
// 3. Render Report (Sync)
let report_md = match node.render_report(&exec_result) {
Ok(md) => md,
@ -54,7 +169,11 @@ impl WorkflowNodeRunner {
let base_commit_clone = base_commit.clone();
let root_path_clone = root_path.clone();
let req_id_clone = req_id.clone();
// Check for financials.md BEFORE moving artifacts
let has_financials_md = exec_result.artifacts.contains_key("financials.md");
let exec_result_artifacts = exec_result.artifacts;
let report_md_clone = report_md.clone();
let symbol = cmd.config.get("symbol").and_then(|s| s.as_str()).unwrap_or("unknown").to_string();
let symbol_for_blocking = symbol.clone();
@ -77,9 +196,11 @@ impl WorkflowNodeRunner {
ctx.write_file(&full_path, std::str::from_utf8(&bytes).unwrap_or(""))?;
}
// Write Report
// Write Report (ONLY if not superseded by financials.md)
if !has_financials_md {
let report_path = format!("{}/report.md", base_dir);
ctx.write_file(&report_path, &report_md_clone)?;
}
// Write Execution Log
let log_path = format!("{}/_execution.md", base_dir);
@ -98,20 +219,43 @@ impl WorkflowNodeRunner {
Err(e) => return self.handle_failure(&cmd, &format!("Task join error: {}", e)).await,
};
// 5. Publish Stream Update
// 5. Publish Stream Update (ONLY if not already streamed)
// Streaming workers already reach the frontend incrementally:
//   LLM Client streams tokens -> Orchestrator forwards -> Frontend (streaming OK)
// Re-publishing the full report here as a single delta makes the frontend append it on top of
// the accumulated stream (duplicate content) and can feed back into the orchestrator's
// forwarding path. `TaskCompleted` plus the committed files remain the source of truth for the
// final state, and non-streaming tasks (e.g. DataFetch) produce structured data rather than a
// Markdown stream, so they lose nothing.
// The block below is therefore commented out to prevent duplication/loops.
/*
let stream_event = CommonWorkflowEvent::TaskStreamUpdate {
task_id: task_id.clone(),
content_delta: report_md.clone(),
index: 0,
};
self.publish_common(&cmd.request_id, stream_event).await?;
*/
// 5.1 Update Meta Summary with Paths
let mut summary = exec_result.meta_summary.clone().unwrap_or(serde_json::json!({}));
if let Some(obj) = summary.as_object_mut() {
// Reconstruct paths used in VGCS block (must match)
let base_dir = format!("raw/{}/{}", node.node_type(), symbol);
obj.insert("output_path".to_string(), serde_json::Value::String(format!("{}/report.md", base_dir)));
let output_filename = if has_financials_md { "financials.md" } else { "report.md" };
obj.insert("output_path".to_string(), serde_json::Value::String(format!("{}/{}", base_dir, output_filename)));
obj.insert("execution_log_path".to_string(), serde_json::Value::String(format!("{}/_execution.md", base_dir)));
}
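// Example: for the tushare node and symbol "600521.SS" this yields
//   "output_path": "raw/tushare/600521.SS/financials.md",
//   "execution_log_path": "raw/tushare/600521.SS/_execution.md"
// (report.md is used instead when the node did not emit financials.md).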

View File

@ -5,6 +5,7 @@ mod configs;
mod market_data;
mod system;
mod session_data;
mod templates;
use crate::AppState;
use axum::{
@ -19,15 +20,15 @@ pub fn create_router(_state: AppState) -> Router<AppState> {
.route("/api/v1/system/history", axum::routing::delete(system::clear_history))
// Configs
.route(
"/configs/llm_providers",
"/api/v1/configs/llm_providers",
get(configs::get_llm_providers_config).put(configs::update_llm_providers_config),
)
// .route(
// "/api/v1/configs/analysis_template_sets",
// get(configs::get_analysis_template_sets).put(configs::update_analysis_template_sets),
// )
.route(
"/configs/analysis_template_sets",
get(configs::get_analysis_template_sets).put(configs::update_analysis_template_sets),
)
.route(
"/configs/data_sources",
"/api/v1/configs/data_sources",
get(configs::get_data_sources_config).put(configs::update_data_sources_config),
)
// Companies
@ -72,6 +73,15 @@ pub fn create_router(_state: AppState) -> Router<AppState> {
.route(
"/history/{request_id}",
get(history::get_workflow_history_by_id),
)
// Templates (NEW)
.route(
"/api/v1/templates",
get(templates::get_templates),
)
.route(
"/api/v1/templates/{id}",
get(templates::get_template_by_id).put(templates::update_template).delete(templates::delete_template),
);
router

View File

@ -0,0 +1,80 @@
use axum::{extract::{Path, State}, Json};
use common_contracts::config_models::{AnalysisTemplateSets, AnalysisTemplateSet, AnalysisTemplateSummary};
use service_kit::api;
use crate::{db::system_config, AppState, ServerError};
#[api(GET, "/api/v1/templates", output(detail = "Vec<AnalysisTemplateSummary>"))]
pub async fn get_templates(
State(state): State<AppState>,
) -> Result<Json<Vec<AnalysisTemplateSummary>>, ServerError> {
let pool = state.pool();
// Note: This fetches the entire config blob. An optimization would be to query the JSONB fields
// directly, but for now we follow the requested document-store pattern.
let config = system_config::get_config::<AnalysisTemplateSets>(pool, "analysis_template_sets").await?;
let mut summaries: Vec<AnalysisTemplateSummary> = config.iter()
.map(|(id, template)| AnalysisTemplateSummary {
id: id.clone(),
name: template.name.clone(),
})
.collect();
// Sort by name for consistency
summaries.sort_by(|a, b| a.name.cmp(&b.name));
Ok(Json(summaries))
}
#[api(GET, "/api/v1/templates/{id}", output(detail = "AnalysisTemplateSet"))]
pub async fn get_template_by_id(
State(state): State<AppState>,
Path(id): Path<String>,
) -> Result<Json<AnalysisTemplateSet>, ServerError> {
let pool = state.pool();
let mut config = system_config::get_config::<AnalysisTemplateSets>(pool, "analysis_template_sets").await?;
let template = config.remove(&id).ok_or_else(|| ServerError::NotFound(format!("Template {} not found", id)))?;
Ok(Json(template))
}
#[api(PUT, "/api/v1/templates/{id}", output(detail = "AnalysisTemplateSet"))]
pub async fn update_template(
State(state): State<AppState>,
Path(id): Path<String>,
Json(payload): Json<AnalysisTemplateSet>,
) -> Result<Json<AnalysisTemplateSet>, ServerError> {
let pool = state.pool();
// 1. Fetch the whole config blob
let mut config = system_config::get_config::<AnalysisTemplateSets>(pool, "analysis_template_sets").await?;
// 2. Update the specific template in the map
config.insert(id.clone(), payload);
// 3. Save the whole blob back
let _ = system_config::update_config(pool, "analysis_template_sets", &config).await?;
// 4. Return the updated template
Ok(Json(config.remove(&id).unwrap()))
}
#[api(DELETE, "/api/v1/templates/{id}")]
pub async fn delete_template(
State(state): State<AppState>,
Path(id): Path<String>,
) -> Result<axum::http::StatusCode, ServerError> {
let pool = state.pool();
// 1. Fetch the whole config blob
let mut config = system_config::get_config::<AnalysisTemplateSets>(pool, "analysis_template_sets").await?;
// 2. Remove the specific template
if config.remove(&id).is_none() {
return Err(ServerError::NotFound(format!("Template {} not found", id)));
}
// 3. Save the whole blob back
let _ = system_config::update_config(pool, "analysis_template_sets", &config).await?;
Ok(axum::http::StatusCode::NO_CONTENT)
}

View File

@ -43,3 +43,4 @@ config = "0.15.19"
# Error Handling
thiserror = "2.0.17"
anyhow = "1.0"
async-trait = "0.1.89"

View File

@ -25,7 +25,7 @@ async fn poll_and_update_config(state: &AppState) -> Result<()> {
info!("Polling for data source configurations...");
let client = reqwest::Client::new();
let url = format!(
"{}/configs/data_sources",
"{}/api/v1/configs/data_sources",
state.config.data_persistence_service_url
);

View File

@ -0,0 +1,13 @@
use anyhow::Result;
use common_contracts::workflow_types::WorkflowTaskCommand;
use crate::state::AppState;
use crate::workflow_adapter::FinnhubNode;
use common_contracts::workflow_runner::WorkflowNodeRunner;
use std::sync::Arc;
pub async fn handle_workflow_command(state: AppState, nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> {
let node = Arc::new(FinnhubNode::new(state));
let runner = WorkflowNodeRunner::new(nats);
runner.run(node, cmd).await
}
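// Note: the Finnhub consumer (message_consumer.rs in this crate) subscribes on
// "workflow.cmd.provider.finnhub" and forwards each WorkflowTaskCommand here, so adding a
// provider amounts to a WorkflowNode impl (FinnhubNode) plus this thin adapter.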

View File

@ -7,7 +7,8 @@ mod mapping;
mod message_consumer;
// mod persistence; // Removed
mod state;
mod worker;
mod workflow_adapter;
mod generic_worker;
mod config_poller;
use crate::config::AppConfig;

View File

@ -1,7 +1,6 @@
use crate::error::Result;
use crate::state::{AppState, ServiceOperationalStatus};
use common_contracts::messages::FetchCompanyDataCommand;
use common_contracts::subjects::NatsSubject;
use common_contracts::workflow_types::WorkflowTaskCommand;
use futures_util::StreamExt;
use std::time::Duration;
use tracing::{error, info, warn};
@ -24,7 +23,7 @@ pub async fn run(state: AppState) -> Result<()> {
match async_nats::connect(&state.config.nats_addr).await {
Ok(client) => {
info!("Successfully connected to NATS.");
if let Err(e) = subscribe_and_process(state.clone(), client).await {
if let Err(e) = subscribe_workflow(state.clone(), client).await {
error!("NATS subscription error: {}. Reconnecting in 10s...", e);
}
}
@ -36,54 +35,56 @@ pub async fn run(state: AppState) -> Result<()> {
}
}
async fn subscribe_and_process(state: AppState, client: async_nats::Client) -> Result<()> {
let subject = NatsSubject::DataFetchCommands.to_string();
use common_contracts::ack::TaskAcknowledgement;
async fn subscribe_workflow(state: AppState, client: async_nats::Client) -> Result<()> {
// Finnhub routing key: provider.finnhub
let subject = "workflow.cmd.provider.finnhub".to_string();
let mut subscriber = client.subscribe(subject.clone()).await?;
info!(
"Consumer started, waiting for messages on subject '{}'",
subject
);
info!("Workflow Consumer started on '{}'", subject);
while let Some(message) = subscriber.next().await {
// Check status
let current_status = state.status.read().await.clone();
if matches!(current_status, ServiceOperationalStatus::Degraded {..}) {
warn!("Service became degraded. Disconnecting from NATS and pausing consumption.");
warn!("Service became degraded. Disconnecting from NATS.");
// Reject if degraded
if let Some(reply_to) = message.reply {
let ack = TaskAcknowledgement::Rejected { reason: "Service degraded".to_string() };
if let Ok(payload) = serde_json::to_vec(&ack) {
let _ = client.publish(reply_to, payload.into()).await;
}
}
subscriber.unsubscribe().await?;
return Ok(());
}
info!("Received NATS message.");
let state_clone = state.clone();
let publisher_clone = client.clone();
// Accept
if let Some(reply_to) = message.reply.clone() {
let ack = TaskAcknowledgement::Accepted;
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Acceptance Ack: {}", e);
}
}
}
let state = state.clone();
let client = client.clone();
tokio::spawn(async move {
match serde_json::from_slice::<FetchCompanyDataCommand>(&message.payload) {
Ok(command) => {
info!("Deserialized command for symbol: {}", command.symbol);
// Skip processing if market is 'CN'
if command.market.to_uppercase() == "CN" {
info!(
"Skipping command for symbol '{}' as its market ('{}') is 'CN'.",
command.symbol, command.market
);
return;
}
if let Err(e) =
crate::worker::handle_fetch_command(state_clone, command, publisher_clone)
.await
{
error!("Error handling fetch command: {:?}", e);
}
}
Err(e) => {
error!("Failed to deserialize message: {}", e);
match serde_json::from_slice::<WorkflowTaskCommand>(&message.payload) {
Ok(cmd) => {
info!("Received workflow command for task: {}", cmd.task_id);
if let Err(e) = crate::generic_worker::handle_workflow_command(state, client, cmd).await {
error!("Generic worker handler failed: {}", e);
}
},
Err(e) => error!("Failed to parse WorkflowTaskCommand: {}", e),
}
});
}
Ok(())
}
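// A sketch of the dispatching side of this ACK handshake, assuming the orchestrator sends
// commands via NATS request/reply and treats a missing reply as a timeout. The subject, the 5s
// deadline, and the function name are illustrative, not taken from this commit, and it assumes
// TaskAcknowledgement derives Deserialize as well as Serialize.
async fn dispatch_with_ack(
    nats: &async_nats::Client,
    subject: String,
    cmd: &WorkflowTaskCommand,
) -> anyhow::Result<TaskAcknowledgement> {
    let payload = serde_json::to_vec(cmd)?;
    // Outer `?`: no ACK within the deadline; inner `?`: NATS request failure.
    let reply = tokio::time::timeout(
        std::time::Duration::from_secs(5),
        nats.request(subject, payload.into()),
    )
    .await??;
    Ok(serde_json::from_slice(&reply.payload)?)
}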

View File

@ -1,265 +0,0 @@
use crate::error::{AppError, Result};
use common_contracts::persistence_client::PersistenceClient;
use crate::state::AppState;
use chrono::{Datelike, Utc, Duration};
use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto, SessionDataDto, ProviderCacheDto};
use common_contracts::messages::{CompanyProfilePersistedEvent, FetchCompanyDataCommand, FinancialsPersistedEvent, DataFetchFailedEvent};
use common_contracts::observability::{TaskProgress, ObservabilityTaskStatus};
use tracing::{error, info};
pub async fn handle_fetch_command(
state: AppState,
command: FetchCompanyDataCommand,
publisher: async_nats::Client,
) -> Result<()> {
match handle_fetch_command_inner(state.clone(), &command, &publisher).await {
Ok(_) => Ok(()),
Err(e) => {
error!("Finnhub workflow failed: {}", e);
// Publish failure event
let event = DataFetchFailedEvent {
request_id: command.request_id,
symbol: command.symbol.clone(),
error: e.to_string(),
provider_id: Some("finnhub".to_string()),
};
let _ = publisher
.publish(
"events.data.fetch_failed".to_string(),
serde_json::to_vec(&event).unwrap().into(),
)
.await;
// Update task status
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.status = ObservabilityTaskStatus::Failed;
task.details = format!("Failed: {}", e);
} else {
// If task doesn't exist (e.g. failed at insert), create a failed task
let task = TaskProgress {
request_id: command.request_id,
task_name: format!("finnhub:{}", command.symbol),
status: ObservabilityTaskStatus::Failed,
progress_percent: 0,
details: format!("Failed: {}", e),
started_at: Utc::now(),
};
state.tasks.insert(command.request_id, task);
}
Err(e)
}
}
}
async fn handle_fetch_command_inner(
state: AppState,
command: &FetchCompanyDataCommand,
publisher: &async_nats::Client,
) -> Result<()> {
info!("Handling Finnhub fetch data command.");
state.tasks.insert(
command.request_id,
TaskProgress {
request_id: command.request_id,
task_name: format!("finnhub:{}", command.symbol),
status: ObservabilityTaskStatus::InProgress,
progress_percent: 10,
details: "Fetching data from Finnhub".to_string(),
started_at: chrono::Utc::now(),
},
);
let provider = match state.get_provider().await {
Some(p) => p,
None => {
let reason = "Execution failed: Finnhub provider is not available (misconfigured).".to_string();
// Return error to trigger outer handler
return Err(AppError::ProviderNotAvailable(reason));
}
};
let persistence_client = PersistenceClient::new(state.config.data_persistence_service_url.clone());
let symbol = command.symbol.to_string();
// --- 1. Check Cache ---
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.details = "Checking cache...".to_string();
}
let cache_key = format!("finnhub:{}:all", symbol);
let (profile, financials) = match persistence_client.get_cache(&cache_key).await.map_err(|e| AppError::Internal(e.to_string()))? {
Some(cache_entry) => {
info!("Cache HIT for {}", cache_key);
let data: (CompanyProfileDto, Vec<TimeSeriesFinancialDto>) = serde_json::from_value(cache_entry.data_payload)
.map_err(|e| AppError::Internal(format!("Failed to deserialize cache: {}", e)))?;
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.details = "Data retrieved from cache".to_string();
task.progress_percent = 50;
}
data
},
None => {
info!("Cache MISS for {}", cache_key);
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.details = "Fetching from Finnhub API...".to_string();
task.progress_percent = 20;
}
let (p, f) = provider.fetch_all_data(command.symbol.as_str()).await?;
// Write to Cache
let payload = serde_json::json!((&p, &f));
persistence_client.set_cache(&ProviderCacheDto {
cache_key,
data_payload: payload,
expires_at: Utc::now() + Duration::hours(24),
updated_at: None,
}).await.map_err(|e| AppError::Internal(e.to_string()))?;
(p, f)
}
};
// --- 2. Snapshot Data ---
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.details = "Snapshotting data...".to_string();
task.progress_percent = 80;
}
// Global Profile
// REMOVED: upsert_company_profile is deprecated.
// let _ = persistence_client.upsert_company_profile(profile.clone()).await;
// Snapshot Profile
persistence_client.insert_session_data(&SessionDataDto {
request_id: command.request_id,
symbol: symbol.clone(),
provider: "finnhub".to_string(),
data_type: "company_profile".to_string(),
data_payload: serde_json::to_value(&profile).unwrap(),
created_at: None,
}).await.map_err(|e| AppError::Internal(e.to_string()))?;
// Snapshot Financials
persistence_client.insert_session_data(&SessionDataDto {
request_id: command.request_id,
symbol: symbol.clone(),
provider: "finnhub".to_string(),
data_type: "financial_statements".to_string(),
data_payload: serde_json::to_value(&financials).unwrap(),
created_at: None,
}).await.map_err(|e| AppError::Internal(e.to_string()))?;
// Update Provider Status
// REMOVED: update_provider_status is deprecated or missing in client.
/*
persistence_client.update_provider_status(command.symbol.as_str(), "finnhub", common_contracts::dtos::ProviderStatusDto {
last_updated: chrono::Utc::now(),
status: TaskStatus::Completed,
data_version: None,
}).await?;
*/
// --- 3. Publish events ---
let profile_event = CompanyProfilePersistedEvent {
request_id: command.request_id,
symbol: command.symbol.clone(),
};
publisher
.publish(
"events.data.company_profile_persisted".to_string(),
serde_json::to_vec(&profile_event).unwrap().into(),
)
.await?;
let years_set: std::collections::BTreeSet<u16> =
financials.iter().map(|f| f.period_date.year() as u16).collect();
let summary = format!("Fetched {} years of data from Finnhub", years_set.len());
let financials_event = FinancialsPersistedEvent {
request_id: command.request_id,
symbol: command.symbol.clone(),
years_updated: years_set.into_iter().collect(),
template_id: command.template_id.clone(),
provider_id: Some("finnhub".to_string()),
data_summary: Some(summary),
};
publisher
.publish(
"events.data.financials_persisted".to_string(),
serde_json::to_vec(&financials_event).unwrap().into(),
)
.await?;
// 4. Finalize
if let Some(mut task) = state.tasks.get_mut(&command.request_id) {
task.status = ObservabilityTaskStatus::Completed;
task.progress_percent = 100;
task.details = "Workflow finished successfully".to_string();
}
info!("Task {} completed successfully.", command.request_id);
Ok(())
}
#[cfg(test)]
mod integration_tests {
use super::*;
use crate::config::AppConfig;
use crate::state::AppState;
use common_contracts::symbol_utils::{CanonicalSymbol, Market};
use uuid::Uuid;
#[tokio::test]
async fn test_finnhub_fetch_flow() {
if std::env::var("NATS_ADDR").is_err() {
println!("Skipping integration test (no environment)");
return;
}
// 1. Environment
let api_key = std::env::var("FINNHUB_API_KEY")
.unwrap_or_else(|_| "d3fjs5pr01qolkndil0gd3fjs5pr01qolkndil10".to_string());
let api_url = std::env::var("FINNHUB_API_URL")
.unwrap_or_else(|_| "https://finnhub.io/api/v1".to_string());
let config = AppConfig::load().expect("Failed to load config");
let state = AppState::new(config.clone());
// 2. Manual Init Provider
state.update_provider(
Some(api_key),
Some(api_url)
).await;
assert!(state.get_provider().await.is_some());
// 3. Construct Command (AAPL)
let request_id = Uuid::new_v4();
let cmd = FetchCompanyDataCommand {
request_id,
symbol: CanonicalSymbol::new("AAPL", &Market::US),
market: "US".to_string(),
template_id: Some("default".to_string()),
output_path: None,
};
// 4. NATS
let nats_client = async_nats::connect(&config.nats_addr).await
.expect("Failed to connect to NATS");
// 5. Run
let result = handle_fetch_command_inner(state.clone(), &cmd, &nats_client).await;
// 6. Assert
assert!(result.is_ok(), "Worker execution failed: {:?}", result.err());
let task = state.tasks.get(&request_id).expect("Task should exist");
assert_eq!(task.status, ObservabilityTaskStatus::Completed);
}
}

View File

@ -0,0 +1,97 @@
use async_trait::async_trait;
use anyhow::{Result, anyhow, Context};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::time::Duration;
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent, CacheKey};
use common_contracts::data_formatting;
use crate::state::AppState;
pub struct FinnhubNode {
state: AppState,
}
impl FinnhubNode {
pub fn new(state: AppState) -> Self {
Self { state }
}
}
#[async_trait]
impl WorkflowNode for FinnhubNode {
fn node_type(&self) -> &str {
"finnhub"
}
fn get_cache_config(&self, config: &Value) -> Option<(CacheKey, Duration)> {
let symbol = config.get("symbol").and_then(|s| s.as_str())?;
let key_parts = vec![
"finnhub",
"company_data",
symbol,
"all"
];
let cache_key = CacheKey(key_parts.join(":"));
// Finnhub data - 24h TTL
let ttl = Duration::from_secs(86400);
Some((cache_key, ttl))
}
async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult> {
let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string();
if symbol.is_empty() {
return Err(anyhow!("Missing symbol in config"));
}
// 1. Fetch Data
let provider = self.state.get_provider().await
.ok_or_else(|| anyhow!("Finnhub Provider not initialized"))?;
let (profile, financials) = provider.fetch_all_data(&symbol).await
.context("Failed to fetch data from Finnhub")?;
// 2. Artifacts
let mut artifacts = HashMap::new();
artifacts.insert("profile.json".to_string(), json!(profile).into());
artifacts.insert("financials.json".to_string(), json!(financials).into());
Ok(NodeExecutionResult {
artifacts,
meta_summary: Some(json!({
"symbol": symbol,
"records": financials.len()
})),
})
}
fn render_report(&self, result: &NodeExecutionResult) -> Result<String> {
let profile_json = match result.artifacts.get("profile.json") {
Some(ArtifactContent::Json(v)) => v,
_ => return Err(anyhow!("Missing profile.json")),
};
let financials_json = match result.artifacts.get("financials.json") {
Some(ArtifactContent::Json(v)) => v,
_ => return Err(anyhow!("Missing financials.json")),
};
let symbol = profile_json["symbol"].as_str().unwrap_or("Unknown");
let mut report_md = String::new();
report_md.push_str(&format!("# Finnhub Data Report: {}\n\n", symbol));
report_md.push_str("## Company Profile\n\n");
report_md.push_str(&data_formatting::format_data(profile_json));
report_md.push_str("\n\n");
report_md.push_str("## Financial Statements\n\n");
report_md.push_str(&data_formatting::format_data(financials_json));
Ok(report_md)
}
}

View File

@ -2,6 +2,7 @@ use anyhow::Result;
use tracing::{info, error};
use common_contracts::workflow_types::WorkflowTaskCommand;
use common_contracts::subjects::NatsSubject;
use common_contracts::ack::TaskAcknowledgement;
use crate::state::AppState;
use futures_util::StreamExt;
use std::sync::Arc;
@ -20,20 +21,56 @@ pub async fn run_consumer(state: AppState) -> Result<()> {
while let Some(message) = subscriber.next().await {
info!("Received Workflow NATS message.");
// 1. Parse Command eagerly to check config
let cmd = match serde_json::from_slice::<WorkflowTaskCommand>(&message.payload) {
Ok(c) => c,
Err(e) => {
error!("Failed to parse WorkflowTaskCommand: {}", e);
continue;
}
};
// 2. Check Simulation Mode
let mode_raw = cmd.config.get("simulation_mode").and_then(|v| v.as_str()).unwrap_or("normal");
let mode = mode_raw.to_lowercase();
info!("Processing task {} with mode: {}", cmd.task_id, mode);
if mode == "timeout_ack" {
info!("Simulating Timeout (No ACK) for task {}", cmd.task_id);
continue; // Skip processing
}
if mode == "reject" {
info!("Simulating Rejection for task {}", cmd.task_id);
if let Some(reply_to) = message.reply {
let ack = TaskAcknowledgement::Rejected { reason: "Simulated Rejection".into() };
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Rejection ACK: {}", e);
}
}
}
continue;
}
// 3. Normal / Crash / Hang Mode -> Send Accepted
if let Some(reply_to) = message.reply {
let ack = TaskAcknowledgement::Accepted;
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Acceptance ACK: {}", e);
}
}
}
let state_clone = state.clone();
let client_clone = client.clone();
tokio::spawn(async move {
match serde_json::from_slice::<WorkflowTaskCommand>(&message.payload) {
Ok(cmd) => {
if let Err(e) = handle_workflow_command(state_clone, client_clone, cmd).await {
error!("Error handling workflow command: {:?}", e);
}
}
Err(e) => {
error!("Failed to deserialize workflow message: {}", e);
}
}
});
}
Ok(())

View File

@ -5,10 +5,11 @@ use serde_json::{json, Value};
use std::collections::HashMap;
use chrono::NaiveDate;
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent};
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent, CacheKey};
use common_contracts::data_formatting;
use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto};
use crate::state::AppState;
use std::time::Duration;
pub struct MockNode {
#[allow(dead_code)]
@ -27,7 +28,37 @@ impl WorkflowNode for MockNode {
"mock"
}
fn get_cache_config(&self, config: &Value) -> Option<(CacheKey, Duration)> {
let symbol = config.get("symbol").and_then(|s| s.as_str())?;
let key_parts = vec![
"mock",
"company_data",
symbol,
"all"
];
let cache_key = CacheKey(key_parts.join(":"));
// Mock data is static, but we can cache it for 1 hour
let ttl = Duration::from_secs(3600);
Some((cache_key, ttl))
}
async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult> {
let mode = config.get("simulation_mode").and_then(|v| v.as_str()).unwrap_or("normal");
if mode == "hang" {
tracing::info!("Simulating Hang (Sleep 600s)...");
tokio::time::sleep(Duration::from_secs(600)).await;
}
if mode == "crash" {
tracing::info!("Simulating Crash (Process Exit)...");
tokio::time::sleep(Duration::from_secs(1)).await;
std::process::exit(1);
}
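// Example: an end-to-end test can exercise these paths by putting the mode into the task
// config, e.g. {"symbol": "MOCK", "simulation_mode": "crash"}. Other values handled by this
// provider: "hang" here, and "reject" / "timeout_ack" / "normal" in its message consumer.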
let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("MOCK").to_string();
// Generate Dummy Data

View File

@ -63,6 +63,22 @@ pub async fn subscribe_to_commands(
Ok(task_cmd) => {
info!("Received WorkflowTaskCommand for task_id: {}", task_cmd.task_id);
// --- 0. Immediate Acknowledgement ---
if let Some(reply_subject) = message.reply.clone() {
let ack = common_contracts::ack::TaskAcknowledgement::Accepted;
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = nats.publish(reply_subject, payload.into()).await {
error!("Failed to send ACK for task {}: {}", task_cmd.task_id, e);
} else {
info!("ACK sent for task {}", task_cmd.task_id);
}
}
} else {
// This should only happen for fire-and-forget dispatch, which the orchestrator doesn't use,
// but it is worth logging.
tracing::warn!("No reply subject for task {}, cannot send ACK.", task_cmd.task_id);
}
// 1. Extract params from config
let symbol_str = task_cmd.config.get("symbol").and_then(|v| v.as_str());
let market_str = task_cmd.config.get("market").and_then(|v| v.as_str());

View File

@ -25,7 +25,7 @@ impl PersistenceClient {
// --- Config Fetching & Updating Methods ---
pub async fn get_llm_providers_config(&self) -> Result<LlmProvidersConfig> {
let url = format!("{}/configs/llm_providers", self.base_url);
let url = format!("{}/api/v1/configs/llm_providers", self.base_url);
info!("Fetching LLM providers config from {}", url);
let config = self
.client

View File

@ -25,7 +25,7 @@ async fn poll_and_update_config(state: &AppState) -> Result<()> {
info!("Polling for data source configurations...");
let client = reqwest::Client::new();
let url = format!(
"{}/configs/data_sources",
"{}/api/v1/configs/data_sources",
state.config.data_persistence_service_url
);

View File

@ -0,0 +1,556 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap};
/// Raw Tushare data entry
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TushareMetric {
pub metric_name: String,
pub period_date: String, // YYYY-MM-DD
pub value: Option<f64>,
// source and symbol are unused for now, since this is a single-stock report
}
/// Structure of the formatted report
pub struct FormattedReport {
pub title: String,
pub blocks: Vec<YearBlock>,
}
/// Per-year / per-period data block
pub struct YearBlock {
pub title: String, // "2024年度" or "2020 - 2024"
pub periods: Vec<String>, // Column headers: ["2024-12-31", ...] or ["2024", "2023", ...]
pub sections: Vec<ReportSection>,
}
/// Report type
#[derive(Debug, Clone, Copy)]
pub enum ReportType {
Quarterly, // Quarterly mode (default)
Yearly5Year, // 5-year aggregation mode
}
/// Report section (e.g. balance sheet)
pub struct ReportSection {
pub title: String,
pub rows: Vec<FormatRow>,
}
/// Formatted row
pub struct FormatRow {
pub label: String,
pub values: Vec<String>, // Pre-formatted value strings (e.g. "14.20"; the unit lives in the row label)
}
/// Unit strategy
#[derive(Debug, Clone, Copy)]
pub enum UnitStrategy {
CurrencyYi, // 亿 / hundred-million units (divide by 1e8, 2 decimal places)
CurrencyWan, // 万 / ten-thousand units (divide by 1e4, 2 decimal places)
Percent, // Percentage (2 decimal places; the "%" comes from the label suffix)
Integer, // Integer
Raw, // Raw value
Days, // Days (1 decimal place)
}
/// Field metadata
struct MetricMeta {
display_name: &'static str,
category: SectionCategory,
strategy: UnitStrategy,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum SectionCategory {
Snapshot,
Income,
Balance,
CashFlow,
Ratios,
Misc, // Catch-all
}
impl SectionCategory {
fn title(&self) -> &'static str {
match self {
SectionCategory::Snapshot => "关键指标",
SectionCategory::Income => "利润表",
SectionCategory::Balance => "资产负债表",
SectionCategory::CashFlow => "现金流量表",
SectionCategory::Ratios => "运营与比率",
SectionCategory::Misc => "其他指标",
}
}
}
pub struct TushareFormatter {
meta_map: HashMap<String, MetricMeta>,
}
impl TushareFormatter {
pub fn new() -> Self {
let mut meta_map = HashMap::new();
Self::init_dictionary(&mut meta_map);
Self { meta_map }
}
/// Initialize the data dictionary
fn init_dictionary(map: &mut HashMap<String, MetricMeta>) {
// Helper macro
macro_rules! m {
($key:expr, $name:expr, $cat:ident, $strat:ident) => {
map.insert(
$key.to_string(),
MetricMeta {
display_name: $name,
category: SectionCategory::$cat,
strategy: UnitStrategy::$strat,
},
);
};
}
// --- Snapshot & Market ---
m!("total_mv", "总市值", Snapshot, CurrencyYi);
m!("employees", "员工人数", Snapshot, Integer);
m!("holder_num", "股东户数", Snapshot, CurrencyWan);
m!("close", "收盘价", Snapshot, Raw);
m!("pe", "市盈率(PE)", Snapshot, Raw);
m!("pb", "市净率(PB)", Snapshot, Raw);
// --- Income Statement ---
m!("revenue", "营业收入", Income, CurrencyYi);
m!("n_income", "净利润", Income, CurrencyYi);
m!("rd_exp", "研发费用", Income, CurrencyYi);
m!("sell_exp", "销售费用", Income, CurrencyYi);
m!("admin_exp", "管理费用", Income, CurrencyYi);
m!("fin_exp", "财务费用", Income, CurrencyYi);
m!("total_cogs", "营业成本", Income, CurrencyYi);
m!("tax_to_ebt", "实际税率", Income, Percent);
m!("__tax_rate", "所得税率(Est)", Income, Percent);
m!("income_tax_exp", "所得税费用", Income, CurrencyYi);
m!("total_profit", "利润总额", Income, CurrencyYi);
// --- Balance Sheet ---
m!("total_assets", "总资产", Balance, CurrencyYi);
m!("fix_assets", "固定资产", Balance, CurrencyYi);
m!("inventories", "存货", Balance, CurrencyYi);
m!("accounts_receiv", "应收账款", Balance, CurrencyYi);
m!("accounts_pay", "应付账款", Balance, CurrencyYi);
m!("prepayment", "预付款项", Balance, CurrencyYi);
m!("adv_receipts", "预收款项", Balance, CurrencyYi);
m!("contract_liab", "合同负债", Balance, CurrencyYi);
m!("money_cap", "货币资金", Balance, CurrencyYi);
m!("lt_eqt_invest", "长期股权投资", Balance, CurrencyYi);
m!("goodwill", "商誉", Balance, CurrencyYi);
m!("st_borr", "短期借款", Balance, CurrencyYi);
m!("lt_borr", "长期借款", Balance, CurrencyYi);
m!("total_liab", "总负债", Balance, CurrencyYi);
m!("total_hldr_eqy_exc_min_int", "股东权益", Balance, CurrencyYi); // 归母权益
// --- Cash Flow ---
m!("n_cashflow_act", "经营净现金流", CashFlow, CurrencyYi);
m!("c_paid_to_for_empl", "支付职工现金", CashFlow, CurrencyYi);
m!("c_pay_acq_const_fiolta", "购建资产支付", CashFlow, CurrencyYi);
m!("dividend_amount", "分红总额", CashFlow, CurrencyYi);
m!("n_cashflow_inv", "投资净现金流", CashFlow, CurrencyYi);
m!("n_cashflow_fina", "筹资净现金流", CashFlow, CurrencyYi);
// --- Ratios ---
m!("arturn_days", "应收周转天数", Ratios, Days);
m!("invturn_days", "存货周转天数", Ratios, Days);
m!("__gross_margin", "毛利率", Ratios, Percent);
m!("__net_margin", "净利率", Ratios, Percent);
m!("__money_cap_ratio", "现金占比", Ratios, Percent);
m!("__fix_assets_ratio", "固定资产占比", Ratios, Percent);
m!("__lt_invest_ratio", "长投占比", Ratios, Percent);
m!("__goodwill_ratio", "商誉占比", Ratios, Percent);
m!("__ar_ratio", "应收占比", Ratios, Percent);
m!("__ap_ratio", "应付占比", Ratios, Percent);
m!("__st_borr_ratio", "短贷占比", Ratios, Percent);
m!("__lt_borr_ratio", "长贷占比", Ratios, Percent);
m!("__rd_rate", "研发费率", Ratios, Percent);
m!("__sell_rate", "销售费率", Ratios, Percent);
m!("__admin_rate", "管理费率", Ratios, Percent);
m!("roe", "ROE", Ratios, Percent);
m!("roa", "ROA", Ratios, Percent);
m!("grossprofit_margin", "毛利率(原始)", Ratios, Percent);
m!("netprofit_margin", "净利率(原始)", Ratios, Percent);
// --- Derived/Misc (Previously Misc) ---
m!("__depr_ratio", "折旧营收比", Ratios, Percent);
m!("__inventories_ratio", "存货资产比", Ratios, Percent);
m!("__prepay_ratio", "预付资产比", Ratios, Percent);
m!("depr_fa_coga_dpba", "资产折旧摊销", CashFlow, CurrencyYi);
}
/// Format a numeric value (returns only the number string)
fn format_value(&self, val: f64, strategy: UnitStrategy) -> String {
match strategy {
UnitStrategy::CurrencyYi => format!("{:.2}", val / 1e8),
UnitStrategy::CurrencyWan => format!("{:.2}", val / 1e4),
UnitStrategy::Percent => format!("{:.2}", val),
UnitStrategy::Integer => format!("{:.0}", val),
UnitStrategy::Raw => format!("{:.2}", val),
UnitStrategy::Days => format!("{:.1}", val),
}
}
/// Get the unit suffix
fn get_unit_suffix(&self, strategy: UnitStrategy) -> &'static str {
match strategy {
UnitStrategy::CurrencyYi => "(亿)",
UnitStrategy::CurrencyWan => "(万)",
UnitStrategy::Percent => "(%)",
UnitStrategy::Integer => "",
UnitStrategy::Raw => "",
UnitStrategy::Days => "(天)",
}
}
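// Worked example (matches the test fixture below): with UnitStrategy::CurrencyYi a raw
// money_cap of 1420023169.52 formats to "14.20", and the unit appears only in the row label
// via the suffix, e.g. "货币资金(亿)".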
/// Main entry point: convert flat Tushare data into a Markdown string
pub fn format_to_markdown(&self, symbol: &str, metrics: Vec<TushareMetric>) -> Result<String> {
let report_type = self.detect_report_type(&metrics);
let report = match report_type {
ReportType::Yearly5Year => self.pivot_data_yearly(symbol, metrics)?,
ReportType::Quarterly => self.pivot_data_quarterly(symbol, metrics)?,
};
self.render_markdown(&report)
}
/// Detect which report-type strategy to use
fn detect_report_type(&self, metrics: &[TushareMetric]) -> ReportType {
// Strategy: inspect the date distribution of a key financial metric (net income, n_income).
// If more than 80% of its records end on 12-31, prefer the annual-report (yearly) mode.
let target_metric = "n_income";
let mut total_count = 0;
let mut year_end_count = 0;
for m in metrics {
if m.metric_name == target_metric {
total_count += 1;
if m.period_date.ends_with("-12-31") {
year_end_count += 1;
}
}
}
// If there is no net-income data, fall back to scanning all records
if total_count == 0 {
for m in metrics {
total_count += 1;
if m.period_date.ends_with("-12-31") {
year_end_count += 1;
}
}
}
if total_count > 0 {
let ratio = year_end_count as f64 / total_count as f64;
// Annual rows exceed 80% (also covers small datasets that are entirely annual reports)
if ratio > 0.8 {
return ReportType::Yearly5Year;
}
}
// Default: quarterly mode (original logic)
ReportType::Quarterly
}
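// Example: if 9 of 10 n_income records end on "-12-31", the ratio is 0.9 > 0.8, so the data is
// rendered in Yearly5Year mode; otherwise it falls back to the quarterly view.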
/// Mode A: 5-year aggregated annual mode (Yearly 5-Year Aggregation)
/// Structure: Block = 5 years (e.g. 2020-2024), Columns = [2024, 2023, 2022, 2021, 2020]
/// Note: for each year, the latest reporting period within that year is used as its representative
/// (usually the 12-31 annual report; for the current year, the latest quarterly report).
fn pivot_data_yearly(&self, symbol: &str, metrics: Vec<TushareMetric>) -> Result<FormattedReport> {
// 1. Group by year and find the latest period_date within each year
// Map<Year, MaxDate>
let mut year_max_date: HashMap<String, String> = HashMap::new();
for m in &metrics {
let year = m.period_date.split('-').next().unwrap_or("").to_string();
if year.is_empty() { continue; }
year_max_date.entry(year)
.and_modify(|curr| {
if m.period_date > *curr {
*curr = m.period_date.clone();
}
})
.or_insert(m.period_date.clone());
}
// 2. Collect data, keeping only rows dated at that year's latest period_date
// Map<Year, Map<Metric, Value>>
let mut data_map: HashMap<String, HashMap<String, f64>> = HashMap::new();
let mut all_years: Vec<String> = Vec::new();
for m in metrics {
if let Some(val) = m.value {
let year = m.period_date.split('-').next().unwrap_or("").to_string();
if let Some(max_date) = year_max_date.get(&year) {
// Only accept this row if its date matches the year's latest date.
// Note: in theory different metrics could have different latest dates (missing data), but
// financial statements are usually aligned across metrics.
// Simplified here: for each year we pick one "primary reporting period" (the maximum date
// across all of that year's data) and keep only rows on that date.
if m.period_date == *max_date {
data_map
.entry(year.clone())
.or_default()
.insert(m.metric_name, val);
if !all_years.contains(&year) {
all_years.push(year);
}
}
}
}
}
// Sort years (descending: 2024, 2023, ...)
all_years.sort_by(|a, b| b.cmp(a));
// 3. Chunk into blocks of 5 years
let chunks = all_years.chunks(5);
let mut blocks = Vec::new();
for chunk in chunks {
if chunk.is_empty() { continue; }
let start_year = chunk.last().unwrap();
let end_year = chunk.first().unwrap();
// Block title shows the year range
let block_title = format!("{} - {}", start_year, end_year);
let periods = chunk.to_vec(); // ["2024", "2023", ...]
// Build sections
let sections = self.build_sections(&periods, &|year| data_map.get(year));
blocks.push(YearBlock {
title: block_title,
periods,
sections,
});
}
Ok(FormattedReport {
title: format!("{} 财务年报 (5年聚合)", symbol),
blocks,
})
}
/// Mode B: quarterly mode (original logic)
/// Structure: Block = 1 year (e.g. 2024), Columns = [2024-12-31, 2024-09-30, ...]
fn pivot_data_quarterly(&self, symbol: &str, metrics: Vec<TushareMetric>) -> Result<FormattedReport> {
// Map<Year, Map<Date, Map<Metric, Value>>>
let mut year_map: BTreeMap<String, BTreeMap<String, HashMap<String, f64>>> = BTreeMap::new();
for m in metrics {
if let Some(val) = m.value {
let year = m.period_date.split('-').next().unwrap_or("Unknown").to_string();
year_map
.entry(year)
.or_default()
.entry(m.period_date.clone())
.or_default()
.insert(m.metric_name, val);
}
}
let mut blocks = Vec::new();
// Iterate years in descending order
for (year, date_map) in year_map.iter().rev() {
let mut periods: Vec<String> = date_map.keys().cloned().collect();
periods.sort_by(|a, b| b.cmp(a)); // Dates in descending order
// Build sections
// Adapter closure: given a period (date), return that column's metric map
let sections = self.build_sections(&periods, &|period| date_map.get(period));
blocks.push(YearBlock {
title: format!("{}年度", year),
periods,
sections,
});
}
Ok(FormattedReport {
title: format!("{} 财务数据明细 (季报视图)", symbol),
blocks,
})
}
/// Generic section builder
/// periods: list of column headers (years like "2024" or dates like "2024-12-31")
/// data_provider: closure returning the Map<Metric, Value> for a given period
fn build_sections<'a, F>(
&self,
periods: &[String],
data_provider: &F
) -> Vec<ReportSection>
where F: Fn(&str) -> Option<&'a HashMap<String, f64>>
{
// 1. Collect every metric key that appears in this block
let mut all_metric_keys = std::collections::HashSet::new();
for p in periods {
if let Some(map) = data_provider(p) {
for k in map.keys() {
all_metric_keys.insert(k.clone());
}
}
}
// 2. Categorize
let mut cat_metrics: BTreeMap<SectionCategory, Vec<String>> = BTreeMap::new();
for key in all_metric_keys {
if let Some(meta) = self.meta_map.get(&key) {
cat_metrics.entry(meta.category).or_default().push(key);
} else {
cat_metrics.entry(SectionCategory::Misc).or_default().push(key);
}
}
// 3. Generate sections
let mut sections = Vec::new();
let categories = vec![
SectionCategory::Snapshot,
SectionCategory::Income,
SectionCategory::Balance,
SectionCategory::CashFlow,
SectionCategory::Ratios,
SectionCategory::Misc,
];
for cat in categories {
if let Some(keys) = cat_metrics.get(&cat) {
let mut sorted_keys = keys.clone();
// Ideally sort by a predefined weight; for now, plain alphabetical order
sorted_keys.sort();
let mut rows = Vec::new();
for key in sorted_keys {
let (label, strategy) = if let Some(meta) = self.meta_map.get(&key) {
(meta.display_name.to_string(), meta.strategy)
} else {
(key.clone(), UnitStrategy::Raw)
};
// Append unit suffix to label
let label_with_unit = format!("{}{}", label, self.get_unit_suffix(strategy));
let mut row_vals = Vec::new();
for p in periods {
let val_opt = data_provider(p).and_then(|m| m.get(&key));
if let Some(val) = val_opt {
row_vals.push(self.format_value(*val, strategy));
} else {
row_vals.push("-".to_string());
}
}
rows.push(FormatRow { label: label_with_unit, values: row_vals });
}
sections.push(ReportSection {
title: cat.title().to_string(),
rows,
});
}
}
sections
}
fn render_markdown(&self, report: &FormattedReport) -> Result<String> {
let mut md = String::new();
md.push_str(&format!("# {}\n\n", report.title));
for block in &report.blocks {
md.push_str(&format!("## {}\n\n", block.title));
for section in &block.sections {
if section.rows.is_empty() { continue; }
md.push_str(&format!("### {}\n", section.title));
// Table Header
md.push_str("| 指标 |");
for p in &block.periods {
md.push_str(&format!(" {} |", p));
}
md.push('\n');
// Separator
md.push_str("| :--- |");
for _ in &block.periods {
md.push_str(" :--- |");
}
md.push('\n');
// Rows
for row in &section.rows {
md.push_str(&format!("| **{}** |", row.label));
for v in &row.values {
md.push_str(&format!(" {} |", v));
}
md.push('\n');
}
md.push('\n');
}
}
Ok(md)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::path::PathBuf;
#[test]
fn test_format_tushare_real_data() {
// Try to locate the assets file relative to the crate root
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("../../assets/tushare.json");
if !path.exists() {
println!("SKIPPED: Test data not found at {:?}", path);
return;
}
println!("Loading test data from: {:?}", path);
let json_content = fs::read_to_string(path).expect("Failed to read tushare.json");
let metrics: Vec<TushareMetric> = serde_json::from_str(&json_content).expect("Failed to parse JSON");
println!("Loaded {} metrics", metrics.len());
let formatter = TushareFormatter::new();
let md = formatter.format_to_markdown("600521.SS", metrics).expect("Format markdown");
println!("\n=== GENERATED MARKDOWN REPORT START ===\n");
println!("{}", md);
println!("\n=== GENERATED MARKDOWN REPORT END ===\n");
// Assertions
// Title adapts to report type (e.g. "财务年报 (5年聚合)")
assert!(md.contains("# 600521.SS 财务"));
// Verify structure: Should have year ranges in 5-year mode
// "2021 - 2025"
assert!(md.contains("## 2021 - 2025"));
assert!(md.contains("## 2016 - 2020"));
// Verify content density
// Ensure we have specific sections populated
assert!(md.contains("### 利润表"));
assert!(md.contains("### 资产负债表"));
// Check for specific data point formatting (based on sample)
// "money_cap": 1420023169.52 (2025-09-30) -> 14.20 亿
// Updated assertion: Value should be "14.20", unit is in label "货币资金(亿)"
assert!(md.contains("14.20"));
assert!(md.contains("货币资金(亿)"));
}
}

View File

@ -1,6 +1,7 @@
mod api;
mod config;
mod error;
mod formatter;
mod mapping;
mod message_consumer;
// mod persistence; // Removed in favor of common_contracts::persistence_client

View File

@ -4,6 +4,7 @@ use common_contracts::messages::FetchCompanyDataCommand;
use common_contracts::workflow_types::WorkflowTaskCommand; // Import
use common_contracts::observability::ObservabilityTaskStatus;
use common_contracts::subjects::NatsSubject;
use common_contracts::ack::TaskAcknowledgement;
use futures_util::StreamExt;
use tracing::{error, info, warn};
@ -16,17 +17,8 @@ pub async fn run(state: AppState) -> Result<()> {
info!("Starting NATS message consumer...");
loop {
let status = state.status.read().await.clone();
if let ServiceOperationalStatus::Degraded { reason } = status {
warn!(
"Service is in degraded state (reason: {}). Pausing message consumption for 5s.",
reason
);
tokio::time::sleep(Duration::from_secs(5)).await;
continue;
}
info!("Service is Active. Connecting to NATS...");
// Always connect, regardless of Degraded status
info!("Connecting to NATS...");
match async_nats::connect(&state.config.nats_addr).await {
Ok(client) => {
info!("Successfully connected to NATS.");
@ -55,7 +47,32 @@ async fn subscribe_workflow(state: AppState, client: async_nats::Client) -> Resu
info!("Workflow Consumer started on '{}'", subject);
while let Some(message) = subscriber.next().await {
// Check status check (omitted for brevity, assuming handled)
// Check Status (Handshake)
let current_status = state.status.read().await.clone();
// If Degraded, Reject immediately
if let ServiceOperationalStatus::Degraded { reason } = current_status {
warn!("Rejecting task due to degraded state: {}", reason);
if let Some(reply_to) = message.reply {
let ack = TaskAcknowledgement::Rejected { reason };
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Rejection Ack: {}", e);
}
}
}
continue;
}
// If Active, Accept
if let Some(reply_to) = message.reply.clone() {
let ack = TaskAcknowledgement::Accepted;
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Acceptance Ack: {}", e);
}
}
}
let state = state.clone();
let client = client.clone();
@ -63,6 +80,7 @@ async fn subscribe_workflow(state: AppState, client: async_nats::Client) -> Resu
tokio::spawn(async move {
match serde_json::from_slice::<WorkflowTaskCommand>(&message.payload) {
Ok(cmd) => {
// TODO: Implement Heartbeat inside handle_workflow_command or wrapper
if let Err(e) = crate::generic_worker::handle_workflow_command(state, client, cmd).await {
error!("Generic worker handler failed: {}", e);
}

View File

@ -3,11 +3,12 @@ use anyhow::{Result, anyhow, Context};
use serde_json::{json, Value};
use std::collections::HashMap;
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent};
use common_contracts::data_formatting;
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent, CacheKey};
use common_contracts::persistence_client::PersistenceClient;
use common_contracts::workflow_harness::TaskState;
use crate::state::AppState;
use crate::formatter::{TushareFormatter, TushareMetric};
use std::time::Duration;
pub struct TushareNode {
state: AppState,
@ -25,6 +26,24 @@ impl WorkflowNode for TushareNode {
"tushare"
}
fn get_cache_config(&self, config: &Value) -> Option<(CacheKey, Duration)> {
let symbol = config.get("symbol").and_then(|s| s.as_str())?;
// Construct Tuple Key: provider:interface:arg1
let key_parts = vec![
"tushare",
"company_data", // Conceptual interface name
symbol,
"all" // Scope
];
let cache_key = CacheKey(key_parts.join(":"));
// Tushare data is financial reports, valid for at least 7*24 hours
let ttl = Duration::from_secs(7 * 24 * 60 * 60);
Some((cache_key, ttl))
}
async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult> {
let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string();
let _market = config.get("market").and_then(|s| s.as_str()).unwrap_or("CN").to_string();
@ -50,7 +69,22 @@ impl WorkflowNode for TushareNode {
// 3. Artifacts
let mut artifacts = HashMap::new();
artifacts.insert("profile.json".to_string(), json!(profile).into());
artifacts.insert("financials.json".to_string(), json!(financials).into());
// Format Report directly to markdown
let metrics: Vec<TushareMetric> = financials.iter().map(|d| TushareMetric {
metric_name: d.metric_name.clone(),
period_date: d.period_date.to_string(),
value: Some(d.value),
}).collect();
let formatter = TushareFormatter::new();
let report_md = formatter.format_to_markdown(&symbol, metrics.clone())
.context("Failed to format markdown report")?;
artifacts.insert("financials.md".to_string(), ArtifactContent::Text(report_md));
// 4. Dump Metrics for Robustness (Recover from missing financials.md)
artifacts.insert("_metrics_dump.json".to_string(), json!(metrics).into());
Ok(NodeExecutionResult {
artifacts,
@ -62,28 +96,27 @@ impl WorkflowNode for TushareNode {
}
fn render_report(&self, result: &NodeExecutionResult) -> Result<String> {
let profile_json = match result.artifacts.get("profile.json") {
Some(ArtifactContent::Json(v)) => v,
_ => return Err(anyhow!("Missing profile.json")),
};
let financials_json = match result.artifacts.get("financials.json") {
Some(ArtifactContent::Json(v)) => v,
_ => return Err(anyhow!("Missing financials.json")),
};
match result.artifacts.get("financials.md") {
Some(ArtifactContent::Text(s)) => Ok(s.clone()),
_ => {
// Robustness: Try to regenerate if financials.md is missing (e.g. cache hit but old version or partial cache)
if let Some(ArtifactContent::Json(json_val)) = result.artifacts.get("_metrics_dump.json") {
// Clone value to deserialize
if let Ok(metrics) = serde_json::from_value::<Vec<TushareMetric>>(json_val.clone()) {
let formatter = TushareFormatter::new();
let symbol = result.meta_summary.as_ref()
.and_then(|v| v.get("symbol"))
.and_then(|v| v.as_str())
.unwrap_or("Unknown");
let symbol = profile_json["symbol"].as_str().unwrap_or("Unknown");
let mut report_md = String::new();
report_md.push_str(&format!("# Tushare Data Report: {}\n\n", symbol));
report_md.push_str("## Company Profile\n\n");
report_md.push_str(&data_formatting::format_data(profile_json));
report_md.push_str("\n\n");
report_md.push_str("## Financial Statements\n\n");
report_md.push_str(&data_formatting::format_data(financials_json));
Ok(report_md)
tracing::info!("Regenerating financials.md from cached metrics dump for {}", symbol);
return formatter.format_to_markdown(symbol, metrics)
.context("Failed to regenerate markdown report from metrics");
}
}
Err(anyhow!("Missing financials.md"))
}
}
}
}

View File

@ -17,8 +17,31 @@ impl AppConfig {
.unwrap_or_else(|_| "8005".to_string())
.parse()
.context("SERVER_PORT must be a number")?;
// Note: The previous default ended with "/api/v1", but PersistenceClient methods already
// append that prefix themselves, e.g. in common-contracts/persistence_client.rs:
//     let url = format!("{}/api/v1/templates", self.base_url);
// and data-persistence-service registers its routes under "/api/v1/...".
// With base_url = "http://svc:3000/api/v1" the request URL would double the prefix:
//     "http://svc:3000/api/v1/api/v1/templates"
// so the base URL MUST NOT end with /api/v1.
let data_persistence_service_url = env::var("DATA_PERSISTENCE_SERVICE_URL")
.unwrap_or_else(|_| "http://data-persistence-service:3000/api/v1".to_string());
.unwrap_or_else(|_| "http://data-persistence-service:3000".to_string());
let workflow_data_path = env::var("WORKFLOW_DATA_PATH")
.unwrap_or_else(|_| "/mnt/workflow_data".to_string());
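A minimal, self-contained sketch of the joining behaviour described in the comment above; the helper name templates_url is illustrative, not the client's actual method name.
fn templates_url(base_url: &str) -> String {
    // Mirrors how PersistenceClient builds its URLs: it appends the versioned path.
    format!("{}/api/v1/templates", base_url)
}

fn main() {
    // New default: single prefix.
    assert_eq!(
        templates_url("http://data-persistence-service:3000"),
        "http://data-persistence-service:3000/api/v1/templates"
    );
    // Old default: doubled prefix.
    assert_eq!(
        templates_url("http://data-persistence-service:3000/api/v1"),
        "http://data-persistence-service:3000/api/v1/api/v1/templates"
    );
}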

View File

@ -32,7 +32,7 @@ impl ContextResolver {
llm_providers: &LlmProvidersConfig,
analysis_prompt: &str,
) -> Result<ResolutionResult> {
match &selector.mode {
match selector {
SelectionMode::Manual { rules } => {
let resolved_rules = rules.iter().map(|r| {
let mut rule = r.clone();
@ -48,7 +48,9 @@ impl ContextResolver {
let system_prompt = "You are an intelligent file selector for a financial analysis system. \
Your goal is to select the specific files from the repository that are necessary to fulfill the user's analysis request.\n\
Return ONLY a JSON array of string file paths (e.g. [\"path/to/file1\", \"path/to/file2\"]). \
Do not include any explanation, markdown formatting, or code blocks.";
Do not include any explanation, markdown formatting, or code blocks.\n\
IMPORTANT: Ignore any files starting with an underscore (e.g. '_metrics_dump.json', '_execution.md'). \
These are internal debug artifacts and should NOT be selected for analysis.";
let user_prompt = format!(
"I need to perform the following analysis task:\n\n\"{}\"\n\n\

View File

@ -7,6 +7,12 @@ use anyhow::Result;
use tracing::info;
use serde::{Serialize, Deserialize};
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaskExecutionBuffer {
pub logs: Vec<String>,
pub content_buffer: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommitTracker {
/// Maps task_id to the commit hash it produced.
@ -36,26 +42,42 @@ impl CommitTracker {
pub fn record_metadata(&mut self, task_id: &str, meta: serde_json::Value) {
// Convert generic JSON to TaskMetadata
if let Ok(parsed) = serde_json::from_value::<TaskMetadata>(meta.clone()) {
self.task_metadata.insert(task_id.to_string(), parsed);
} else {
// Fallback: build a TaskMetadata from the raw JSON. Known fields
// (execution_log_path, output_path) are lifted out at the top level so
// they are not duplicated inside 'extra'.
let mut execution_log_path = None;
let mut output_path = None;
let mut extra = HashMap::new();
if let Some(obj) = meta.as_object() {
for (k, v) in obj {
if k == "execution_log_path" {
execution_log_path = v.as_str().map(|s| s.to_string());
} else if k == "output_path" {
output_path = v.as_str().map(|s| s.to_string());
} else {
// Avoid nested "extra" if the input was already structured
if k == "extra" && v.is_object() {
if let Some(inner_extra) = v.as_object() {
for (ik, iv) in inner_extra {
extra.insert(ik.clone(), iv.clone());
}
}
} else {
extra.insert(k.clone(), v.clone());
}
}
}
}
let metadata = TaskMetadata {
output_path: None,
execution_log_path: None,
output_path,
execution_log_path,
extra,
};
self.task_metadata.insert(task_id.to_string(), metadata);
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DagScheduler {
@ -74,6 +96,10 @@ pub struct DagScheduler {
#[serde(default = "default_start_time")]
pub start_time: chrono::DateTime<chrono::Utc>,
/// In-memory buffer for active tasks' logs and content
#[serde(default)]
pub task_execution_states: HashMap<String, TaskExecutionBuffer>,
}
fn default_start_time() -> chrono::DateTime<chrono::Utc> {
@ -122,8 +148,13 @@ pub struct DagNode {
pub routing_key: String,
/// The commit hash used as input for this task
pub input_commit: Option<String>,
// --- Observability & Watchdog ---
pub started_at: Option<i64>, // Timestamp ms
pub last_heartbeat_at: Option<i64>, // Timestamp ms
}
impl DagScheduler {
pub fn new(request_id: Uuid, initial_commit: String) -> Self {
Self {
@ -134,6 +165,7 @@ impl DagScheduler {
commit_tracker: CommitTracker::new(initial_commit),
workflow_finished_flag: false,
start_time: chrono::Utc::now(),
task_execution_states: HashMap::new(),
}
}
@ -146,6 +178,8 @@ impl DagScheduler {
config,
routing_key,
input_commit: None,
started_at: None,
last_heartbeat_at: None,
});
}
@ -170,10 +204,30 @@ impl DagScheduler {
pub fn update_status(&mut self, task_id: &str, status: TaskStatus) {
if let Some(node) = self.nodes.get_mut(task_id) {
// State transition logic for timestamps
if node.status != TaskStatus::Running && status == TaskStatus::Running {
// Transition to Running: dispatch sets Scheduled, the worker sets Running,
// so Running means "worker started". Record the start time only once.
if node.started_at.is_none() {
node.started_at = Some(chrono::Utc::now().timestamp_millis());
}
node.last_heartbeat_at = Some(chrono::Utc::now().timestamp_millis());
}
// Scheduling could record a separate dispatch_time, but for simplicity only
// the Running transition (worker started) sets started_at.
node.status = status;
}
}
pub fn update_heartbeat(&mut self, task_id: &str, timestamp: i64) {
if let Some(node) = self.nodes.get_mut(task_id) {
node.last_heartbeat_at = Some(timestamp);
}
}
pub fn get_status(&self, task_id: &str) -> TaskStatus {
self.nodes.get(task_id).map(|n| n.status).unwrap_or(TaskStatus::Pending)
}
@ -188,6 +242,45 @@ impl DagScheduler {
self.commit_tracker.record_metadata(task_id, meta);
}
pub fn append_log(&mut self, task_id: &str, log: String) {
self.task_execution_states
.entry(task_id.to_string())
.or_default()
.logs
.push(log);
}
pub fn append_content(&mut self, task_id: &str, content_delta: &str) {
self.task_execution_states
.entry(task_id.to_string())
.or_default()
.content_buffer
.push_str(content_delta);
}
/// Recursively cancel downstream tasks of a failed/skipped task.
pub fn cancel_downstream(&mut self, task_id: &str) {
// Clone deps to avoid borrowing self while mutating nodes
if let Some(downstream) = self.forward_deps.get(task_id).cloned() {
for next_id in downstream {
let should_recurse = if let Some(node) = self.nodes.get_mut(&next_id) {
if node.status == TaskStatus::Pending || node.status == TaskStatus::Scheduled {
node.status = TaskStatus::Skipped; // Use Skipped for cascading cancellation
true
} else {
false
}
} else {
false
};
if should_recurse {
self.cancel_downstream(&next_id);
}
}
}
}
/// Check if all tasks in the DAG have reached a terminal state.
pub fn is_workflow_finished(&self) -> bool {
self.nodes.values().all(|n| matches!(n.status,
@ -365,7 +458,7 @@ mod tests {
vgcs.init_repo(&req_id_str)?;
// 0. Create Initial Commit (Common Ancestor)
let mut tx = vgcs.begin_transaction(&req_id_str, "")?;
let tx = vgcs.begin_transaction(&req_id_str, "")?;
let init_commit = Box::new(tx).commit("Initial Commit", "system")?;
// 1. Setup DAG
@ -406,4 +499,42 @@ mod tests {
Ok(())
}
#[test]
fn test_dag_timestamp_updates() {
let req_id = Uuid::new_v4();
let mut dag = DagScheduler::new(req_id, "init".to_string());
dag.add_node("A".to_string(), None, TaskType::DataFetch, "key".into(), json!({}));
// 1. Initial state
let node = dag.nodes.get("A").unwrap();
assert_eq!(node.status, TaskStatus::Pending);
assert!(node.started_at.is_none());
assert!(node.last_heartbeat_at.is_none());
// 2. Transition to Running
dag.update_status("A", TaskStatus::Running);
let node = dag.nodes.get("A").unwrap();
assert_eq!(node.status, TaskStatus::Running);
assert!(node.started_at.is_some());
assert!(node.last_heartbeat_at.is_some());
let start_time = node.started_at.unwrap();
// 3. Update Heartbeat
std::thread::sleep(std::time::Duration::from_millis(10));
let now = chrono::Utc::now().timestamp_millis();
dag.update_heartbeat("A", now);
let node = dag.nodes.get("A").unwrap();
assert!(node.last_heartbeat_at.unwrap() >= now);
assert_eq!(node.started_at.unwrap(), start_time); // Start time shouldn't change
// 4. Complete
dag.update_status("A", TaskStatus::Completed);
let node = dag.nodes.get("A").unwrap();
assert_eq!(node.status, TaskStatus::Completed);
// Timestamps remain
assert!(node.started_at.is_some());
}
}
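A minimal sketch, not part of this diff, showing how the new per-task buffers accumulate logs and streamed content; it is written as if it lived inside the test module above and reuses its imports.
#[test]
fn test_task_execution_buffer_accumulates() {
    let mut dag = DagScheduler::new(Uuid::new_v4(), "init".to_string());
    dag.add_node("task:demo".to_string(), None, TaskType::DataFetch, "demo.key".into(), json!({}));

    // Logs and content deltas arrive from NATS events and are buffered per task.
    dag.append_log("task:demo", "[INFO] started".to_string());
    dag.append_content("task:demo", "partial ");
    dag.append_content("task:demo", "report");

    let buf = &dag.task_execution_states["task:demo"];
    assert_eq!(buf.logs, vec!["[INFO] started".to_string()]);
    assert_eq!(buf.content_buffer, "partial report");
}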

View File

@ -8,3 +8,5 @@ pub mod dag_scheduler;
pub mod context_resolver;
pub mod io_binder;
pub mod llm_client;
pub mod task_monitor;
pub mod logging;

View File

@ -0,0 +1,208 @@
use std::fs::{self, File, OpenOptions};
use std::io::{Write, Read, BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tracing::{Event, Subscriber};
use tracing_subscriber::layer::Context;
use tracing_subscriber::Layer;
use chrono::Utc;
use anyhow::Result;
use tokio::sync::broadcast;
/// Manages temporary log files for workflow requests.
#[derive(Clone)]
pub struct LogBufferManager {
root_path: PathBuf,
}
impl LogBufferManager {
pub fn new<P: AsRef<Path>>(root_path: P) -> Self {
let path = root_path.as_ref().to_path_buf();
if !path.exists() {
let _ = fs::create_dir_all(&path);
}
Self { root_path: path }
}
fn get_log_path(&self, request_id: &str) -> PathBuf {
self.root_path.join(format!("{}.log", request_id))
}
/// Appends a log line to the request's temporary file.
pub fn append(&self, request_id: &str, message: &str) {
let path = self.get_log_path(request_id);
// Open for append, create if not exists
if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) {
let _ = writeln!(file, "{}", message);
}
}
/// Reads the current logs as a vector of strings (for snapshotting).
pub fn read_current_logs(&self, request_id: &str) -> Result<Vec<String>> {
let path = self.get_log_path(request_id);
if !path.exists() {
return Ok(Vec::new());
}
let file = File::open(&path)?;
let reader = BufReader::new(file);
let lines: Result<Vec<String>, _> = reader.lines().collect();
Ok(lines?)
}
/// Reads the full log content and deletes the temporary file.
pub fn finalize(&self, request_id: &str) -> Result<String> {
let path = self.get_log_path(request_id);
if !path.exists() {
return Ok(String::new());
}
let mut file = File::open(&path)?;
let mut content = String::new();
file.read_to_string(&mut content)?;
// Delete the file after reading
let _ = fs::remove_file(path);
Ok(content)
}
/// Cleans up all temporary files (e.g. on startup)
pub fn cleanup_all(&self) {
if let Ok(entries) = fs::read_dir(&self.root_path) {
for entry in entries.flatten() {
if let Ok(file_type) = entry.file_type() {
if file_type.is_file() {
let _ = fs::remove_file(entry.path());
}
}
}
}
}
}
/// Represents a log entry to be broadcasted.
#[derive(Clone, Debug)]
pub struct LogEntry {
pub request_id: String,
pub level: String,
pub message: String,
pub timestamp: i64,
}
/// A Tracing Layer that intercepts logs and writes them to the LogBufferManager
/// if a `request_id` field is present in the span or event.
pub struct FileRequestLogLayer {
manager: Arc<LogBufferManager>,
tx: broadcast::Sender<LogEntry>,
}
impl FileRequestLogLayer {
pub fn new(manager: Arc<LogBufferManager>, tx: broadcast::Sender<LogEntry>) -> Self {
Self { manager, tx }
}
}
impl<S> Layer<S> for FileRequestLogLayer
where
S: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>,
{
fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) {
let mut request_id = None;
// 1. Try to find request_id in the event fields
let mut visitor = RequestIdVisitor(&mut request_id);
event.record(&mut visitor);
// 2. If not in event, look in the current span's extensions
if request_id.is_none() {
if let Some(span) = ctx.lookup_current() {
let extensions = span.extensions();
if let Some(req_id) = extensions.get::<RequestId>() {
request_id = Some(req_id.0.clone());
} else {
// tracing does not propagate span fields into Extensions automatically;
// `on_new_span` below extracts the request_id field and stores it there,
// so there is nothing further to do in this fallback branch.
}
}
}
if let Some(req_id) = request_id {
// Format the message
let now = Utc::now();
let timestamp_str = now.to_rfc3339();
let timestamp_millis = now.timestamp_millis();
let level_str = event.metadata().level().to_string();
let mut msg_visitor = MessageVisitor(String::new());
event.record(&mut msg_visitor);
let message = msg_visitor.0;
let log_line = format!("[{}] [{}] {}", timestamp_str, level_str, message);
self.manager.append(&req_id, &log_line);
// Broadcast for realtime
let entry = LogEntry {
request_id: req_id,
level: level_str,
message,
timestamp: timestamp_millis,
};
let _ = self.tx.send(entry);
}
}
fn on_new_span(&self, attrs: &tracing::span::Attributes<'_>, id: &tracing::Id, ctx: Context<'_, S>) {
// Extract request_id from span attributes and store in Extensions for easy access in on_event
let mut request_id = None;
let mut visitor = RequestIdVisitor(&mut request_id);
attrs.record(&mut visitor);
if let Some(req_id) = request_id {
if let Some(span) = ctx.span(id) {
span.extensions_mut().insert(RequestId(req_id));
}
}
}
}
// --- Helper Structs for Tracing Visitors ---
#[derive(Clone)]
struct RequestId(String);
struct RequestIdVisitor<'a>(&'a mut Option<String>);
impl<'a> tracing::field::Visit for RequestIdVisitor<'a> {
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
if field.name() == "request_id" {
*self.0 = Some(format!("{:?}", value).replace('"', ""));
}
}
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
if field.name() == "request_id" {
*self.0 = Some(value.to_string());
}
}
}
struct MessageVisitor(String);
impl tracing::field::Visit for MessageVisitor {
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
if field.name() == "message" {
self.0 = format!("{:?}", value);
}
}
// Handles the case where `message` is recorded as a plain string rather than via Debug.
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
if field.name() == "message" {
self.0 = value.to_string();
}
}
}
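A minimal usage sketch, not part of this diff, of how a caller attaches a request_id so the layer above picks it up (assuming the layer is installed on the subscriber, as main.rs does below).
use tracing::{info, info_span};

fn run_request(request_id: &str) {
    // `on_new_span` stores the request_id field in the span's Extensions, so every
    // event emitted inside this span is appended to the request's temp log file
    // and broadcast on the realtime channel.
    let span = info_span!("workflow_request", request_id = %request_id);
    let _guard = span.enter();
    info!("dispatching first task");
}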

View File

@ -1,28 +1,83 @@
use anyhow::Result;
use tracing::info;
use std::sync::Arc;
use workflow_orchestrator_service::{config, state, message_consumer, api};
use workflow_orchestrator_service::{config, state, message_consumer, api, task_monitor};
use tracing_subscriber::{EnvFilter, layer::SubscriberExt, util::SubscriberInitExt, Layer};
use tokio::sync::broadcast;
use common_contracts::messages::WorkflowEvent;
use common_contracts::subjects::NatsSubject;
#[tokio::main]
async fn main() -> Result<()> {
// Initialize tracing
tracing_subscriber::fmt()
// .with_env_filter(EnvFilter::from_default_env())
.with_env_filter("info")
// Load configuration first
let config = config::AppConfig::load()?;
// Initialize Log Manager
let log_manager = Arc::new(workflow_orchestrator_service::logging::LogBufferManager::new("temp_logs"));
log_manager.cleanup_all(); // Clean up old logs on startup
// Initialize Realtime Log Broadcast Channel
let (log_tx, mut log_rx) = broadcast::channel::<workflow_orchestrator_service::logging::LogEntry>(1000);
// Initialize Tracing with custom layer
let fmt_layer = tracing_subscriber::fmt::layer().with_filter(EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()));
let file_log_layer = workflow_orchestrator_service::logging::FileRequestLogLayer::new(log_manager.clone(), log_tx.clone());
tracing_subscriber::registry()
.with(fmt_layer)
.with(file_log_layer)
.init();
info!("Starting workflow-orchestrator-service...");
// Load configuration
let config = config::AppConfig::load()?;
// Initialize application state
let state = Arc::new(state::AppState::new(config.clone()).await?);
let state = Arc::new(state::AppState::new(config.clone(), log_manager, log_tx).await?);
// Connect to NATS
let nats_client = async_nats::connect(&config.nats_addr).await?;
let nats_client = {
let mut attempts = 0;
loop {
match async_nats::connect(&config.nats_addr).await {
Ok(client) => break client,
Err(e) => {
attempts += 1;
if attempts > 30 {
return Err(anyhow::anyhow!("Failed to connect to NATS after 30 attempts: {}", e));
}
tracing::warn!("Failed to connect to NATS: {}. Retrying in 2s... (Attempt {}/30)", e, attempts);
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
}
}
};
info!("Connected to NATS at {}", config.nats_addr);
// Start Realtime Log Pusher
let nats_pusher = nats_client.clone();
tokio::spawn(async move {
info!("Starting Realtime Log Pusher...");
while let Ok(entry) = log_rx.recv().await {
// Convert to WorkflowEvent::TaskLog
// entry.request_id is a string; parse it back to a Uuid to derive the subject
if let Ok(req_id) = uuid::Uuid::parse_str(&entry.request_id) {
let event = WorkflowEvent::TaskLog {
task_id: "workflow".to_string(), // Ideally the originating task_id would be captured too; use a generic "workflow" id for now
level: entry.level,
message: entry.message,
timestamp: entry.timestamp,
};
let subject = NatsSubject::WorkflowProgress(req_id).to_string();
if let Ok(payload) = serde_json::to_vec(&event) {
if let Err(e) = nats_pusher.publish(subject, payload.into()).await {
tracing::error!("Failed to push realtime log to NATS: {}", e);
}
}
}
}
});
// Start Message Consumer
let state_clone = state.clone();
let nats_clone = nats_client.clone();
@ -32,6 +87,13 @@ async fn main() -> Result<()> {
}
});
// Start Task Monitor (Watchdog)
let state_monitor = state.clone();
let nats_monitor = nats_client.clone();
tokio::spawn(async move {
task_monitor::run(state_monitor, nats_monitor).await;
});
// Start HTTP Server
let app = api::create_router(state.clone());
let addr = format!("0.0.0.0:{}", config.server_port);

View File

@ -4,25 +4,28 @@ use anyhow::Result;
use tracing::{info, error};
use futures::StreamExt;
use crate::state::AppState;
use common_contracts::messages::{StartWorkflowCommand, SyncStateCommand};
use common_contracts::messages::{StartWorkflowCommand, SyncStateCommand, WorkflowEvent};
use common_contracts::workflow_types::WorkflowTaskEvent;
use common_contracts::subjects::NatsSubject;
use crate::workflow::WorkflowEngine;
use uuid::Uuid;
pub async fn run(state: Arc<AppState>, nats: Client) -> Result<()> {
info!("Message Consumer started. Subscribing to topics...");
// Topic 1: Workflow Commands (Start)
// Note: NatsSubject::WorkflowCommandStart string representation is "workflow.commands.start"
let mut start_sub = nats.subscribe(NatsSubject::WorkflowCommandStart.to_string()).await?;
// Topic 1b: Workflow Commands (Sync State)
let mut sync_sub = nats.subscribe(NatsSubject::WorkflowCommandSyncState.to_string()).await?;
// Topic 2: Workflow Task Events (Generic)
// Note: NatsSubject::WorkflowEventTaskCompleted string representation is "workflow.evt.task_completed"
let mut task_sub = nats.subscribe(NatsSubject::WorkflowEventTaskCompleted.to_string()).await?;
// Topic 3: All Workflow Events (for capturing logs/stream)
// events.workflow.> matches events.workflow.{req_id}
let mut events_sub = nats.subscribe("events.workflow.>".to_string()).await?;
let engine = Arc::new(WorkflowEngine::new(state.clone(), nats.clone()));
// --- Task 1: Start Workflow ---
@ -31,9 +34,13 @@ pub async fn run(state: Arc<AppState>, nats: Client) -> Result<()> {
while let Some(msg) = start_sub.next().await {
if let Ok(cmd) = serde_json::from_slice::<StartWorkflowCommand>(&msg.payload) {
info!("Received StartWorkflow: {:?}", cmd);
if let Err(e) = engine1.handle_start_workflow(cmd).await {
let engine_inner = engine1.clone();
tokio::spawn(async move {
// Spawn per command so handling runs concurrently and does not block the consumer loop.
if let Err(e) = engine_inner.handle_start_workflow(cmd).await {
error!("Failed to handle StartWorkflow: {}", e);
}
});
} else {
error!("Failed to parse StartWorkflowCommand");
}
@ -46,9 +53,12 @@ pub async fn run(state: Arc<AppState>, nats: Client) -> Result<()> {
while let Some(msg) = sync_sub.next().await {
if let Ok(cmd) = serde_json::from_slice::<SyncStateCommand>(&msg.payload) {
info!("Received SyncStateCommand: request_id={}", cmd.request_id);
if let Err(e) = engine_sync.handle_sync_state(cmd).await {
let engine_inner = engine_sync.clone();
tokio::spawn(async move {
if let Err(e) = engine_inner.handle_sync_state(cmd).await {
error!("Failed to handle SyncStateCommand: {}", e);
}
});
} else {
error!("Failed to parse SyncStateCommand");
}
@ -61,14 +71,65 @@ pub async fn run(state: Arc<AppState>, nats: Client) -> Result<()> {
while let Some(msg) = task_sub.next().await {
if let Ok(evt) = serde_json::from_slice::<WorkflowTaskEvent>(&msg.payload) {
info!("Received TaskCompleted: task_id={}", evt.task_id);
if let Err(e) = engine2.handle_task_completed(evt).await {
let engine_inner = engine2.clone();
tokio::spawn(async move {
if let Err(e) = engine_inner.handle_task_completed(evt).await {
error!("Failed to handle TaskCompleted: {}", e);
}
});
} else {
error!("Failed to parse WorkflowTaskEvent");
}
}
});
// --- Task 3: Workflow Events Capture (Logs & Stream) ---
let engine_events = engine.clone();
tokio::spawn(async move {
while let Some(msg) = events_sub.next().await {
// 1. Extract Request ID from Subject
let subject_str = msg.subject.to_string();
let parts: Vec<&str> = subject_str.split('.').collect();
// Expected: events.workflow.{uuid}
let req_id = if parts.len() >= 3 {
match Uuid::parse_str(parts[2]) {
Ok(id) => id,
Err(_) => continue, // Ignore malformed subjects or non-UUID
}
} else {
continue;
};
// 2. Parse Event Payload
if let Ok(evt) = serde_json::from_slice::<WorkflowEvent>(&msg.payload) {
let engine_inner = engine_events.clone();
// 3. Dispatch based on event type
match evt {
WorkflowEvent::TaskStreamUpdate { task_id, content_delta, .. } => {
tokio::spawn(async move {
let _ = engine_inner.handle_task_stream_update(task_id, content_delta, req_id).await;
});
},
WorkflowEvent::TaskLog { task_id, message, level, timestamp } => {
// Format the log line to match what the frontend renders: `[${time}] [${p.level}] ${p.message}`
use chrono::{Utc, TimeZone};
let dt = Utc.timestamp_millis_opt(timestamp).unwrap();
let time_str = dt.format("%H:%M:%S").to_string();
let formatted_log = format!("[{}] [{}] {}", time_str, level, message);
tokio::spawn(async move {
let _ = engine_inner.handle_task_log(task_id, formatted_log, req_id).await;
});
},
// Ignore others
_ => {}
}
}
}
});
Ok(())
}

View File

@ -8,6 +8,8 @@ use tokio::sync::Mutex;
// use crate::workflow::WorkflowStateMachine; // Deprecated
use crate::dag_scheduler::DagScheduler;
use workflow_context::Vgcs;
use crate::logging::{LogBufferManager, LogEntry};
use tokio::sync::broadcast;
pub struct AppState {
#[allow(dead_code)]
@ -20,10 +22,13 @@ pub struct AppState {
pub workflows: Arc<DashMap<Uuid, Arc<Mutex<DagScheduler>>>>,
pub vgcs: Arc<Vgcs>,
pub log_manager: Arc<LogBufferManager>,
pub log_broadcast_tx: broadcast::Sender<LogEntry>,
}
impl AppState {
pub async fn new(config: AppConfig) -> Result<Self> {
pub async fn new(config: AppConfig, log_manager: Arc<LogBufferManager>, log_broadcast_tx: broadcast::Sender<LogEntry>) -> Result<Self> {
let persistence_client = PersistenceClient::new(config.data_persistence_service_url.clone());
let vgcs = Arc::new(Vgcs::new(&config.workflow_data_path));
@ -32,6 +37,8 @@ impl AppState {
persistence_client,
workflows: Arc::new(DashMap::new()),
vgcs,
log_manager,
log_broadcast_tx,
})
}
}

View File

@ -0,0 +1,89 @@
use std::sync::Arc;
use std::time::Duration;
use tracing::{info, warn, error};
use crate::state::AppState;
use common_contracts::workflow_types::{WorkflowTaskEvent, TaskStatus, TaskResult};
use common_contracts::subjects::SubjectMessage;
// Configuration
const TASK_MAX_DURATION_SEC: i64 = 300; // 5 minutes absolute timeout
const HEARTBEAT_TIMEOUT_SEC: i64 = 60; // 60 seconds without heartbeat
pub async fn run(state: Arc<AppState>, nats: async_nats::Client) {
info!("Task Monitor (Watchdog) started.");
let mut interval = tokio::time::interval(Duration::from_secs(1));
loop {
interval.tick().await;
let workflow_ids: Vec<uuid::Uuid> = state.workflows.iter().map(|r| *r.key()).collect();
for req_id in workflow_ids {
let mut events_to_publish = Vec::new();
if let Some(dag_arc) = state.workflows.get(&req_id) {
let dag = dag_arc.lock().await;
if dag.is_workflow_finished() {
continue;
}
let now = chrono::Utc::now().timestamp_millis();
for (task_id, node) in dag.nodes.iter() {
if node.status == TaskStatus::Running {
let mut failure_reason = None;
// 1. Check Absolute Timeout
if let Some(start) = node.started_at {
if (now - start) > TASK_MAX_DURATION_SEC * 1000 {
failure_reason = Some("Task execution timed out (5m limit)".to_string());
}
}
// 2. Check Heartbeat
if failure_reason.is_none() {
if let Some(hb) = node.last_heartbeat_at {
if (now - hb) > HEARTBEAT_TIMEOUT_SEC * 1000 {
failure_reason = Some("Task heartbeat lost (Zombie worker)".to_string());
}
} else if let Some(start) = node.started_at {
// If started but no heartbeat yet
if (now - start) > HEARTBEAT_TIMEOUT_SEC * 1000 {
failure_reason = Some("Task unresponsive (No initial heartbeat)".to_string());
}
}
}
if let Some(reason) = failure_reason {
warn!("Watchdog detected failure for task {} in workflow {}: {}", task_id, req_id, reason);
events_to_publish.push(WorkflowTaskEvent {
request_id: req_id,
task_id: task_id.clone(),
status: TaskStatus::Failed,
result: Some(TaskResult {
new_commit: None,
error: Some(reason),
summary: None,
}),
});
}
}
}
}
// Publish events outside the lock
for evt in events_to_publish {
let subject = evt.subject().to_string();
if let Ok(payload) = serde_json::to_vec(&evt) {
if let Err(e) = nats.publish(subject, payload.into()).await {
error!("Failed to publish watchdog event: {}", e);
}
}
}
}
}
}
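The timeout rules above, restated as a small pure function; this is a clarifying sketch rather than code from the diff (timestamps in milliseconds, thresholds taken from the constants at the top of this file).
fn failure_reason(now: i64, started_at: Option<i64>, last_heartbeat_at: Option<i64>) -> Option<&'static str> {
    // 1. Absolute timeout since the worker reported Running.
    if let Some(start) = started_at {
        if now - start > TASK_MAX_DURATION_SEC * 1000 {
            return Some("Task execution timed out (5m limit)");
        }
    }
    // 2. Heartbeat checks: stale heartbeat, or no heartbeat at all after the grace period.
    match (last_heartbeat_at, started_at) {
        (Some(hb), _) if now - hb > HEARTBEAT_TIMEOUT_SEC * 1000 => Some("Task heartbeat lost (Zombie worker)"),
        (None, Some(start)) if now - start > HEARTBEAT_TIMEOUT_SEC * 1000 => Some("Task unresponsive (No initial heartbeat)"),
        _ => None,
    }
}
// e.g. failure_reason(90_000, Some(0), None) == Some("Task unresponsive (No initial heartbeat)")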

View File

@ -0,0 +1,133 @@
use anyhow::Result;
use common_contracts::messages::{SyncStateCommand, TaskType, WorkflowEvent};
use common_contracts::workflow_types::TaskStatus;
use serde_json::json;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use uuid::Uuid;
use workflow_orchestrator_service::dag_scheduler::DagScheduler;
use workflow_orchestrator_service::logging::LogBufferManager;
use workflow_orchestrator_service::state::AppState;
use workflow_orchestrator_service::workflow::WorkflowEngine;
use workflow_orchestrator_service::config::AppConfig;
use futures::stream::StreamExt;
// Note: This test requires a running NATS server.
// Set NATS_URL environment variable if needed, otherwise defaults to localhost:4222
#[tokio::test]
async fn test_workflow_rehydration_flow() -> Result<()> {
// 1. Setup NATS
let nats_url = std::env::var("NATS_URL").unwrap_or_else(|_| "nats://localhost:4222".to_string());
let nats_client = async_nats::connect(&nats_url).await;
if nats_client.is_err() {
println!("Skipping test: NATS not available at {}", nats_url);
return Ok(());
}
let nats_client = nats_client?;
// 2. Setup AppState (Mocking Dependencies)
let config = AppConfig {
nats_addr: nats_url.clone(),
data_persistence_service_url: "http://localhost:3001".to_string(), // Mock URL
workflow_data_path: "/tmp/workflow_data".to_string(),
server_port: 0,
};
let (log_tx, _) = tokio::sync::broadcast::channel(100);
let log_manager = Arc::new(LogBufferManager::new("/tmp/workflow_logs"));
let state = Arc::new(AppState::new(config, log_manager.clone(), log_tx).await?);
let engine = WorkflowEngine::new(state.clone(), nats_client.clone());
// 3. Construct a Fake Workflow State (In-Memory)
let req_id = Uuid::new_v4();
let task_id = "task:fake_analysis".to_string();
let mut dag = DagScheduler::new(req_id, "init_commit".to_string());
dag.add_node(
task_id.clone(),
Some("Fake Analysis".to_string()),
TaskType::Analysis,
"fake.routing".to_string(),
json!({"some": "config"})
);
// Mark it as running so it captures stream
dag.update_status(&task_id, TaskStatus::Running);
// Insert into State
state.workflows.insert(req_id, Arc::new(Mutex::new(dag)));
// 4. Subscribe to Workflow Events (Simulating Frontend)
let subject = common_contracts::subjects::NatsSubject::WorkflowProgress(req_id).to_string();
let mut sub = nats_client.subscribe(subject.clone()).await?;
// 5. Simulate Receiving Stream Data & Logs
// In the real system, the MessageConsumer calls these. Here we call the Engine methods directly
// to simulate "Consumer received NATS msg -> updated DAG".
let log_msg = "[INFO] Starting deep analysis...".to_string();
let content_part1 = "Analysis Part 1...".to_string();
let content_part2 = "Analysis Part 2 [Done]".to_string();
engine.handle_task_log(task_id.clone(), log_msg.clone(), req_id).await?;
engine.handle_task_stream_update(task_id.clone(), content_part1.clone(), req_id).await?;
engine.handle_task_stream_update(task_id.clone(), content_part2.clone(), req_id).await?;
println!("State injected. Now simulating Page Refresh (SyncState)...");
// 6. Simulate Page Refresh -> Send SyncStateCommand
let sync_cmd = SyncStateCommand { request_id: req_id };
// We could call handle_sync_state directly or publish the command over NATS.
// Call it directly here to exercise the logic, and verify the OUTPUT via the NATS subscription.
engine.handle_sync_state(sync_cmd).await?;
// 7. Verify Snapshot Received on NATS
let mut snapshot_received = false;
// We might receive other events, loop until snapshot or timeout
let timeout = tokio::time::sleep(Duration::from_secs(2));
tokio::pin!(timeout);
loop {
tokio::select! {
Some(msg) = sub.next() => {
if let Ok(event) = serde_json::from_slice::<WorkflowEvent>(&msg.payload) {
match event {
WorkflowEvent::WorkflowStateSnapshot { task_states, .. } => {
println!("Received Snapshot!");
// Verify Task State
if let Some(ts) = task_states.get(&task_id) {
// Check Logs
assert!(ts.logs.contains(&log_msg), "Snapshot missing logs");
// Check Content
let full_content = format!("{}{}", content_part1, content_part2);
assert_eq!(ts.content.as_ref().unwrap(), &full_content, "Snapshot content mismatch");
println!("Snapshot verification passed!");
snapshot_received = true;
break;
} else {
panic!("Task state not found in snapshot");
}
},
_ => println!("Ignored other event: {:?}", event),
}
}
}
_ = &mut timeout => {
break;
}
}
}
assert!(snapshot_received, "Did not receive WorkflowStateSnapshot within timeout");
Ok(())
}

View File

@ -56,6 +56,8 @@ async fn subscribe_legacy(state: AppState, client: async_nats::Client) -> Result
Ok(())
}
use common_contracts::ack::TaskAcknowledgement;
async fn subscribe_workflow(state: AppState, client: async_nats::Client) -> Result<()> {
let routing_key = "provider.yfinance".to_string();
let subject = NatsSubject::WorkflowCommand(routing_key).to_string();
@ -65,6 +67,18 @@ async fn subscribe_workflow(state: AppState, client: async_nats::Client) -> Resu
while let Some(message) = subscriber.next().await {
info!("Received Workflow NATS message.");
// --- ACKNOWLEDGEMENT HANDSHAKE ---
if let Some(reply_to) = message.reply.clone() {
let ack = TaskAcknowledgement::Accepted;
if let Ok(payload) = serde_json::to_vec(&ack) {
if let Err(e) = client.publish(reply_to, payload.into()).await {
error!("Failed to send Acceptance Ack: {}", e);
}
}
}
// ---------------------------------
let state_clone = state.clone();
let client_clone = client.clone();
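For context, a minimal sketch of how the dispatching side could await this acknowledgement over NATS request/reply. This is an assumption about the orchestrator's dispatch path (not shown in this diff), and it assumes TaskAcknowledgement also derives Deserialize.
use common_contracts::ack::TaskAcknowledgement;

async fn dispatch_with_ack(
    nats: &async_nats::Client,
    subject: String,
    payload: Vec<u8>,
) -> anyhow::Result<TaskAcknowledgement> {
    // request() sets a reply subject; the worker above answers on it with Accepted.
    let reply = tokio::time::timeout(
        std::time::Duration::from_secs(5),
        nats.request(subject, payload.into()),
    )
    .await??;
    Ok(serde_json::from_slice(&reply.payload)?)
}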

View File

@ -3,10 +3,11 @@ use anyhow::{Result, anyhow, Context};
use serde_json::{json, Value};
use std::collections::HashMap;
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent};
use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent, CacheKey};
use common_contracts::data_formatting;
use common_contracts::persistence_client::PersistenceClient;
use crate::state::AppState;
use std::time::Duration;
pub struct YFinanceNode {
state: AppState,
@ -24,6 +25,23 @@ impl WorkflowNode for YFinanceNode {
"yfinance"
}
fn get_cache_config(&self, config: &Value) -> Option<(CacheKey, Duration)> {
let symbol = config.get("symbol").and_then(|s| s.as_str())?;
let key_parts = vec![
"yfinance",
"company_data",
symbol,
"all"
];
let cache_key = CacheKey(key_parts.join(":"));
// YFinance data (US market) - 24h TTL
let ttl = Duration::from_secs(86400);
Some((cache_key, ttl))
}
async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult> {
let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string();
let _market = config.get("market").and_then(|s| s.as_str()).unwrap_or("US").to_string();