diff --git a/docker-compose.yml b/docker-compose.yml index c7fb353..7b0bfe7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,8 @@ services: retries: 10 networks: - app-network + ports: + - "5434:5432" nats: image: nats:2.9 volumes: diff --git a/docs/2_architecture/20251129_refactor_history_context.md b/docs/2_architecture/20251129_refactor_history_context.md new file mode 100644 index 0000000..df11d5b --- /dev/null +++ b/docs/2_architecture/20251129_refactor_history_context.md @@ -0,0 +1,112 @@ +# 设计文档:统一历史记录与上下文管理重构 + +## 1. 目标 +实现一个统一且一致的历史管理系统,达成以下目标: +1. **原子化历史记录**:一个“历史记录”严格对应**一次 Workflow 执行**(由 `request_id` 标识),彻底解决历史列表重复/碎片化问题。 +2. **单一数据源**:全局上下文(VGCS/Git)作为所有文件产物(报告、日志、数据)的唯一真实存储源。 +3. **轻量化索引**:数据库(`session_data` 或新表)仅存储结构化的“索引”(Snapshot),指向 VGCS 中的 Commit 和文件路径。 + +## 2. 现状分析 +- **碎片化**:目前 `analysis_results` 表存储的是单个 Task 的结果。如果一个工作流包含 N 个分析步骤,历史列表中就会出现 N 条记录。 +- **数据冗余**:结果内容(Markdown 等)既作为文件存在 VGCS 中,又作为文本列存在 Postgres 数据库中。 +- **历史视图缺失**:缺乏一个能够代表整次执行状态(包含拓扑结构、状态、所有产物引用)的根对象,导致查询历史列表时困难。 + +## 3. 架构方案 + +### 3.1. 核心概念:工作流快照 (Workflow Snapshot) +不再将每个 Task 视为独立的历史记录,我们定义 **Workflow Snapshot** 为历史的原子单位。 + +一个 Snapshot 包含: +- **元数据**:`request_id`(请求ID), `symbol`(标的), `market`(市场), `template_id`(模板ID), `start_time`(开始时间), `end_time`(结束时间), `final_status`(最终状态)。 +- **拓扑结构**:DAG 结构(节点与边)。 +- **执行状态**:针对每个节点记录: + - `status`:状态 (Completed, Failed, Skipped) + - `output_commit`:该节点产生的 VGCS Commit Hash。 + - `artifacts`:产物映射表,Key 为产物名称,Value 为 VGCS 文件路径 (例如 `{"report": "analysis/summary.md", "log": "analysis/execution.log"}`)。 + +### 3.2. 数据存储变更 + +#### A. `workflow_history` 表 (或重构后的 `session_data`) +我们将引入一张专用表(或规范化 `session_data` 的使用)来存储 **Workflow Manifest**。 + +```sql +CREATE TABLE workflow_history ( + request_id UUID PRIMARY KEY, + symbol VARCHAR(20) NOT NULL, + market VARCHAR(10) NOT NULL, + template_id VARCHAR(50), + status VARCHAR(20) NOT NULL, -- 'Completed', 'Failed' + start_time TIMESTAMPTZ NOT NULL, + end_time TIMESTAMPTZ, + + -- Snapshot JSON 数据 + snapshot_data JSONB NOT NULL + -- { + -- "dag": { ... }, + -- "tasks": { + -- "task_id_1": { "status": "Completed", "commit": "abc1234", "paths": { "report": "..." } } + -- } + -- } +); +``` + +*注:为了减少迁移摩擦,我们可以继续使用 `session_data` 表,并指定 `data_type = 'workflow_snapshot'`,但建立专用表更有利于查询和维护。* + +#### B. VGCS (Git 上下文) 的使用规范 +- **输入**:初始 Commit 包含 `request.json`。 +- **过程**:每个 Task (Worker) 检出基础 Commit,执行工作,写入文件(报告、日志),并创建 **New Commit**。 +- **合并**:Orchestrator 负责追踪这些 Commit 的 DAG 关系。 +- **终态**:Orchestrator 创建最终的“Merge Commit”(可选,或仅引用各叶子节点的 Commit)并在 `workflow_history` 中记录。 + +### 3.3. 组件职责划分 + +#### 1. Worker 服务 (Report Gen, Providers) +- **输入**:接收 `base_commit`, `task_id`, `output_path_config`。 +- **动作**: + - 初始化 `WorkerContext` (VGCS)。 + - 将 `report.md` 写入 `output_path`。 + - 将 `_execution.md` (日志) 写入 `log_path`。 + - **Commit**:提交更改,生成 Commit Hash。 +- **输出**:返回 `new_commit_hash` 和 `artifact_paths` (Map) 给 Orchestrator。 +- **禁止**:Worker 不再直接向数据库的 `analysis_results` 表写入数据。 + +#### 2. Workflow Orchestrator (编排器) +- **协调**:从 `TaskCompleted` 事件中收集 `new_commit_hash` 和 `artifact_paths`。 +- **状态追踪**:更新内存中的 DAG 状态。 +- **完成处理**: + - 当所有任务结束后,生成 **Workflow Snapshot**。 + - 调用 `persistence-service` 将 Snapshot 保存至 `workflow_history`。 + - 发送 `WorkflowCompleted` 事件。 + +#### 3. Data Persistence Service (持久化服务) +- **新接口**:`GET /api/v1/history` + - 返回 `workflow_history` 列表(摘要信息)。 +- **新接口**:`GET /api/v1/history/{request_id}` + - 返回完整的 Snapshot(详情信息)。 +- **旧接口处理**:废弃 `GET /api/v1/analysis-results` 或将其重定向为查询 `workflow_history`。 + +#### 4. 
Frontend (前端) +- **历史页**:调用 `/api/v1/history`。每个 `request_id` 只展示一行。 +- **报告页**: + - 获取特定的历史详情。 + - 使用 `artifact_paths` + `commit_hash` 通过 VGCS API (或代理)以此获取文件内容。 + +## 4. 实施计划 + +1. **Schema 定义**:定义 `WorkflowSnapshot` 结构体及 SQL 迁移脚本 (`workflow_history`)。 +2. **Orchestrator 改造**: + - 修改 `handle_task_completed` 以聚合 `artifact_paths`。 + - 实现 `finalize_workflow` 逻辑,用于构建并保存 Snapshot。 +3. **Worker 改造**: + - 确保 `report-generator` 在 `TaskResult` 中返回结构化的 `artifact_paths`。 + - 移除 `report-generator` 中对 `create_analysis_result` 的数据库调用。 +4. **Persistence Service 改造**: + - 实现 `workflow_history` 的 CRUD 操作。 +5. **Frontend 改造**: + - 更新 API 调用以适配新的历史记录接口。 + +## 5. 核心收益 +- **单一事实来源**:文件存 Git,元数据存 DB,杜绝数据不同步。 +- **历史记录原子性**:一次运行 = 一条记录。 +- **可追溯性**:每个产物都精确关联到一个 Git Commit。 + diff --git a/docs/3_project_management/tasks/pending/20251126_design_0_overview.md b/docs/3_project_management/tasks/completed/20251126_design_0_overview.md similarity index 100% rename from docs/3_project_management/tasks/pending/20251126_design_0_overview.md rename to docs/3_project_management/tasks/completed/20251126_design_0_overview.md diff --git a/docs/3_project_management/tasks/pending/20251126_design_1_vgcs.md b/docs/3_project_management/tasks/completed/20251126_design_1_vgcs.md similarity index 100% rename from docs/3_project_management/tasks/pending/20251126_design_1_vgcs.md rename to docs/3_project_management/tasks/completed/20251126_design_1_vgcs.md diff --git a/docs/3_project_management/tasks/pending/20251126_design_2_doc_os.md b/docs/3_project_management/tasks/completed/20251126_design_2_doc_os.md similarity index 100% rename from docs/3_project_management/tasks/pending/20251126_design_2_doc_os.md rename to docs/3_project_management/tasks/completed/20251126_design_2_doc_os.md diff --git a/docs/3_project_management/tasks/pending/20251126_design_3_worker_runtime.md b/docs/3_project_management/tasks/completed/20251126_design_3_worker_runtime.md similarity index 100% rename from docs/3_project_management/tasks/pending/20251126_design_3_worker_runtime.md rename to docs/3_project_management/tasks/completed/20251126_design_3_worker_runtime.md diff --git a/docs/3_project_management/tasks/pending/20251126_design_4_orchestrator.md b/docs/3_project_management/tasks/completed/20251126_design_4_orchestrator.md similarity index 100% rename from docs/3_project_management/tasks/pending/20251126_design_4_orchestrator.md rename to docs/3_project_management/tasks/completed/20251126_design_4_orchestrator.md diff --git a/docs/3_project_management/tasks/pending/20251128_design_deep_research.md b/docs/3_project_management/tasks/pending/20251128_design_deep_research.md new file mode 100644 index 0000000..058a0b3 --- /dev/null +++ b/docs/3_project_management/tasks/pending/20251128_design_deep_research.md @@ -0,0 +1,171 @@ +# 设计方案: Deep Research Service (Reactive & Git-Native) + +## 1. 定位与核心理念 (Overview) + +**Deep Research** 是一个独立的、正交的数据降维与提炼服务。它的核心任务解决 "Context Overflow" 问题——当业务数据量超过大模型上下文窗口,或数据复杂度超过单次 Prompt 处理能力时,通过**自主智能体 (Autonomous Agent)** 进行迭代式的分析、摘要和结构化提取。 + +### 1.1 核心原则 +1. **独立性 (Independence)**: 作为一个独立的微服务 (`deep-research-service`) 运行,拥有独立的资源配额和生命周期。 +2. **Git 原生 (Git-Native)**: 利用底层的 VGCS (Virtual Git Context System) 的分支 (Branching) 和合并 (Merging) 机制,实现原始数据与研究成果的**并存**。 +3. **响应式闭环 (Reactive Loop)**: 摒弃线性的 Map-Reduce,采用 "Plan -> Act -> Critic" 的动态循环,根据当前发现的信息实时调整研究目标。 +4. **安全护栏 (Guardrails)**: 引入熵减检查、语义收敛检测和硬性预算约束,防止智能体陷入死循环或发散。 + +--- + +## 2. 
系统架构 (Architecture) + +### 2.1 交互流程图 + +```mermaid +graph TD + subgraph "Orchestration Layer" + Orch[Orchestrator Service] + DAG[DAG Scheduler] + end + + subgraph "Storage Layer (VGCS)" + Repo[Git Repo] + MainBr[Branch: main] + DRBr[Branch: feat/deep_research] + end + + subgraph "Deep Research Layer" + Monitor[Resource Monitor] + Reactor[Reactive Engine] + Guard[Safety Guardrails] + end + + %% Flow + Orch -->|1. Task Pending (Check Size)| Monitor + Monitor -->|2. Threshold Exceeded| Orch + Orch -->|3. Suspend Task & Create Branch| Repo + Repo -.-> MainBr + MainBr -.-> DRBr + + Orch -->|4. Dispatch Job| Reactor + Reactor <-->|5. Read/Write| DRBr + Reactor <-->|6. Validate Step| Guard + + Reactor -->|7. Completion| Orch + Orch -->|8. Merge/Rebase| Repo + DRBr -->|Merge| MainBr + Orch -->|9. Resume Task| DAG +``` + +### 2.2 分支策略 (Branching Strategy) + +我们采用 **"并存 (Co-exist)"** 策略,而非简单的替换。 + +* **Main Branch**: 包含原始海量数据 (e.g., 100份 PDF, 10万行 CSV)。 +* **Research Branch**: 从 Main 分出。Deep Research Agent 在此分支上工作,生成新的提炼文件 (e.g., `research_summary.md`, `knowledge_graph.json`)。 +* **Merge**: 任务完成后,Research Branch 被 merge 回 Main。 + * **下游任务视角**: 下游任务可以看到原始数据(如果在 `input` 中显式请求),但默认通过 Context Selector 优先获取 `research_summary.md`。 + * **User Option**: 用户可以选择 "Re-run from Deep Research",即基于 merge 后的 commit 继续执行;也可以选择 "Raw Run",强制使用原始数据(虽然可能失败)。 + +--- + +## 3. 核心逻辑: The Reactive Engine + +Deep Research 不仅仅是摘要,而是一个**有状态的探索过程**。 + +### 3.1 状态循环 (The Loop) + +Agent 维护一个动态的 `State`: +* `Goal Stack`: 当前的研究目标栈(初始由 Prompt 决定,后续动态分裂)。 +* `Knowledge Base`: 已获取的确信事实。 +* `Trace Log`: 思考路径。 + +**执行步骤**: +1. **Observe**: 读取当前分支的数据(或上一轮的输出)。 +2. **Plan**: 基于当前 Goal 和 Observe 的结果,制定下一步行动(e.g., "搜索关键词 X", "读取文件 Y 的第 10-20 页", "对文件 Z 进行摘要")。 +3. **Act**: 执行行动,写入中间结果到 Git。 +4. **Reflect (Critic)**: 检查结果是否满足 Goal?是否发现了新线索需要更新 Goal? + +### 3.2 保驾护航机制 (Guardrails) + +为了防止 Agent "幻觉"、"死循环" 或 "跑题",必须实施严格的监控。 + +#### A. 熵减检查 (Entropy/Novelty Check) +* **目的**: 防止 Agent 反复咀嚼相同的信息。 +* **机制**: 计算新生成的 Research Note 与已有 Knowledge Base 的 **Embedding 相似度**。 +* **策略**: 如果 `Similarity > 0.95`,判定为无效迭代,强制终止当前子路径或回退。 + +#### B. 语义距离收敛 (Semantic Convergence) +* **目的**: 防止 Agent 跑题(Divergence)。 +* **机制**: 实时计算 `Current Research Content` 向量与 `Initial User Query` 向量的距离。 +* **策略**: 距离应随迭代呈**震荡下降**趋势。如果距离显著增大(e.g., 从研究 "Apple 财务" 跑题到 "越南美食"),Supervisor 介入干预。 + +#### C. 预算硬约束 (Hard Budgeting) +* **Token Cap**: 单次 Research 任务上限(e.g., 200k tokens)。 +* **Step Cap**: 最大迭代轮数(e.g., 10 loops)。 +* **Time Cap**: 超时强制终止。 + +--- + +## 4. 触发与配置 (Triggering & Config) + +### 4.1 自动触发 (Auto-Trigger) +* **Pre-flight Check**: Orchestrator 在调度 Analysis Task 前,先调用 `ContextResolver` 获取输入文件的 Metadata。 +* **Threshold Logic**: + ```rust + let total_tokens = estimate_tokens(file_list); + let model_limit = llm_config.context_window; // e.g. 128k + + if total_tokens > (model_limit * 0.8) { + trigger_deep_research(); + } + ``` + +### 4.2 配置驱动 (Config-Driven) +允许在 `AnalysisTemplate` 中显式控制: + +```json +{ + "modules": { + "financial_analysis": { + "deep_research": { + "strategy": "auto", // auto | always | never + "threshold_tokens": 50000, + "focus_areas": ["risk", "cash_flow"] // Initial Goals + } + } + } +} +``` + +### 4.3 基础设施配置更新 + +需要在 `LlmProvidersConfig` 中补充模型能力参数,以便系统知道 "上限" 是多少。 + +```rust +pub struct ModelConfig { + pub model_id: String, + pub context_window: u32, // e.g., 128000 + pub pricing_tier: String, // e.g., "high" + // ... +} +``` + +--- + +## 5. 实施路线图 (Roadmap) + +1. 
**Phase 1: Infrastructure** + * 在 `common-contracts` 定义 Deep Research 的 Task 结构和配置。 + * 更新 `LlmProvidersConfig` 支持 context window 参数。 + * 在 `Orchestrator` 实现 "Suspend & Branch" 逻辑。 + +2. **Phase 2: The Service (MVP)** + * 构建 `deep-research-service` (Rust)。 + * 实现基础的 **Map-Reduce** 逻辑作为 v0.1(先跑通流程)。 + * 集成 VGCS 分支操作。 + +3. **Phase 3: The Reactive Agent** + * 引入向量数据库(或内存向量索引)用于 Guardrails。 + * 实现 Plan-Act-Critic 循环。 + * 实现熵减和收敛检查。 + +4. **Phase 4: Integration** + * 全自动触发测试。 + * 前端可视化:展示 Research 过程的 Trace 和中间产物。 + diff --git a/docs/3_project_management/tasks/pending/20251128_dry_run_report.md b/docs/3_project_management/tasks/pending/20251128_dry_run_report.md new file mode 100644 index 0000000..1f50f97 --- /dev/null +++ b/docs/3_project_management/tasks/pending/20251128_dry_run_report.md @@ -0,0 +1,70 @@ +# 历史报告功能实施前 Dry Run 报告 + +## 概览 +本报告对“历史报告”功能的实施方案进行了全面的“Dry Run”检查。我们检查了后端服务 (`data-persistence-service`, `api-gateway`, `report-generator-service`) 和前端代码 (`schema.gen.ts`, UI 组件),确认了所有必要的变更点和潜在的遗漏。 + +## 检查清单与发现 (Dry Run Findings) + +### 1. 后端: Report Generator Service (Fix Missing Persistence) +* **现状**: `run_vgcs_based_generation` 函数在生成报告后,仅提交到了 VGCS (Git),没有调用 `persistence_client` 写入数据库。 +* **影响**: 数据库中没有记录,导致历史查询接口返回空列表。 +* **修正动作**: + * 在 `worker.rs` 的 `run_vgcs_based_generation` 函数末尾(生成 commit 后),构建 `NewAnalysisResult` 结构体。 + * 调用 `persistence_client.create_analysis_result`。 + * 注意:`NewAnalysisResult` 需要 `module_id` 和 `template_id`,目前这些信息在 `command` 中是可选的 (`Option`)。在 `message_consumer.rs` 中解析 `WorkflowTaskCommand` 时,我们已经提取了这些信息并放入了 `GenerateReportCommand`。需要确保传递链路完整。 + +### 2. 后端: Data Persistence Service (Query Update) +* **现状**: + * `AnalysisQuery` 结构体强制要求 `symbol: String`。 + * DB 查询 `get_analysis_results` 强制 `WHERE symbol = $1`。 +* **修正动作**: + * 修改 `AnalysisQuery`,将 `symbol` 改为 `Option`。 + * 修改 `get_analysis_results` SQL,使其根据 symbol 是否存在动态构建 `WHERE` 子句(或者使用 `COALESCE` 技巧,但动态构建更清晰)。 + * 在 SQL 中强制加上 `LIMIT 10`(或者通过参数控制,为了简化本次只做最近10条,建议硬编码默认值或加 `limit` 参数)。 + +### 3. 后端: API Gateway (Endpoint Update) +* **现状**: + * `AnalysisResultQuery` 结构体定义了 `symbol: String`。 + * `get_analysis_results_by_symbol` 处理器绑定了该 Query。 + * 没有 `GET /api/v1/analysis-results/:id` 接口。 +* **修正动作**: + * 修改 `AnalysisResultQuery` 为 `symbol: Option`。 + * 更新 `get_analysis_results` 处理器以适应可选参数。 + * 新增 `get_analysis_result_by_id` 处理器,代理请求到 persistence service。 + * **关键遗漏检查**: 确保 `utoipa` 的宏定义 (`#[utoipa::path(...)]`) 也同步更新,否则生成的 OpenAPI 文档不对,前端 schema 就不会更新。 + +### 4. 前端: Schema与API客户端 +* **现状**: `schema.gen.ts` 是自动生成的。 +* **动作**: + * 后端修改完并启动后,运行脚本 `scripts/update_api_spec.sh` (或类似机制) 重新生成 `openapi.json`。 + * 前端运行 `npm run openapi-ts` 更新 `schema.gen.ts`。 + * **确认**: `get_analysis_results` 的参数应变为可选,且新增 `get_analysis_result_by_id` 方法。 + +### 5. 前端: UI 组件 +* **Dashboard / Header**: + * 需要新增 `RecentReportsDropdown` 组件。 + * 逻辑:Mount 时调用 `api.get_analysis_results()` (无参),获取列表。 + * 渲染:下拉列表,点击项使用 `Link` 跳转。 +* **HistoricalReportPage**: + * 新增路由 `/history/:id` 在 `App.tsx`。 + * 组件逻辑:获取 `id` 参数 -> 调用 `api.get_analysis_result_by_id({ id })` -> 渲染 Markdown。 + * **复用**: 可以复用 `TaskDetailView` 中的 Markdown 渲染逻辑(样式一致性)。 + +## 风险评估与应对 +* **数据不一致**: 如果 Worker 写入 Git 成功但写入 DB 失败怎么办? + * *应对*: 记录 Error 日志。本次暂不引入复杂的分布式事务。DB 缺失仅导致历史列表少一条,不影响核心业务(报告已生成且在 Git 中)。 +* **Schema 类型不匹配**: 手动修改后端 struct 后,前端生成的 TS 类型可能报错。 + * *应对*: 严格按照 `common-contracts` 定义 DTO,确保 `utoipa` 宏准确描述 `Option` 类型。 + +## 执行结论 +计划可行。已识别关键遗漏(Worker 持久化缺失)。 + +**执行顺序**: +1. **Fix Worker Persistence** (最关键,确保新数据能进去) +2. **Update Persistence Service** (支持无 symbol 查询) +3. 
**Update API Gateway** (暴露接口) +4. **Update OpenAPI & Frontend Client** +5. **Implement Frontend UI** + +准备开始执行。 + diff --git a/docs/3_project_management/tasks/pending/20251128_unified_architecture_design.md b/docs/3_project_management/tasks/pending/20251128_unified_architecture_design.md new file mode 100644 index 0000000..2ab542d --- /dev/null +++ b/docs/3_project_management/tasks/pending/20251128_unified_architecture_design.md @@ -0,0 +1,137 @@ +# 全栈一致性与历史回放增强设计 (Unified Architecture Design) + +## 1. 背景与目标 + +当前系统存在三个主要割裂点: +1. **节点行为不一致**: Analysis 节点输出报告,而部分 Data Provider (YFinance/Mock) 仅抓取数据无可视化输出(虽已部分修复,但缺乏强制规范)。 +2. **实时与历史割裂**: 实时页面依赖 SSE 推送,历史页面缺乏状态恢复机制,导致无法查看 DAG 结构和执行日志。 +3. **日志展示分散**: 实时日志是全局流,难以对应到具体任务;历史日志未与 UI 深度集成。 + +**本设计旨在将上述问题合并为一个系统性工程,实现以下目标:** +* **后端标准化**: 所有节点必须通过统一 Trait 实现,强制产出 Markdown 报告和流式更新。 +* **前端统一化**: 使用同一套 `ReportPage` 逻辑处理“实时监控”和“历史回放”。 +* **上下文完整性**: 无论是实时还是历史,都能查看 DAG 状态、节点报告、以及执行日志 (`_execution.md`)。 + +--- + +## 2. 后端架构升级 (Backend Architecture) + +### 2.1. 统一节点运行时 (`WorkflowNode` Trait) + +引入强制性接口,确保所有 Worker 行为一致。 + +* **Trait 定义**: + ```rust + #[async_trait] + pub trait WorkflowNode { + async fn execute(&self, ctx: &NodeContext, config: &Value) -> Result; + fn render_report(&self, result: &NodeExecutionResult) -> Result; + } + ``` +* **`WorkflowNodeRunner` (Harness)**: + * **职责**: 负责 NATS 订阅、VGCS 读写、Git Commit、错误处理。 + * **增强**: + * 自动将 `execute` 产生的日志写入 `_execution.md`。 + * 自动推送 `TaskStreamUpdate` (包含 Report Markdown)。 + * 自动推送 `TaskLog` (带 `task_id` 的结构化日志,用于前端分流)。 + +### 2.2. 工作流快照持久化 (Snapshot Persistence) + +为了支持历史回放,Orchestrator 必须在工作流结束时保存“案发现场”。 + +* **触发时机**: `handle_task_completed` 检测到 Workflow 结束 (Completed/Failed)。 +* **保存内容**: `WorkflowStateSnapshot` (DAG 结构、每个 Task 的最终 Status、Output Commit Hash)。 +* **存储位置**: `data-persistence-service` -> `session_data` 表。 + * `data_type`: "workflow_snapshot" + * `request_id`: workflow_id +* **API**: `GET /api/v1/workflow/snapshot/{request_id}` (API Gateway 转发)。 + +--- + +## 3. 前端架构升级 (Frontend Architecture) + +### 3.1. 统一状态管理 (`useWorkflowStore`) + +改造 Store 以支持“双模式”加载。 + +* **State**: 增加 `mode: 'realtime' | 'historical'`。 +* **Action `initialize(id)`**: + 1. 重置 Store。 + 2. **尝试 SSE 连接** (`/api/v1/workflow/events/{id}`)。 + * 如果连接成功且收到 `WorkflowStarted` / `TaskStateChanged`,进入 **Realtime Mode**。 + 3. **并行/Fallback 调用 Snapshot API** (`/api/v1/workflow/snapshot/{id}`)。 + * 如果 SSE 连接失败(404/Closed,说明任务已结束),或者 Snapshot 返回了数据: + * 调用 `loadFromSnapshot(snapshot)` 填充 DAG 和 Task 状态。 + * 进入 **Historical Mode**。 + +### 3.2. 统一页面逻辑 (`ReportPage.tsx`) + +不再区分 `HistoricalReportPage`,统一使用 `ReportPage`。 + +* **DAG 可视化**: 复用 `WorkflowVisualizer`,数据源由 Store 提供(无论是 SSE 来的还是 Snapshot 来的)。 +* **状态指示**: + * 实时模式:显示 Spinner 和实时进度。 + * 历史模式:显示最终结果 Badge,并提示“Viewing History”。 + +### 3.3. 沉浸式报告与调试面板 (Immersive Report & Debugging Panel) + +为了提升用户体验,我们将摒弃原本“三等分标签页”的设计,采用 **"主视图 + 侧边栏"** 的布局策略,实现“隐形调试”。 + +* **主视图 (Main View - The Report)**: + * **定位**: 面向最终用户,强调阅读体验。 + * **布局**: 占据屏幕中央核心区域,无干扰展示 Markdown 渲染结果。 + * **状态栏**: 顶部仅保留最关键信息(任务状态 Badge、耗时)。 +* **调试面板 (Debug Panel - The Inspector)**: + * **定位**: 面向开发者和排查问题,默认隐藏。 + * **入口**: 顶部导航栏右侧的 "Terminal/Code" 图标按钮。 + * **交互**: 点击从屏幕右侧滑出 (Sheet/Drawer),支持拖拽调整宽度。 + * **内容结构**: 面板内采用 Tabs 组织调试信息: + 1. **Logs**: 聚合实时流日志与 `_execution.md` 回放。 + 2. **Context**: 文件系统快照 (Context Explorer),展示 Input/Output 文件及 Diff。 + 3. **Raw**: 任务原始配置 (Config) 与元数据 (Metadata)。 + +这样的设计实现了“一步到位”:普通用户完全感知不到调试页面的存在,而开发者只需一次点击即可获得所有深层上下文。 + +### 3.4. 
历史记录入口 (`RecentReports`) + +* **组件**: `RecentReportsDropdown` (Header 区域)。 +* **逻辑**: 调用 `GET /api/v1/analysis-results?limit=10`。 +* **跳转**: 点击跳转到 `/report/{id}` (复用统一页面)。 + +--- + +## 4. 实施计划 (Implementation Plan) + +### Phase 1: 后端 - 统一运行时 (Backend Standardization) +1. 在 `common-contracts` 实现 `WorkflowNode` Trait 和 `WorkflowNodeRunner`。 +2. 重构 `yfinance`, `tushare`, `mock` Provider 使用新架构。 + * 确保它们都生成 `report.md` 和 `_execution.md`。 +3. 验证实时流推送是否包含正确的 `task_id` 日志。 + +### Phase 2: 后端 - 快照持久化 (Snapshot Persistence) +1. `workflow-orchestrator`: 结束时保存 `WorkflowStateSnapshot` 到 Session Data。 +2. `api-gateway`: 暴露 Snapshot 查询接口。 +3. `data-persistence`: 优化 `get_analysis_results` 支持全局最近 10 条查询。 + +### Phase 3: 前端 - 统一页面与日志 (Frontend Unification) +1. 改造 `useWorkflowStore` 支持 Snapshot Hydration。 +2. 改造 `ReportPage` 实现 SSE + Snapshot 双重加载策略。 +3. 改造 `TaskDetailView` 为“沉浸式”布局: + * 默认仅展示 Markdown Viewer。 + * 添加 Right Sheet 组件承载 Logs/Context。 +4. 实现 `RecentReportsDropdown`。 + +### Phase 4: 清理与验证 +1. 删除旧的 `HistoricalReportPage` 路由和组件。 +2. 验证全流程: + * **实时**: 启动新任务 -> 看到 DAG 生长 -> 看到节点实时 Log -> 看到 Report 生成。 + * **历史**: 点击下拉框 -> 加载旧任务 -> 看到完整 DAG -> 点击节点看到 Report -> 打开 Inspector 看到 Logs。 + +--- + +## 5. 废弃文档 + +本设计取代以下文档: +- `docs/3_project_management/tasks/pending/20251128_backend_unified_worker_trait.md` +- `tasks/pending/20251128_historical_playback_design.md` +- `docs/3_project_management/tasks/pending/20251128_historical_reports_design.md` diff --git a/docs/tasks/pending/20251129_refactor_remove_analysis_results.md b/docs/tasks/pending/20251129_refactor_remove_analysis_results.md new file mode 100644 index 0000000..185acfb --- /dev/null +++ b/docs/tasks/pending/20251129_refactor_remove_analysis_results.md @@ -0,0 +1,83 @@ +# 任务:重构历史记录与上下文管理(破坏性拆除与重建) + +## 1. 目标 +彻底移除旧的 `analysis_results` 表及其相关基础设施,建立基于 `workflow_history` + `VGCS` 的新一代历史记录与上下文管理系统。 + +## 2. 执行策略 +**破坏性拆除 (Destructive Refactoring)**:直接删除数据库表定义,依靠编译器报错和搜索工具定位并清除所有相关代码,确保不留死代码。 + +## 3. 详细步骤 + +### 阶段一:破坏性拆除 (Demolition) + +1. **数据库层** + * 删除 `analysis_results` 表的 SQL 定义 (Migration)。 + * 创建新的 Migration:`DROP TABLE analysis_results;`。 + * 删除 `AnalysisResult` 相关的 Model 定义 (`services/data-persistence-service/src/models.rs`, `common-contracts`). + +2. **持久化服务层 (Data Persistence Service)** + * 删除 `src/api/analysis.rs` (API Handler)。 + * 删除 `src/db/analysis_results.rs` (DB Access)。 + * 清理 `src/api/mod.rs` 路由注册。 + +3. **公共契约层 (Common Contracts)** + * 删除 `AnalysisResultDto`, `NewAnalysisResult` 等 DTO。 + * 删除 `PersistenceClient` 中的 `create_analysis_result`, `get_analysis_results` 方法。 + +4. **生产者层 (Report Generator Service)** + * 移除 `worker.rs` 中调用 `create_analysis_result` 的代码。 + * **保留**:文件写入 VGCS 的逻辑(这是我们新方案的基础)。 + +5. **网关与前端层 (API Gateway & Frontend)** + * 移除 API Gateway 中 `/analysis-results` 的转发。 + * 前端相关调用代码暂时注释或标记为 TODO (待对接新接口)。 + +### 阶段二:基础设施重建 (Reconstruction) + +1. **数据库层** + * 新建 `workflow_history` 表。 + * Schema 定义: + ```sql + CREATE TABLE workflow_history ( + request_id UUID PRIMARY KEY, + symbol VARCHAR(20) NOT NULL, + market VARCHAR(10) NOT NULL, + template_id VARCHAR(50), + status VARCHAR(20) NOT NULL, + start_time TIMESTAMPTZ NOT NULL, + end_time TIMESTAMPTZ, + snapshot_data JSONB NOT NULL -- 包含 DAG 结构、Commit Hash、Artifact Paths + ); + ``` + +2. **公共契约层 (Common Contracts)** + * 定义 `WorkflowHistoryDto` 和 `WorkflowSnapshot` 结构。 + * 更新 `PersistenceClient`,增加 `create_workflow_history` 和 `get_workflow_history` 方法。 + +3. 
**持久化服务层 (Data Persistence Service)** + * 实现 `src/api/history.rs` 和 `src/db/history.rs`。 + * 支持按 `request_id` 查询详情,按 `symbol`/`template_id` 查询列表。 + +### 阶段三:编排与集成 (Orchestration & Integration) + +1. **Orchestrator Service** + * **收集逻辑**:修改 `handle_task_completed`,从 Task Result 中收集 `artifact_paths` 和 `commit_hash`。 + * **结束逻辑**:实现 `finalize_workflow`。 + * 当工作流结束时,构建 `WorkflowSnapshot`。 + * 调用 `persistence-service` 写入 `workflow_history`。 + +2. **Frontend** + * 重写 `useHistory` Hook,对接 `/api/v1/history`。 + * 重写 `ReportPage`,使用 Snapshot 中的 Commit Hash + Path 通过 VGCS API 获取报告内容。 + +## 4. 验证标准 +1. **编译通过**:所有服务无 `analysis_results` 相关报错。 +2. **数据纯净**:数据库中无 `analysis_results` 表,只有 `workflow_history`。 +3. **功能正常**: + * 运行一个 Workflow,能在“历史记录”页看到**一条**记录。 + * 点击进入详情,能正确加载各步骤的报告文件(从 VGCS)。 + +## 5. 注意事项 +* 这是一个 Breaking Change,执行期间历史数据会不可见(直到前端适配新接口)。 +* VGCS 的读取接口 (`read_file`) 需要确保可用性,供前端/网关调用。 + diff --git a/frontend/package-lock.json b/frontend/package-lock.json index a9816b2..b5b9080 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -10,6 +10,8 @@ "dependencies": { "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-dialog": "^1.1.15", + "@radix-ui/react-dropdown-menu": "^2.1.16", + "@radix-ui/react-icons": "^1.3.2", "@radix-ui/react-navigation-menu": "^1.2.14", "@radix-ui/react-popover": "^1.1.15", "@radix-ui/react-progress": "^1.1.8", @@ -23,6 +25,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cmdk": "^1.1.1", + "date-fns": "^4.1.0", "elkjs": "^0.11.0", "lucide-react": "^0.554.0", "react": "^19.2.0", @@ -1477,6 +1480,35 @@ } } }, + "node_modules/@radix-ui/react-dropdown-menu": { + "version": "2.1.16", + "resolved": "http://npm.repo.lan/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz", + "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-menu": "2.1.16", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-focus-guards": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", @@ -1517,6 +1549,15 @@ } } }, + "node_modules/@radix-ui/react-icons": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-icons/-/react-icons-1.3.2.tgz", + "integrity": "sha512-fyQIhGDhzfc9pK2kH6Pl9c4BDJGfMkPqkyIgYDthyNYoNg3wVhoJMMh19WS4Up/1KMPFVpNsT2q3WmXn2N1m6g==", + "license": "MIT", + "peerDependencies": { + "react": "^16.x || ^17.x || ^18.x || ^19.0.0 || ^19.0.0-rc" + } + }, "node_modules/@radix-ui/react-id": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", @@ -1535,6 +1576,64 @@ } } }, + "node_modules/@radix-ui/react-menu": { + "version": "2.1.16", + "resolved": "http://npm.repo.lan/@radix-ui/react-menu/-/react-menu-2.1.16.tgz", + "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==", + 
"license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-roving-focus": "1.1.11", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-slot": { + "version": "1.2.3", + "resolved": "http://npm.repo.lan/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", + "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/@radix-ui/react-navigation-menu": { "version": "1.2.14", "resolved": "https://registry.npmjs.org/@radix-ui/react-navigation-menu/-/react-navigation-menu-1.2.14.tgz", @@ -4423,6 +4522,16 @@ "node": ">=12" } }, + "node_modules/date-fns": { + "version": "4.1.0", + "resolved": "http://npm.repo.lan/date-fns/-/date-fns-4.1.0.tgz", + "integrity": "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/kossnocorp" + } + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", diff --git a/frontend/package.json b/frontend/package.json index 4edaba1..69ea00c 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -13,6 +13,8 @@ "dependencies": { "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-dialog": "^1.1.15", + "@radix-ui/react-dropdown-menu": "^2.1.16", + "@radix-ui/react-icons": "^1.3.2", "@radix-ui/react-navigation-menu": "^1.2.14", "@radix-ui/react-popover": "^1.1.15", "@radix-ui/react-progress": "^1.1.8", @@ -26,6 +28,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cmdk": "^1.1.1", + "date-fns": "^4.1.0", "elkjs": "^0.11.0", "lucide-react": "^0.554.0", "react": "^19.2.0", diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 01b106e..7f75ae7 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -6,6 +6,7 @@ import { ConfigPage } from '@/pages/config/ConfigPage'; import { Dashboard } from '@/pages/Dashboard'; import { ReportPage } from '@/pages/ReportPage'; +import { HistoricalReportPage } from '@/pages/HistoricalReportPage'; import { DocsPage } from '@/pages/DocsPage'; @@ -16,6 +17,7 @@ function App() { }> } /> } /> + } /> } /> } /> diff --git a/frontend/src/api/schema.gen.ts b/frontend/src/api/schema.gen.ts index 
34557ec..b6d080a 100644 --- a/frontend/src/api/schema.gen.ts +++ b/frontend/src/api/schema.gen.ts @@ -33,6 +33,17 @@ export type LlmConfig = Partial<{ model_id: string | null; temperature: number | null; }>; +export type AnalysisResultDto = { + content: string; + created_at: string; + id: string; + meta_data: Value; + module_id: string; + request_id: string; + symbol: string; + template_id: string; +}; +export type Value = unknown; export type AnalysisTemplateSet = { modules: {}; name: string; @@ -190,13 +201,29 @@ export type WorkflowEvent = | { payload: { task_graph: WorkflowDag; + tasks_metadata: {}; tasks_output: {}; tasks_status: {}; timestamp: number; }; type: "WorkflowStateSnapshot"; }; +export type TaskMetadata = Partial<{ + execution_log_path: string | null; + output_path: string | null; +}>; +export const Value = z.unknown(); +export const AnalysisResultDto = z.object({ + content: z.string(), + created_at: z.string().datetime({ offset: true }), + id: z.string().uuid(), + meta_data: Value, + module_id: z.string(), + request_id: z.string().uuid(), + symbol: z.string(), + template_id: z.string(), +}); export const LlmConfig = z .object({ max_tokens: z.union([z.number(), z.null()]), @@ -364,6 +391,12 @@ export const TaskDependency = z.object({ from: z.string(), to: z.string(), }); +export const TaskMetadata = z + .object({ + execution_log_path: z.union([z.string(), z.null()]), + output_path: z.union([z.string(), z.null()]), + }) + .partial(); export const TaskStatus = z.enum([ "Pending", "Scheduled", @@ -463,6 +496,7 @@ export const WorkflowEvent = z.union([ payload: z .object({ task_graph: WorkflowDag, + tasks_metadata: z.record(TaskMetadata), tasks_output: z.record(z.union([z.string(), z.null()])), tasks_status: z.record(TaskStatus), timestamp: z.number().int(), @@ -474,6 +508,8 @@ export const WorkflowEvent = z.union([ ]); export const schemas = { + Value, + AnalysisResultDto, LlmConfig, SelectionMode, ContextSelectorConfig, @@ -505,6 +541,7 @@ export const schemas = { HealthStatus, StartWorkflowCommand, TaskDependency, + TaskMetadata, TaskStatus, TaskType, TaskNode, @@ -513,6 +550,41 @@ export const schemas = { }; export const endpoints = makeApi([ + { + method: "get", + path: "/api/v1/analysis-results", + alias: "get_analysis_results_by_symbol", + requestFormat: "json", + parameters: [ + { + name: "symbol", + type: "Query", + schema: z.string().optional(), + }, + ], + response: z.array(AnalysisResultDto), + }, + { + method: "get", + path: "/api/v1/analysis-results/:id", + alias: "get_analysis_result_by_id", + requestFormat: "json", + parameters: [ + { + name: "id", + type: "Path", + schema: z.string().uuid(), + }, + ], + response: AnalysisResultDto, + errors: [ + { + status: 404, + description: `Not found`, + schema: z.void(), + }, + ], + }, { method: "get", path: "/api/v1/configs/analysis_template_sets", diff --git a/frontend/src/components/RealtimeLogs.tsx b/frontend/src/components/RealtimeLogs.tsx deleted file mode 100644 index 7c665b5..0000000 --- a/frontend/src/components/RealtimeLogs.tsx +++ /dev/null @@ -1,75 +0,0 @@ -import { useState } from 'react'; -import { Terminal, ChevronUp, ChevronDown } from 'lucide-react'; -import { Card } from "@/components/ui/card"; -import { Button } from "@/components/ui/button"; -import { useAutoScroll } from '@/hooks/useAutoScroll'; -import { cn } from "@/lib/utils"; - -interface LogEntry { - taskId: string; - log: string; -} - -interface RealtimeLogsProps { - logs: LogEntry[]; - className?: string; -} - -export function RealtimeLogs({ 
logs, className }: RealtimeLogsProps) { - const [isExpanded, setIsExpanded] = useState(false); - const logsViewportRef = useAutoScroll(logs.length); - - const toggleExpand = () => { - setIsExpanded(!isExpanded); - }; - - return ( - -
-
- - Real-time Logs - - {/* Preview last log when collapsed */} - {!isExpanded && logs.length > 0 && ( -
- [{logs[logs.length - 1].taskId}] - {logs[logs.length - 1].log} -
- )} - {!isExpanded && logs.length === 0 && ( - Waiting for logs... - )} -
- - -
- - {/* Expanded Content */} -
-
-
- {logs.length === 0 && Waiting for logs...} - {logs.map((entry, i) => ( -
- [{entry.taskId}] - {entry.log} -
- ))} -
-
-
-
- ); -} - diff --git a/frontend/src/components/RecentReportsDropdown.tsx b/frontend/src/components/RecentReportsDropdown.tsx new file mode 100644 index 0000000..3549e38 --- /dev/null +++ b/frontend/src/components/RecentReportsDropdown.tsx @@ -0,0 +1,82 @@ +import { useState } from 'react'; +import { useNavigate } from 'react-router-dom'; +import { Button } from '@/components/ui/button'; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuTrigger, +} from '@/components/ui/dropdown-menu'; +import { History, Loader2 } from 'lucide-react'; + +interface AnalysisResultSummary { + id: string; + request_id: string; + symbol: string; + template_id: string; + created_at: string; +} + +export function RecentReportsDropdown() { + const [reports, setReports] = useState([]); + const [loading, setLoading] = useState(false); + const navigate = useNavigate(); + + const loadReports = async () => { + setLoading(true); + try { + // TEMPORARY: /api/v1/analysis-results removed for refactor + // const response = await fetch('/api/v1/analysis-results?limit=10'); + // if (response.ok) { + // const data = await response.json(); + // setReports(data); + // } + setReports([]); + } catch (e) { + console.error("Failed to load reports", e); + } finally { + setLoading(false); + } + }; + + return ( + { if(open) loadReports(); }}> + + + + + Recent Reports + + {loading ? ( +
+ +
+ ) : reports.length === 0 ? ( +
No recent reports
+ ) : ( + reports.map((report) => ( + navigate(`/report/${report.request_id}`)} + className="flex flex-col items-start gap-1 cursor-pointer py-3" + > +
+ {report.symbol} + {new Date(report.created_at).toLocaleDateString()} +
+
+ {report.template_id || 'Default'} +
+
+ )) + )} +
+
+ ); +} diff --git a/frontend/src/components/layout/Header.tsx b/frontend/src/components/layout/Header.tsx index b851c3f..f5e981b 100644 --- a/frontend/src/components/layout/Header.tsx +++ b/frontend/src/components/layout/Header.tsx @@ -1,12 +1,6 @@ import { Link, useLocation } from 'react-router-dom'; import { cn } from '@/lib/utils'; -import { - NavigationMenu, - NavigationMenuContent, - NavigationMenuItem, - NavigationMenuList, - NavigationMenuTrigger, -} from "@/components/ui/navigation-menu" +import { RecentReportsDropdown } from '../RecentReportsDropdown'; export function Header() { const location = useLocation(); @@ -28,21 +22,6 @@ export function Header() { > 首页 - - - - - 历史报告 - - -
-
最近的分析
-

暂无历史记录 (Mock)

-
-
-
-
-
- {/* User profile or system status could go here */} +
diff --git a/frontend/src/components/ui/dropdown-menu.tsx b/frontend/src/components/ui/dropdown-menu.tsx new file mode 100644 index 0000000..aa4f33d --- /dev/null +++ b/frontend/src/components/ui/dropdown-menu.tsx @@ -0,0 +1,198 @@ +import * as React from "react" +import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu" +import { cn } from "@/lib/utils" +import { CheckIcon, ChevronRightIcon, DotFilledIcon } from "@radix-ui/react-icons" + +const DropdownMenu = DropdownMenuPrimitive.Root + +const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger + +const DropdownMenuGroup = DropdownMenuPrimitive.Group + +const DropdownMenuPortal = DropdownMenuPrimitive.Portal + +const DropdownMenuSub = DropdownMenuPrimitive.Sub + +const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup + +const DropdownMenuSubTrigger = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef & { + inset?: boolean + } +>(({ className, inset, children, ...props }, ref) => ( + + {children} + + +)) +DropdownMenuSubTrigger.displayName = + DropdownMenuPrimitive.SubTrigger.displayName + +const DropdownMenuSubContent = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +DropdownMenuSubContent.displayName = + DropdownMenuPrimitive.SubContent.displayName + +const DropdownMenuContent = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, sideOffset = 4, ...props }, ref) => ( + + + +)) +DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName + +const DropdownMenuItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef & { + inset?: boolean + } +>(({ className, inset, ...props }, ref) => ( + svg]:size-4 [&>svg]:shrink-0", + inset && "pl-8", + className + )} + {...props} + /> +)) +DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName + +const DropdownMenuCheckboxItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, children, checked, ...props }, ref) => ( + + + + + + + {children} + +)) +DropdownMenuCheckboxItem.displayName = + DropdownMenuPrimitive.CheckboxItem.displayName + +const DropdownMenuRadioItem = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, children, ...props }, ref) => ( + + + + + + + {children} + +)) +DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName + +const DropdownMenuLabel = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef & { + inset?: boolean + } +>(({ className, inset, ...props }, ref) => ( + +)) +DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName + +const DropdownMenuSeparator = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + +)) +DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName + +const DropdownMenuShortcut = ({ + className, + ...props +}: React.HTMLAttributes) => { + return ( + + ) +} +DropdownMenuShortcut.displayName = "DropdownMenuShortcut" + +export { + DropdownMenu, + DropdownMenuTrigger, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuCheckboxItem, + DropdownMenuRadioItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuShortcut, + DropdownMenuGroup, + DropdownMenuPortal, + DropdownMenuSub, + DropdownMenuSubContent, + DropdownMenuSubTrigger, + DropdownMenuRadioGroup, +} diff --git a/frontend/src/pages/HistoricalReportPage.tsx 
b/frontend/src/pages/HistoricalReportPage.tsx new file mode 100644 index 0000000..1c98bf3 --- /dev/null +++ b/frontend/src/pages/HistoricalReportPage.tsx @@ -0,0 +1,495 @@ +import { useState, useEffect } from 'react'; +import { useParams, useSearchParams } from 'react-router-dom'; +import { Badge } from '@/components/ui/badge'; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs" +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card" +import { WorkflowVisualizer } from '@/components/workflow/WorkflowVisualizer'; +import { ContextExplorer } from '@/components/workflow/ContextExplorer'; +import { useWorkflowStore } from '@/stores/useWorkflowStore'; +import { TaskStatus, schemas } from '@/api/schema.gen'; +import { Loader2, CheckCircle2, AlertCircle, Clock, PanelLeftClose, PanelLeftOpen, TerminalSquare, X } from 'lucide-react'; +import { Button } from '@/components/ui/button'; +import ReactMarkdown from 'react-markdown'; +import remarkGfm from 'remark-gfm'; +import { useAnalysisTemplates } from "@/hooks/useConfig" +import { RecentReportsDropdown } from '@/components/RecentReportsDropdown'; +import { WorkflowStatus, ConnectionStatus, TaskState, TaskNode } from '@/types/workflow'; +import { Progress } from "@/components/ui/progress" +import { cn, formatNodeName } from '@/lib/utils'; + +export function HistoricalReportPage() { + const { id } = useParams(); + const [searchParams] = useSearchParams(); + const symbol = searchParams.get('symbol'); + const market = searchParams.get('market'); + const templateId = searchParams.get('templateId'); + const [isSidebarCollapsed, setIsSidebarCollapsed] = useState(false); + const [isWorkflowSticky, setIsWorkflowSticky] = useState(false); + + useEffect(() => { + const handleScroll = () => { + setIsWorkflowSticky(window.scrollY > 10); + }; + window.addEventListener('scroll', handleScroll); + return () => window.removeEventListener('scroll', handleScroll); + }, []); + + const { + initialize, + status, + mode, + loadFromSnapshot, + tasks, + dag, + activeTab, + setActiveTab + } = useWorkflowStore(); + + const { data: templates } = useAnalysisTemplates(); + const templateName = templates && templateId ? templates[templateId]?.name : templateId; + + // Initialization Logic - Historical Mode Only + useEffect(() => { + if (!id) return; + + // Initialize store but don't set status to Connecting + initialize(id); + + const loadSnapshot = async () => { + try { + const res = await fetch(`/api/v1/workflow/snapshot/${id}`); + if (res.ok) { + const snapshot = await res.json(); + loadFromSnapshot(snapshot.data_payload); + + // Rehydrate content for completed analysis tasks + const payload = snapshot.data_payload; + if (payload.task_graph?.nodes) { + payload.task_graph.nodes.forEach(async (node: TaskNode) => { + const status = payload.tasks_status?.[node.id]; + const outputCommit = payload.tasks_output?.[node.id]; + + // We need the output path to know what file to fetch. + // It should be injected into the config by the Orchestrator. 
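+ // Note: output_path is interpolated directly into the blob URL below; if it can
+ // contain slashes or other special characters it may need encodeURIComponent
+ // (the equivalent fetch in ReportPage.tsx encodes the path before requesting it).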
+ // @ts-ignore + const outputPath = node.config?.output_path; + + if (status === schemas.TaskStatus.enum.Completed && outputCommit && outputPath) { + try { + // Fetch the file content from the Context Inspector API + const contentUrl = `/api/context/${id}/blob/${outputCommit}/${outputPath}`; + const contentRes = await fetch(contentUrl); + if (contentRes.ok) { + const text = await contentRes.text(); + useWorkflowStore.getState().setTaskContent(node.id, text); + } + } catch (err) { + console.warn(`[Historical] Failed to load content for ${node.id}:`, err); + } + } + }); + } + } else { + console.error("Failed to load snapshot:", res.statusText); + } + } catch (e) { + console.error("Snapshot load failed:", e); + } + }; + + loadSnapshot(); + + // No SSE connection here + + }, [id, initialize, loadFromSnapshot]); + + // Include ALL nodes in tabs to allow debugging context for DataFetch tasks + const tabNodes = dag?.nodes || []; + + return ( +
+ {/* Header Area */} +
+
+

+ {symbol} + {market} + +

+
+ Request ID: {id} + {templateName && Template: {templateName}} +
+
+
+ + +
+
+ + {/* Main Content Grid */} +
+ {/* Left Col: Visualizer */} +
+ + setIsSidebarCollapsed(!isSidebarCollapsed)} + > + {!isSidebarCollapsed ? ( + Workflow Status + ) : ( +
+ Workflow Status +
+ )} + +
+ +
+ +
+
+
+
+ + {/* Right Col: Detail Tabs */} +
+ +
+ + + Overview + + {tabNodes.map((node: TaskNode) => ( + + {node.display_name || formatNodeName(node.name)} + + + ))} + +
+ + {/* Content Area */} +
+ + t.status === schemas.TaskStatus.enum.Completed).length} + /> + + + {tabNodes.map((node: TaskNode) => ( + + + + ))} +
+
+
+
+
+ ); +} + + +function OverviewTabContent({ status, tasks, totalTasks, completedTasks }: { + status: WorkflowStatus, + tasks: Record, + totalTasks: number, + completedTasks: number +}) { + const progress = totalTasks > 0 ? (completedTasks / totalTasks) * 100 : 0; + + // Find errors + const failedTasks = Object.entries(tasks).filter(([_, t]) => t.status === schemas.TaskStatus.enum.Failed); + + return ( +
+ {/* Hero Status */} + + +
+ {status === schemas.TaskStatus.enum.Completed ? ( + + ) : status === schemas.TaskStatus.enum.Failed ? ( + + ) : ( + + )} +
+ + {status === schemas.TaskStatus.enum.Completed ? "Analysis Completed" : + status === schemas.TaskStatus.enum.Failed ? "Analysis Failed" : + "Analysis In Progress"} + +
+ +
+
+ Overall Progress + {Math.round(progress)}% ({completedTasks}/{totalTasks} tasks) +
+ +
+ + {/* Failed Tasks Warning */} + {failedTasks.length > 0 && ( +
+ +
+

Some tasks failed:

+
    + {failedTasks.map(([id, t]) => ( +
  • + {id}: {t.message || "Unknown error"} +
  • + ))} +
+
+
+ )} +
+
+ + {/* Stats Grid */} +
+ + + Total Tasks + + +
{totalTasks}
+
+
+ + + Completed + + +
{completedTasks}
+
+
+ + + Duration + + +
+ + --:-- +
+
+
+
+
+ ) +} + +function TaskDetailView({ task, requestId, mode: _mode }: { task?: TaskState, requestId?: string, mode: 'realtime' | 'historical' }) { + const [isInspectorOpen, setIsInspectorOpen] = useState(false); + + // Only show context if we have commits + const hasContext = task?.inputCommit || task?.outputCommit; + + return ( +
+ {/* Main Report View */} +
+
+
+ {task?.content ? ( + + {task.content || ''} + + ) : ( +
+ {task?.status === schemas.TaskStatus.enum.Pending &&

Waiting to start...

} + {task?.status === schemas.TaskStatus.enum.Running && !task?.content && } + {task?.status === schemas.TaskStatus.enum.Failed && ( +
+ +

Task Failed

+

{task.message}

+
+ )} +
+ )} + {task?.status === schemas.TaskStatus.enum.Running && ( + + )} +
+
+
+ + {/* Inspector Toggle (Floating) */} +
+ +
+ + {/* Inspector Panel (Right Side Sheet) */} +
+
+

+ + Task Inspector +

+ +
+ + +
+ + Logs + {hasContext && ( + Context + )} + Metadata + +
+ + + {task?.logs && task.logs.length > 0 ? ( +
+ {task.logs.map((log, i) => ( +
{log}
+ ))} +
+ ) : ( +
+ No logs available +
+ )} + {/* TODO: Add support for loading _execution.md in historical mode */} +
+ + + {requestId && (task?.inputCommit || task?.outputCommit) && ( + + )} + + + +
+                            {JSON.stringify({
+                                status: task?.status,
+                                progress: task?.progress,
+                                message: task?.message,
+                                inputCommit: task?.inputCommit,
+                                outputCommit: task?.outputCommit
+                            }, null, 2)}
+                        
+
+
+
+
+ ); +} + +function WorkflowStatusBadge({ status, mode }: { status: WorkflowStatus, mode: 'realtime' | 'historical' }) { + const content = ( +
+ {status === schemas.TaskStatus.enum.Running && } + {status} + {mode === 'historical' && ( + + Historical + + )} +
+ ); + + if (status === schemas.TaskStatus.enum.Running) { + return {content}; + } + if (status === schemas.TaskStatus.enum.Completed) { + return {content}; + } + if (status === schemas.TaskStatus.enum.Failed) { + return {content}; + } + if (status === ConnectionStatus.Connecting) { + return CONNECTING...; + } + + return {content}; +} + +function TaskStatusIndicator({ status }: { status: TaskStatus }) { + switch (status) { + case schemas.TaskStatus.enum.Running: return ; + case schemas.TaskStatus.enum.Completed: return ; + case schemas.TaskStatus.enum.Failed: return
; + default: return null; + } +} + diff --git a/frontend/src/pages/ReportPage.tsx b/frontend/src/pages/ReportPage.tsx index a5638b9..cfe7d09 100644 --- a/frontend/src/pages/ReportPage.tsx +++ b/frontend/src/pages/ReportPage.tsx @@ -7,20 +7,18 @@ import { WorkflowVisualizer } from '@/components/workflow/WorkflowVisualizer'; import { ContextExplorer } from '@/components/workflow/ContextExplorer'; import { useWorkflowStore } from '@/stores/useWorkflowStore'; import { TaskStatus, schemas } from '@/api/schema.gen'; -import { Loader2, CheckCircle2, AlertCircle, Clock, PanelLeftClose, PanelLeftOpen, FileText, GitBranch, ArrowRight } from 'lucide-react'; +import { Loader2, CheckCircle2, AlertCircle, Clock, PanelLeftClose, PanelLeftOpen, FileText, GitBranch, TerminalSquare, X, List, Trash2 } from 'lucide-react'; import { Button } from '@/components/ui/button'; import ReactMarkdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; import { useAnalysisTemplates } from "@/hooks/useConfig" -import { RealtimeLogs } from '@/components/RealtimeLogs'; +import { RecentReportsDropdown } from '@/components/RecentReportsDropdown'; import { WorkflowStatus, ConnectionStatus, TaskState } from '@/types/workflow'; import { Progress } from "@/components/ui/progress" import { cn, formatNodeName } from '@/lib/utils'; -import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger } from "@/components/ui/dialog"; export function ReportPage() { const { id } = useParams(); - // ... (rest of the imports) const [searchParams] = useSearchParams(); const symbol = searchParams.get('symbol'); const market = searchParams.get('market'); @@ -30,9 +28,6 @@ export function ReportPage() { useEffect(() => { const handleScroll = () => { - // Detect if user has scrolled down enough to trigger visual sticky change - // Header (64) + Padding (16) = 80px. - // We add a small threshold to avoid flickering setIsWorkflowSticky(window.scrollY > 10); }; window.addEventListener('scroll', handleScroll); @@ -43,6 +38,8 @@ export function ReportPage() { initialize, handleEvent, status, + mode, + loadFromSnapshot, tasks, dag, activeTab, @@ -52,38 +49,56 @@ export function ReportPage() { const { data: templates } = useAnalysisTemplates(); const templateName = templates && templateId ? templates[templateId]?.name : templateId; - // SSE Connection Logic + // Initialization & Connection Logic useEffect(() => { if (!id) return; initialize(id); - // Connect to real backend SSE - const eventSource = new EventSource(`/api/v1/workflow/events/${id}`); - - eventSource.onmessage = (event) => { + let eventSource: EventSource | null = null; + + // 1. Attempt to load snapshot (Parallel / Fallback) + // If the workflow is already finished, SSE might close immediately or 404. + const loadSnapshot = async () => { try { - const parsedEvent = JSON.parse(event.data); - handleEvent(parsedEvent); + const res = await fetch(`/api/v1/workflow/snapshot/${id}`); + if (res.ok) { + const snapshot = await res.json(); + loadFromSnapshot(snapshot.data_payload); + } } catch (e) { - console.error("Failed to parse SSE event:", e); + console.warn("Snapshot load failed (normal for new tasks):", e); } }; - eventSource.onerror = (err) => { - console.error("SSE Connection Error:", err); - // Optional: Retry logic or error state update - // eventSource.close(); - }; + loadSnapshot(); + + // 2. 
Connect to Real-time Stream + try { + eventSource = new EventSource(`/api/v1/workflow/events/${id}`); + + eventSource.onmessage = (event) => { + try { + const parsedEvent = JSON.parse(event.data); + handleEvent(parsedEvent); + } catch (e) { + console.error("Failed to parse SSE event:", e); + } + }; + + eventSource.onerror = (err) => { + // Standard behavior: if connection closes, it might be finished or failed. + // We rely on Snapshot for history if SSE fails. + console.warn("SSE Connection Closed/Error", err); + eventSource?.close(); + }; + } catch (e) { + console.error("Failed to init SSE:", e); + } return () => { - eventSource.close(); + eventSource?.close(); }; - }, [id, initialize, handleEvent]); - - // Combine logs from all tasks for the "Global Log" view - const allLogs = useMemo(() => Object.entries(tasks).flatMap(([taskId, state]) => - state.logs.map(log => ({ taskId, log })) - ), [tasks]); + }, [id, initialize, handleEvent, loadFromSnapshot]); // Include ALL nodes in tabs to allow debugging context for DataFetch tasks const tabNodes = dag?.nodes || []; @@ -96,7 +111,7 @@ export function ReportPage() {

{symbol} {market} - +

Request ID: {id} @@ -104,12 +119,37 @@ export function ReportPage() {
+ +
{/* Main Content Grid */} -
+
{/* Left Col: Visualizer */}
{/* Right Col: Detail Tabs */} -
- +
+
{/* Content Area */} -
- +
+ {tabNodes.map(node => ( - - + + ))}
- -
); } @@ -238,7 +276,6 @@ function OverviewTabContent({ status, tasks, totalTasks, completedTasks }: { totalTasks: number, completedTasks: number }) { - // ... (implementation remains same) const progress = totalTasks > 0 ? (completedTasks / totalTasks) * 100 : 0; // Find errors @@ -326,40 +363,155 @@ function OverviewTabContent({ status, tasks, totalTasks, completedTasks }: { ) } -function TaskDetailView({ task, requestId }: { task?: TaskState, requestId?: string }) { - // Only show context tab if we have commits +function TaskDetailView({ taskId, task, requestId, mode }: { taskId: string, task?: TaskState, requestId?: string, mode: 'realtime' | 'historical' }) { + const [isInspectorOpen, setIsInspectorOpen] = useState(false); + const { setTaskContent } = useWorkflowStore(); + + // Fetch content for historical tasks if missing + useEffect(() => { + if (!requestId || !task || !task.outputCommit) return; + + // Only proceed if content is missing and task is finished (or has output commit) + if (task.content || (task.status !== schemas.TaskStatus.enum.Completed && task.status !== schemas.TaskStatus.enum.Failed)) { + return; + } + + const fetchContent = async () => { + try { + let targetFile = null; + + // Strategy 1: Use Metadata (Preferred) + // Now task.metadata is strongly typed as TaskMetadata from generated schema + if (task.metadata && task.metadata.output_path) { + targetFile = task.metadata.output_path; + } + // Strategy 2: Infer from Diff (Fallback) + else if (task.inputCommit) { + const diffRes = await fetch(`/api/context/${requestId}/diff/${task.inputCommit}/${task.outputCommit}`); + if (diffRes.ok) { + const changes = await diffRes.json(); + const files: string[] = changes.map((c: any) => c.Added || c.Modified).filter(Boolean); + + // Heuristic to find the "Main Report" or "Output" + const reportFile = files.find((f: string) => f.endsWith('.md') && !f.endsWith('_execution.md') && !f.endsWith('_trace.md')); + const execFile = files.find((f: string) => f.endsWith('_execution.md')); + const anyMd = files.find((f: string) => f.endsWith('.md')); + const anyJson = files.find((f: string) => f.endsWith('.json')); + + targetFile = reportFile || execFile || anyMd || anyJson || files[0]; + } + } + + if (targetFile) { + const contentRes = await fetch(`/api/context/${requestId}/blob/${task.outputCommit}/${encodeURIComponent(targetFile)}`); + if (contentRes.ok) { + const text = await contentRes.text(); + + if (targetFile.endsWith('.json')) { + try { + const obj = JSON.parse(text); + setTaskContent(taskId, JSON.stringify(obj, null, 2)); + } catch { + setTaskContent(taskId, text); + } + } else { + setTaskContent(taskId, text); + } + } + } + } catch (e) { + console.error("Auto-fetch content failed", e); + } + }; + + fetchContent(); + + }, [requestId, taskId, task?.outputCommit, task?.inputCommit, task?.status, task?.content, task?.metadata, setTaskContent]); + + // Only show context if we have commits const hasContext = task?.inputCommit || task?.outputCommit; - if (task?.status === schemas.TaskStatus.enum.Failed && !task.content) { - return ( -
- - {hasContext && ( -
- - - Report Content - - - Context Inspector - - -
- )} - -
- -

Analysis Failed

-
-

The task encountered an error and could not complete.

-

- {task.message || "Unknown error occurred."} -

+ return ( +
+ {/* Main Report View */} +
+
+
+ {task?.content ? ( + + {task.content || ''} + + ) : ( +
+ {task?.status === schemas.TaskStatus.enum.Pending &&

Waiting to start...

} + {task?.status === schemas.TaskStatus.enum.Running && !task?.content && } + {task?.status === schemas.TaskStatus.enum.Failed && ( +
+ +

Task Failed

+

{task.message}

+
+ )}
-
+ )} + {task?.status === schemas.TaskStatus.enum.Running && ( + + )} +
+
+
+ + {/* Inspector Toggle (Floating) */} +
+ +
+ + {/* Inspector Panel (Right Side Sheet) */} +
+
+

+ + Task Inspector +

+ +
+ + +
+ + Logs + {hasContext && ( + Context + )} + Metadata + +
+ + + {task?.logs && task.logs.length > 0 ? ( +
+ {task.logs.map((log, i) => ( +
{log}
+ ))} +
+ ) : ( +
+ No logs available +
+ )} + {/* TODO: Add support for loading _execution.md in historical mode */}
- - {requestId && (task.inputCommit || task.outputCommit) && ( + + + {requestId && (task?.inputCommit || task?.outputCommit) && ( )} + + +
+                            {JSON.stringify({
+                                status: task?.status,
+                                progress: task?.progress,
+                                message: task?.message,
+                                inputCommit: task?.inputCommit,
+                                outputCommit: task?.outputCommit
+                            }, null, 2)}
+                        
+
- ); - } - - return ( -
- - {hasContext && ( -
- - - Report Content - - - Context Inspector - - -
- )} - - -
-
- {task?.content ? ( - - {task.content || ''} - - ) : ( -
- {task?.status === schemas.TaskStatus.enum.Pending &&

Waiting to start...

} - {task?.status === schemas.TaskStatus.enum.Running && !task?.content && } -
- )} - {task?.status === schemas.TaskStatus.enum.Running && ( - - )} -
-
-
- - - {requestId && (task?.inputCommit || task?.outputCommit) && ( - - )} - -
); } -function WorkflowStatusBadge({ status }: { status: WorkflowStatus }) { +function WorkflowStatusBadge({ status, mode }: { status: WorkflowStatus, mode: 'realtime' | 'historical' }) { // Map local store status to TaskStatus enum for consistency where possible - // These comparisons are now type-safe against the WorkflowStatus literal union type + const content = ( +
+ {status === schemas.TaskStatus.enum.Running && } + {status} + {mode === 'historical' && ( + + Historical + + )} +
+ ); + if (status === schemas.TaskStatus.enum.Running) { - return ( - - - {schemas.TaskStatus.enum.Running} - - ); + return {content}; } - if (status === schemas.TaskStatus.enum.Completed) { - return ( - - {schemas.TaskStatus.enum.Completed} - - ); + return {content}; } - if (status === schemas.TaskStatus.enum.Failed) { - return ( - - {schemas.TaskStatus.enum.Failed} - - ); + return {content}; } - if (status === ConnectionStatus.Connecting) { - return CONNECTING; + return CONNECTING...; } - return {status}; + return {content}; } function TaskStatusIndicator({ status }: { status: TaskStatus }) { diff --git a/frontend/src/stores/useWorkflowStore.ts b/frontend/src/stores/useWorkflowStore.ts index ff6acd1..590c3fb 100644 --- a/frontend/src/stores/useWorkflowStore.ts +++ b/frontend/src/stores/useWorkflowStore.ts @@ -5,6 +5,7 @@ import { WorkflowDag, TaskState, TaskStatus, WorkflowEvent, WorkflowStatus, Conn interface WorkflowStoreState { requestId: string | null; status: WorkflowStatus; + mode: 'realtime' | 'historical'; dag: WorkflowDag | null; tasks: Record; error: string | null; @@ -12,6 +13,7 @@ interface WorkflowStoreState { // Actions initialize: (requestId: string) => void; + setMode: (mode: 'realtime' | 'historical') => void; setDag: (dag: WorkflowDag) => void; updateTaskStatus: (taskId: string, status: TaskStatus, message?: string, progress?: number, inputCommit?: string, outputCommit?: string) => void; updateTaskContent: (taskId: string, delta: string) => void; // Stream content (append) @@ -21,12 +23,14 @@ interface WorkflowStoreState { completeWorkflow: (result: unknown) => void; failWorkflow: (reason: string) => void; handleEvent: (event: WorkflowEvent) => void; + loadFromSnapshot: (snapshotPayload: any) => void; reset: () => void; } export const useWorkflowStore = create((set, get) => ({ requestId: null, status: ConnectionStatus.Idle, + mode: 'realtime', dag: null, tasks: {}, error: null, @@ -35,11 +39,14 @@ export const useWorkflowStore = create((set, get) => ({ initialize: (requestId) => set({ requestId, status: ConnectionStatus.Connecting, + mode: 'realtime', error: null, tasks: {}, activeTab: 'overview' }), + setMode: (mode) => set({ mode }), + setDag: (dag) => { // Initialize tasks based on DAG const initialTasks: Record = {}; @@ -155,16 +162,15 @@ export const useWorkflowStore = create((set, get) => ({ handleEvent: (event: WorkflowEvent) => { const state = get(); - console.log('Handling Event:', event.type, event); + // console.log('Handling Event:', event.type, event); switch (event.type) { case 'WorkflowStarted': state.setDag(event.payload.task_graph); break; case 'TaskStateChanged': { - // Explicit typing to help TS const p = event.payload; - // @ts-ignore - input_commit/output_commit added + // @ts-ignore state.updateTaskStatus( p.task_id, p.status, @@ -180,7 +186,7 @@ export const useWorkflowStore = create((set, get) => ({ state.updateTaskContent(p.task_id, p.content_delta); break; } - // @ts-ignore - TaskLog is manually added to schema.gen.ts + // @ts-ignore case 'TaskLog': { const p = event.payload; const time = new Date(p.timestamp).toLocaleTimeString(); @@ -197,7 +203,7 @@ export const useWorkflowStore = create((set, get) => ({ break; } case 'WorkflowStateSnapshot': { - // Re-hydrate state + // Used for real-time rehydration (e.g. 
page refresh) if (event.payload.task_graph) { state.setDag(event.payload.task_graph); } @@ -216,8 +222,17 @@ export const useWorkflowStore = create((set, get) => ({ if (event.payload.tasks_output) { Object.entries(event.payload.tasks_output).forEach(([taskId, outputCommit]) => { if (newTasks[taskId] && outputCommit) { - // Correctly mapping outputCommit, not content - newTasks[taskId] = { ...newTasks[taskId], outputCommit: outputCommit }; + newTasks[taskId] = { ...newTasks[taskId], outputCommit: outputCommit as string }; + } + }); + } + + if (event.payload.tasks_metadata) { + Object.entries(event.payload.tasks_metadata).forEach(([taskId, metadata]) => { + if (newTasks[taskId] && metadata) { + // Note: The generated client types define metadata as TaskMetadata which includes optional paths. + // We store it directly as it matches our TaskState.metadata shape partially. + newTasks[taskId] = { ...newTasks[taskId], metadata: metadata }; } }); } @@ -228,9 +243,40 @@ export const useWorkflowStore = create((set, get) => ({ } }, + loadFromSnapshot: (payload: any) => { + const dag = payload.task_graph; + const tasks_status = payload.tasks_status; + const tasks_output = payload.tasks_output; + const tasks_metadata = payload.tasks_metadata; + + const newTasks: Record = {}; + + if (dag) { + dag.nodes.forEach((node: any) => { + newTasks[node.id] = { + status: tasks_status?.[node.id] || node.initial_status, + logs: [], + progress: 100, + content: '', // Content is not in snapshot, needs on-demand loading + outputCommit: tasks_output?.[node.id], + metadata: tasks_metadata?.[node.id] + }; + }); + } + + set({ + dag, + tasks: newTasks, + status: schemas.TaskStatus.enum.Completed, + mode: 'historical', + error: null + }); + }, + reset: () => set({ requestId: null, status: ConnectionStatus.Idle, + mode: 'realtime', dag: null, tasks: {}, error: null, diff --git a/frontend/src/types/workflow.ts b/frontend/src/types/workflow.ts index dcefe0c..1dbce30 100644 --- a/frontend/src/types/workflow.ts +++ b/frontend/src/types/workflow.ts @@ -44,4 +44,5 @@ export interface TaskState { // Context Inspector inputCommit?: string; outputCommit?: string; + metadata?: import('../api/schema.gen').TaskMetadata; // (New) Metadata from backend } diff --git a/openapi.json b/openapi.json index abc6246..d66b041 100644 --- a/openapi.json +++ b/openapi.json @@ -9,6 +9,77 @@ "version": "0.1.0" }, "paths": { + "/api/v1/analysis-results": { + "get": { + "tags": [ + "api" + ], + "summary": "[GET /v1/analysis-results?symbol=...]", + "operationId": "get_analysis_results_by_symbol", + "parameters": [ + { + "name": "symbol", + "in": "query", + "description": "Optional symbol to filter results", + "required": false, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "List of analysis results", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AnalysisResultDto" + } + } + } + } + } + } + } + }, + "/api/v1/analysis-results/{id}": { + "get": { + "tags": [ + "api" + ], + "summary": "[GET /api/v1/analysis-results/:id]", + "operationId": "get_analysis_result_by_id", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Analysis result ID", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "200": { + "description": "Analysis result", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AnalysisResultDto" + } + } + } + }, + "404": { + 
"description": "Not found" + } + } + } + }, "/api/v1/configs/analysis_template_sets": { "get": { "tags": [ @@ -460,6 +531,50 @@ }, "additionalProperties": false }, + "AnalysisResultDto": { + "type": "object", + "description": "Represents a persisted analysis result read from the database.", + "required": [ + "id", + "request_id", + "symbol", + "template_id", + "module_id", + "content", + "meta_data", + "created_at" + ], + "properties": { + "content": { + "type": "string" + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "id": { + "type": "string", + "format": "uuid" + }, + "meta_data": { + "$ref": "#/components/schemas/Value" + }, + "module_id": { + "type": "string" + }, + "request_id": { + "type": "string", + "format": "uuid" + }, + "symbol": { + "type": "string" + }, + "template_id": { + "type": "string" + } + }, + "additionalProperties": false + }, "AnalysisTemplateSet": { "type": "object", "description": "A single, self-contained set of analysis modules representing a complete workflow.\ne.g., \"Standard Fundamental Analysis\"", @@ -1021,6 +1136,27 @@ }, "additionalProperties": false }, + "TaskMetadata": { + "type": "object", + "description": "Metadata produced by a task execution.", + "properties": { + "execution_log_path": { + "type": [ + "string", + "null" + ], + "description": "The execution trace log path" + }, + "output_path": { + "type": [ + "string", + "null" + ], + "description": "The primary output file path (e.g. analysis report)" + } + }, + "additionalProperties": false + }, "TaskNode": { "type": "object", "required": [ @@ -1156,6 +1292,7 @@ }, "additionalProperties": false }, + "Value": {}, "WorkflowDag": { "type": "object", "required": [ @@ -1427,12 +1564,22 @@ "timestamp", "task_graph", "tasks_status", - "tasks_output" + "tasks_output", + "tasks_metadata" ], "properties": { "task_graph": { "$ref": "#/components/schemas/WorkflowDag" }, + "tasks_metadata": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/TaskMetadata" + }, + "propertyNames": { + "type": "string" + } + }, "tasks_output": { "type": "object", "additionalProperties": { diff --git a/services/alphavantage-provider-service/Cargo.lock b/services/alphavantage-provider-service/Cargo.lock index 57d9704..dbc4ba9 100644 --- a/services/alphavantage-provider-service/Cargo.lock +++ b/services/alphavantage-provider-service/Cargo.lock @@ -252,6 +252,16 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -296,6 +306,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -343,6 +355,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] @@ -829,6 +842,34 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "git2" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.4.12" @@ -893,6 +934,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.3.1" @@ -1196,6 +1243,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -1229,6 +1286,46 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libgit2-sys" +version = "0.16.2+1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "libssh2-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -1403,6 +1500,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-src" +version = "300.5.4+3.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.111" @@ -1411,6 +1517,7 @@ checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -2112,6 +2219,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -3041,6 +3157,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3174,6 +3300,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -3415,6 +3550,22 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "workflow-context" +version = "0.1.0" +dependencies = [ + "anyhow", + "git2", + "globset", + "hex", + "regex", + "serde", + "serde_json", + "sha2", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/services/alphavantage-provider-service/src/worker.rs b/services/alphavantage-provider-service/src/worker.rs index a6c2f16..1f87262 100644 --- a/services/alphavantage-provider-service/src/worker.rs +++ b/services/alphavantage-provider-service/src/worker.rs @@ -414,6 +414,7 @@ mod integration_tests { symbol: CanonicalSymbol::new("IBM", &Market::US), market: "US".to_string(), template_id: Some("default".to_string()), + output_path: None, }; // 4. NATS diff --git a/services/api-gateway/Cargo.lock b/services/api-gateway/Cargo.lock index d970ca9..d12d052 100644 --- a/services/api-gateway/Cargo.lock +++ b/services/api-gateway/Cargo.lock @@ -289,6 +289,16 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -333,6 +343,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -380,6 +392,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] @@ -814,6 +827,34 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "git2" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.4.12" @@ -878,6 +919,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.3.1" @@ -1175,6 +1222,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -1208,6 +1265,34 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libgit2-sys" +version = "0.16.2+1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "libssh2-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + [[package]] name = "libz-rs-sys" version = "0.5.2" @@ -1217,6 +1302,18 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -1411,6 +1508,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-src" +version = "300.5.4+3.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.111" @@ -1419,6 +1525,7 @@ checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -3459,6 +3566,22 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "workflow-context" +version = "0.1.0" +dependencies = [ + "anyhow", + "git2", + "globset", + "hex", + "regex", + "serde", + "serde_json", + "sha2", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/services/api-gateway/Dockerfile b/services/api-gateway/Dockerfile index c3a2de8..9492e24 100644 --- a/services/api-gateway/Dockerfile +++ b/services/api-gateway/Dockerfile @@ -1,10 +1,11 @@ # 1. 
Build Stage -FROM rust:1.90 as builder +FROM rust:1.90-bookworm as builder WORKDIR /usr/src/app # Deterministic dependency caching without shipping a stub binary COPY ./services/common-contracts /usr/src/app/services/common-contracts +COPY ./crates/workflow-context /usr/src/app/crates/workflow-context COPY ./services/api-gateway/Cargo.toml ./services/api-gateway/Cargo.lock* ./services/api-gateway/ WORKDIR /usr/src/app/services/api-gateway # Copy the full source code and build the final binary (Debug mode for speed) diff --git a/services/api-gateway/src/api.rs b/services/api-gateway/src/api.rs index cee2f41..1f59abb 100644 --- a/services/api-gateway/src/api.rs +++ b/services/api-gateway/src/api.rs @@ -11,6 +11,7 @@ use common_contracts::config_models::{ AnalysisTemplateSets, DataSourceProvider, DataSourcesConfig, LlmProvider, LlmProvidersConfig, }; +use common_contracts::dtos::{SessionDataDto, WorkflowHistoryDto, WorkflowHistorySummaryDto}; use common_contracts::messages::GenerateReportCommand; use common_contracts::observability::{TaskProgress, ObservabilityTaskStatus}; use common_contracts::registry::ProviderMetadata; @@ -49,10 +50,10 @@ pub struct AnalysisRequest { pub template_id: String, } -#[derive(Deserialize)] -pub struct AnalysisResultQuery { - pub symbol: String, -} +// #[derive(Deserialize)] +// pub struct AnalysisResultQuery { +// pub symbol: Option, +// } #[api_dto] pub struct SymbolResolveRequest { @@ -66,6 +67,12 @@ pub struct SymbolResolveResponse { pub market: String, } +#[derive(Deserialize)] +pub struct WorkflowHistoryQuery { + pub symbol: Option, + pub limit: Option, +} + // --- Dynamic Schema Structs (Replaced by Dynamic Registry) --- // Legacy endpoint /configs/data_sources/schema removed. @@ -127,6 +134,19 @@ async fn mock_models() -> impl IntoResponse { use common_contracts::messages::{StartWorkflowCommand, SyncStateCommand, WorkflowEvent}; +/// [DELETE /v1/system/history] +#[utoipa::path( + delete, + path = "/api/v1/system/history", + responses( + (status = 204, description = "History cleared") + ) +)] +async fn clear_history(State(state): State) -> Result { + state.persistence_client.clear_history().await?; + Ok(StatusCode::NO_CONTENT) +} + fn create_v1_router() -> Router { Router::new() // Mock LLM for E2E @@ -135,7 +155,10 @@ fn create_v1_router() -> Router { // New Workflow API .route("/workflow/start", post(start_workflow)) .route("/workflow/events/{request_id}", get(workflow_events_stream)) + .route("/workflow/snapshot/{request_id}", get(get_workflow_snapshot)) .route("/workflow/{request_id}/graph", get(get_workflow_graph_proxy)) + // System + .route("/system/history", axum::routing::delete(clear_history)) // Tools .route("/tools/resolve-symbol", post(resolve_symbol)) // Legacy routes (marked for removal or compatibility) @@ -146,7 +169,14 @@ fn create_v1_router() -> Router { "/analysis-requests/{symbol}", post(trigger_analysis_generation), ) - .route("/analysis-results", get(get_analysis_results_by_symbol)) + .route( + "/history", + get(get_workflow_histories), + ) + .route( + "/history/{request_id}", + get(get_workflow_history_by_id), + ) .route("/companies/{symbol}/profile", get(get_company_profile)) .route( "/market-data/financial-statements/{symbol}", @@ -393,6 +423,35 @@ async fn start_workflow( )) } +/// [GET /v1/workflow/snapshot/:request_id] +#[utoipa::path( + get, + path = "/api/v1/workflow/snapshot/{request_id}", + params( + ("request_id" = Uuid, Path, description = "Workflow Request ID") + ), + responses( + (status = 200, description = "Workflow 
snapshot", body = SessionDataDto), + (status = 404, description = "Snapshot not found") + ) +)] +async fn get_workflow_snapshot( + State(state): State, + Path(request_id): Path, +) -> Result { + let snapshots = state.persistence_client.get_session_data(request_id, None, None).await?; + + if let Some(snapshot) = snapshots.into_iter().next() { + Ok((StatusCode::OK, Json(snapshot)).into_response()) + } else { + Ok(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({"error": "Snapshot not found"})), + ) + .into_response()) + } +} + /// [GET /v1/workflow/events/:request_id] /// SSE endpoint that proxies events from NATS to the frontend. async fn workflow_events_stream( @@ -460,6 +519,9 @@ async fn proxy_get_session_data( State(_state): State, Path(_request_id): Path, ) -> Result { + // Deprecated route, but if we wanted to implement it: + // let data = state.persistence_client.get_session_data(request_id, None, None).await?; + // Ok(Json(data)) Ok(( StatusCode::NOT_IMPLEMENTED, Json(serde_json::json!({"error": "Not implemented"})), @@ -530,18 +592,55 @@ async fn trigger_analysis_generation( )) } -/// [GET /v1/analysis-results?symbol=...] -async fn get_analysis_results_by_symbol( +// --- New Handlers for Workflow History --- + +/// [GET /v1/history] +#[utoipa::path( + get, + path = "/api/v1/history", + params( + ("symbol" = Option, Query, description = "Filter by symbol"), + ("limit" = Option, Query, description = "Limit number of results") + ), + responses( + (status = 200, description = "Workflow history summaries", body = Vec) + ) +)] +async fn get_workflow_histories( State(state): State, - Query(query): Query, + Query(query): Query, ) -> Result { - let results = state + let histories = state .persistence_client - .get_analysis_results(&query.symbol) + .get_workflow_histories(query.symbol.as_deref(), query.limit) .await?; - Ok(Json(results)) + Ok(Json(histories)) } +/// [GET /v1/history/:request_id] +#[utoipa::path( + get, + path = "/api/v1/history/{request_id}", + params( + ("request_id" = Uuid, Path, description = "Workflow Request ID") + ), + responses( + (status = 200, description = "Workflow history details", body = WorkflowHistoryDto), + (status = 404, description = "History not found") + ) +)] +async fn get_workflow_history_by_id( + State(state): State, + Path(request_id): Path, +) -> Result { + let history = state + .persistence_client + .get_workflow_history_by_id(request_id) + .await?; + Ok(Json(history)) +} + + /// [GET /v1/companies/:symbol/profile] /// Queries the persisted company profile from the data-persistence-service. 
async fn get_company_profile( @@ -1135,4 +1234,4 @@ async fn proxy_context_diff( StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR), axum::body::Body::from(body), )) -} +} \ No newline at end of file diff --git a/services/api-gateway/src/openapi.rs b/services/api-gateway/src/openapi.rs index 9198c20..22561a1 100644 --- a/services/api-gateway/src/openapi.rs +++ b/services/api-gateway/src/openapi.rs @@ -3,6 +3,7 @@ use common_contracts::messages::*; use common_contracts::observability::*; use common_contracts::config_models::*; use common_contracts::registry::{ProviderMetadata, ConfigFieldSchema, FieldType, ConfigKey}; +use common_contracts::dtos::{WorkflowHistoryDto, WorkflowHistorySummaryDto}; use crate::api; #[derive(OpenApi)] @@ -23,6 +24,8 @@ use crate::api; api::discover_models, api::discover_models_preview, api::get_registered_providers, // New endpoint + api::get_workflow_histories, + api::get_workflow_history_by_id, ), components( schemas( @@ -62,6 +65,9 @@ use crate::api; api::TestConfigRequest, api::TestConnectionResponse, api::TestLlmConfigRequest, + // DTOs + WorkflowHistoryDto, + WorkflowHistorySummaryDto, ) ), tags( diff --git a/services/api-gateway/src/persistence.rs b/services/api-gateway/src/persistence.rs index 5a3c16b..b107b4c 100644 --- a/services/api-gateway/src/persistence.rs +++ b/services/api-gateway/src/persistence.rs @@ -6,7 +6,8 @@ use crate::error::Result; use common_contracts::config_models::{ AnalysisTemplateSets, DataSourcesConfig, LlmProvidersConfig, }; -use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto}; +use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto, WorkflowHistoryDto, WorkflowHistorySummaryDto}; +use uuid::Uuid; #[derive(Clone)] pub struct PersistenceClient { @@ -54,12 +55,21 @@ impl PersistenceClient { #[allow(dead_code)] pub async fn get_session_data( &self, - request_id: uuid::Uuid, + request_id: Uuid, + provider: Option<&str>, + data_type: Option<&str>, ) -> Result> { let url = format!("{}/session-data/{}", self.base_url, request_id); - let data = self - .client - .get(&url) + let mut req = self.client.get(&url); + + if let Some(p) = provider { + req = req.query(&[("provider", p)]); + } + if let Some(d) = data_type { + req = req.query(&[("data_type", d)]); + } + + let data = req .send() .await? .error_for_status()? @@ -68,20 +78,35 @@ impl PersistenceClient { Ok(data) } - pub async fn get_analysis_results( - &self, - symbol: &str, - ) -> Result> { - let url = format!("{}/analysis-results?symbol={}", self.base_url, symbol); - let results = self - .client - .get(&url) + pub async fn get_workflow_histories(&self, symbol: Option<&str>, limit: Option) -> Result> { + let url = format!("{}/history", self.base_url); + let mut req = self.client.get(&url); + if let Some(s) = symbol { + req = req.query(&[("symbol", s)]); + } + if let Some(l) = limit { + req = req.query(&[("limit", l)]); + } + let resp = req.send().await?.error_for_status()?; + let results = resp.json().await?; + Ok(results) + } + + pub async fn get_workflow_history_by_id(&self, request_id: Uuid) -> Result { + let url = format!("{}/history/{}", self.base_url, request_id); + let resp = self.client.get(&url).send().await?.error_for_status()?; + let result = resp.json().await?; + Ok(result) + } + + pub async fn clear_history(&self) -> Result<()> { + let url = format!("{}/system/history", self.base_url); + self.client + .delete(&url) .send() .await? - .error_for_status()? 
- .json::>() - .await?; - Ok(results) + .error_for_status()?; + Ok(()) } // --- Config Methods --- diff --git a/services/common-contracts/Cargo.lock b/services/common-contracts/Cargo.lock index 2d8eab8..3b83478 100644 --- a/services/common-contracts/Cargo.lock +++ b/services/common-contracts/Cargo.lock @@ -228,6 +228,16 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -278,6 +288,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -326,6 +338,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] @@ -750,6 +763,34 @@ dependencies = [ "wasip2", ] +[[package]] +name = "git2" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.4.12" @@ -1132,6 +1173,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -1157,6 +1208,20 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libgit2-sys" +version = "0.16.2+1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + [[package]] name = "libm" version = "0.2.15" @@ -1184,6 +1249,32 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libssh2-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -1381,6 +1472,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-src" +version = "300.5.4+3.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.111" @@ -1389,6 +1489,7 @@ checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -1901,6 +2002,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2962,6 +3072,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3088,6 +3208,15 @@ dependencies = [ "wasite", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -3395,6 +3524,22 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "workflow-context" +version = "0.1.0" +dependencies = [ + "anyhow", + "git2", + "globset", + "hex", + "regex", + "serde", + "serde_json", + "sha2", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/services/common-contracts/Cargo.toml b/services/common-contracts/Cargo.toml index 6776b88..761646e 100644 --- a/services/common-contracts/Cargo.toml +++ b/services/common-contracts/Cargo.toml @@ -30,3 +30,4 @@ service_kit = { version = "0.1.2" } reqwest = { version = "0.12", features = ["json"] } tokio = { version = "1", features = ["time", "sync", "macros"] } log = "0.4" +workflow-context = { path = "../../crates/workflow-context" } diff --git a/services/common-contracts/src/data_formatting.rs b/services/common-contracts/src/data_formatting.rs new file mode 100644 index 0000000..15010ba --- /dev/null +++ b/services/common-contracts/src/data_formatting.rs @@ -0,0 +1,115 @@ +use serde_json::Value; +use std::collections::BTreeSet; + +/// Formats a JSON Value into a Markdown string with intelligent rendering strategies. +pub fn format_data(data: &Value) -> String { + match data { + Value::Object(map) => { + // Heuristic: If it looks like a "summary" object with a "records" array, + // likely the "records" are the main content. 
+ if map.len() <= 3 && map.contains_key("records") && map["records"].is_array() { + let mut md = String::new(); + // Render non-records fields first + for (k, v) in map { + if k != "records" { + md.push_str(&format!("**{}:** {}\n\n", k, format_primitive(v))); + } + } + md.push_str(&format_table(&map["records"])); + return md; + } + + // Default Object rendering + let mut md = String::new(); + for (k, v) in map { + md.push_str(&format!("### {}\n\n", k)); + md.push_str(&format_data(v)); + md.push_str("\n\n"); + } + md + }, + Value::Array(arr) => { + if arr.is_empty() { + return "_No Data_".to_string(); + } + // Check if array of objects (Table candidate) + if arr.iter().all(|v| v.is_object()) { + return format_table(data); + } + // Simple list + let mut md = String::new(); + for item in arr { + md.push_str(&format!("- {}\n", format_primitive(item))); + } + md + }, + _ => format_primitive(data), + } +} + +fn format_primitive(v: &Value) -> String { + match v { + Value::String(s) => s.clone(), + Value::Number(n) => n.to_string(), + Value::Bool(b) => b.to_string(), + Value::Null => "null".to_string(), + _ => format!("`{}`", v), + } +} + +fn format_table(data: &Value) -> String { + let arr = data.as_array().unwrap(); + if arr.is_empty() { + return "".to_string(); + } + + // Collect all unique keys for headers + let mut keys: BTreeSet = BTreeSet::new(); + for item in arr { + if let Value::Object(map) = item { + for k in map.keys() { + keys.insert(k.clone()); + } + } + } + + if keys.is_empty() { + return "".to_string(); + } + + let header_list: Vec = keys.into_iter().collect(); + + let mut md = String::new(); + + // Header Row + md.push_str("| "); + md.push_str(&header_list.join(" | ")); + md.push_str(" |\n"); + + // Separator Row + md.push_str("|"); + for _ in 0..header_list.len() { + md.push_str(" --- |"); + } + md.push_str("\n"); + + // Data Rows + for item in arr { + let map = item.as_object().unwrap(); + md.push_str("|"); + for key in &header_list { + let val = map.get(key).unwrap_or(&Value::Null); + // Truncate long values for table + let mut val_str = format_primitive(val); + val_str = val_str.replace('\n', " "); // No newlines in table cells + if val_str.len() > 50 { + val_str = format!("{}...", &val_str[0..47]); + } + md.push_str(&format!(" {} |", val_str)); + } + md.push_str("\n"); + } + + md +} + diff --git a/services/common-contracts/src/dtos.rs b/services/common-contracts/src/dtos.rs index 5b4a19d..0adcef9 100644 --- a/services/common-contracts/src/dtos.rs +++ b/services/common-contracts/src/dtos.rs @@ -49,30 +49,52 @@ pub struct DailyMarketDataBatchDto { pub records: Vec, } -// Analysis Results API DTOs (NEW) -#[api_dto] -pub struct NewAnalysisResult { - pub request_id: Uuid, - pub symbol: String, - pub template_id: String, - pub module_id: String, - pub content: String, - pub meta_data: JsonValue, -} -/// Represents a persisted analysis result read from the database. +// Analysis Results API DTOs (REMOVED) +// #[api_dto] +// pub struct NewAnalysisResult { ... } +// #[api_dto] +// pub struct AnalysisResultDto { ... 
} + +// Workflow History DTOs (NEW) #[api_dto] -pub struct AnalysisResultDto { - pub id: Uuid, +pub struct WorkflowHistoryDto { pub request_id: Uuid, pub symbol: String, - pub template_id: String, - pub module_id: String, - pub content: String, - pub meta_data: JsonValue, + pub market: String, + pub template_id: Option, + pub status: String, + pub start_time: chrono::DateTime, + pub end_time: Option>, + pub snapshot_data: JsonValue, pub created_at: chrono::DateTime, } +#[api_dto] +pub struct NewWorkflowHistory { + pub request_id: Uuid, + pub symbol: String, + pub market: String, + pub template_id: Option, + pub status: String, + pub start_time: chrono::DateTime, + pub end_time: Option>, + pub snapshot_data: JsonValue, +} + +#[api_dto] +pub struct WorkflowHistorySummaryDto { + pub request_id: Uuid, + pub symbol: String, + pub market: String, + pub template_id: Option, + pub status: String, + pub start_time: chrono::DateTime, + pub end_time: Option>, +} + + + // Realtime Quotes DTOs #[api_dto] diff --git a/services/common-contracts/src/lib.rs b/services/common-contracts/src/lib.rs index 3c5ec75..8360736 100644 --- a/services/common-contracts/src/lib.rs +++ b/services/common-contracts/src/lib.rs @@ -14,3 +14,6 @@ pub mod abstraction; pub mod workflow_harness; // Export the harness pub mod workflow_types; pub mod configs; +pub mod data_formatting; +pub mod workflow_node; +pub mod workflow_runner; diff --git a/services/common-contracts/src/messages.rs b/services/common-contracts/src/messages.rs index 59ed5e2..349d426 100644 --- a/services/common-contracts/src/messages.rs +++ b/services/common-contracts/src/messages.rs @@ -87,6 +87,18 @@ impl SubjectMessage for GenerateReportCommand { // --- Events --- +/// Metadata produced by a task execution. +#[api_dto] +pub struct TaskMetadata { + /// The primary output file path (e.g. analysis report) + pub output_path: Option, + /// The execution trace log path + pub execution_log_path: Option, + /// Additional arbitrary metadata + #[serde(flatten)] + pub extra: HashMap, +} + // Topic: events.workflow.{request_id} /// Unified event stream for frontend consumption. #[api_dto] @@ -145,7 +157,8 @@ pub enum WorkflowEvent { timestamp: i64, task_graph: WorkflowDag, tasks_status: HashMap, // 当前所有任务的最新状态 - tasks_output: HashMap> // (可选) 已完成任务的关键输出摘要 + tasks_output: HashMap>, // (可选) 已完成任务的关键输出摘要 (commit hash) + tasks_metadata: HashMap // (New) 任务的关键元数据 } } diff --git a/services/common-contracts/src/models.rs b/services/common-contracts/src/models.rs index 190b0c6..c8004df 100644 --- a/services/common-contracts/src/models.rs +++ b/services/common-contracts/src/models.rs @@ -36,16 +36,8 @@ pub struct DailyMarketData { pub total_mv: Option, } -#[derive(Debug, Clone, FromRow)] -pub struct AnalysisResult { - pub id: Uuid, - pub symbol: String, - pub module_id: String, - pub generated_at: DateTime, - pub model_name: Option, - pub content: String, - pub meta_data: Option, -} +// AnalysisResult struct removed as it is deprecated and table is dropped. 
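+// Whole-run results are now captured as a snapshot in the `workflow_history` table
+// (see dtos::WorkflowHistoryDto), with file artifacts referenced from VGCS commits.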
+ #[derive(Debug, Clone, FromRow)] pub struct SystemConfig { diff --git a/services/common-contracts/src/persistence_client.rs b/services/common-contracts/src/persistence_client.rs index e4cbb67..5ec72aa 100644 --- a/services/common-contracts/src/persistence_client.rs +++ b/services/common-contracts/src/persistence_client.rs @@ -1,6 +1,7 @@ use crate::dtos::{ SessionDataDto, ProviderCacheDto, CompanyProfileDto, - TimeSeriesFinancialBatchDto, TimeSeriesFinancialDto, ProviderStatusDto + TimeSeriesFinancialBatchDto, TimeSeriesFinancialDto, ProviderStatusDto, + NewWorkflowHistory, WorkflowHistoryDto, WorkflowHistorySummaryDto }; use crate::config_models::{ DataSourcesConfig, LlmProvidersConfig, AnalysisTemplateSets @@ -23,6 +24,41 @@ impl PersistenceClient { } } + // --- Workflow History (NEW) --- + + pub async fn create_workflow_history(&self, dto: &NewWorkflowHistory) -> Result { + let url = format!("{}/history", self.base_url); + let resp = self.client + .post(&url) + .json(dto) + .send() + .await? + .error_for_status()?; + let result = resp.json().await?; + Ok(result) + } + + pub async fn get_workflow_histories(&self, symbol: Option<&str>, limit: Option) -> Result> { + let url = format!("{}/history", self.base_url); + let mut req = self.client.get(&url); + if let Some(s) = symbol { + req = req.query(&[("symbol", s)]); + } + if let Some(l) = limit { + req = req.query(&[("limit", l)]); + } + let resp = req.send().await?.error_for_status()?; + let results = resp.json().await?; + Ok(results) + } + + pub async fn get_workflow_history_by_id(&self, request_id: Uuid) -> Result { + let url = format!("{}/history/{}", self.base_url, request_id); + let resp = self.client.get(&url).send().await?.error_for_status()?; + let result = resp.json().await?; + Ok(result) + } + // --- Session Data --- pub async fn insert_session_data(&self, dto: &SessionDataDto) -> Result<()> { diff --git a/services/common-contracts/src/workflow_node.rs b/services/common-contracts/src/workflow_node.rs new file mode 100644 index 0000000..162e549 --- /dev/null +++ b/services/common-contracts/src/workflow_node.rs @@ -0,0 +1,87 @@ +use async_trait::async_trait; +use anyhow::Result; +use serde_json::Value; +use std::collections::HashMap; + +/// Context provided to the node execution +pub struct NodeContext { + pub request_id: String, + pub base_commit: String, + pub root_path: String, +} + +impl NodeContext { + pub fn new(request_id: String, base_commit: String, root_path: String) -> Self { + Self { + request_id, + base_commit, + root_path, + } + } +} + +/// Content of an artifact +pub enum ArtifactContent { + Json(Value), + Text(String), + Bytes(Vec), +} + +impl From for ArtifactContent { + fn from(v: Value) -> Self { + ArtifactContent::Json(v) + } +} + +impl From for ArtifactContent { + fn from(s: String) -> Self { + ArtifactContent::Text(s) + } +} + +impl ArtifactContent { + pub fn as_bytes(&self) -> Result> { + match self { + ArtifactContent::Json(v) => Ok(serde_json::to_vec_pretty(v)?), + ArtifactContent::Text(s) => Ok(s.as_bytes().to_vec()), + ArtifactContent::Bytes(b) => Ok(b.clone()), + } + } +} + +/// Result of the node execution +pub struct NodeExecutionResult { + /// Artifacts to be saved to VGCS. 
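+    /// The node runner writes each entry into the task's output directory and commits them as one change set.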
+ /// Key: Relative file path (e.g., "profile.json") + /// Value: Content + pub artifacts: HashMap, + + /// Metadata summary for the task result event + pub meta_summary: Option, +} + +#[async_trait] +pub trait WorkflowNode: Send + Sync { + /// Unique identifier/type of the node (e.g., "yfinance", "analysis") + fn node_type(&self) -> &str; + + /// Core execution logic + /// + /// # Arguments + /// * `context` - Context including request_id, base_commit, etc. + /// * `config` - Task configuration (e.g., symbol, market) + async fn execute( + &self, + context: &NodeContext, + config: &Value + ) -> Result; + + /// Render the report from the execution result + /// + /// This method is synchronous as data should be available in the result. + fn render_report( + &self, + result: &NodeExecutionResult + ) -> Result; +} + diff --git a/services/common-contracts/src/workflow_runner.rs b/services/common-contracts/src/workflow_runner.rs new file mode 100644 index 0000000..b24c197 --- /dev/null +++ b/services/common-contracts/src/workflow_runner.rs @@ -0,0 +1,154 @@ +use std::sync::Arc; +use anyhow::Result; +use tracing::{info, error}; +use async_nats::Client; + +use crate::workflow_types::{WorkflowTaskCommand, WorkflowTaskEvent, TaskStatus, TaskResult}; +use crate::messages::WorkflowEvent as CommonWorkflowEvent; +use crate::workflow_node::{WorkflowNode, NodeContext}; +use crate::subjects::SubjectMessage; +use workflow_context::WorkerContext; + +pub struct WorkflowNodeRunner { + nats: Client, +} + +impl WorkflowNodeRunner { + pub fn new(nats: Client) -> Self { + Self { nats } + } + + pub async fn run(&self, node: Arc, cmd: WorkflowTaskCommand) -> Result<()> + where + N: WorkflowNode + 'static + { + let task_id = cmd.task_id.clone(); + info!("Starting node execution: type={}, task_id={}", node.node_type(), task_id); + + // 1. Prepare Context + let root_path = cmd.storage.root_path.clone(); + let req_id = cmd.request_id.to_string(); + let base_commit = cmd.context.base_commit.clone().unwrap_or_default(); + + let context = NodeContext::new(req_id.clone(), base_commit.clone(), root_path.clone()); + + // 2. Execute Node Logic (Async) + let exec_result = match node.execute(&context, &cmd.config).await { + Ok(res) => res, + Err(e) => { + return self.handle_failure(&cmd, &e.to_string()).await; + } + }; + + // 3. Render Report (Sync) + let report_md = match node.render_report(&exec_result) { + Ok(md) => md, + Err(e) => { + return self.handle_failure(&cmd, &format!("Report rendering failed: {}", e)).await; + } + }; + + // 4. 
VGCS Operations (Blocking) + let node_clone = node.clone(); + let task_id_clone = task_id.clone(); + let base_commit_clone = base_commit.clone(); + let root_path_clone = root_path.clone(); + let req_id_clone = req_id.clone(); + let exec_result_artifacts = exec_result.artifacts; + let report_md_clone = report_md.clone(); + let symbol = cmd.config.get("symbol").and_then(|s| s.as_str()).unwrap_or("unknown").to_string(); + + // We also want to generate an execution log (basic one for now) + // In future, we might want to capture logs during execute() + let execution_log = format!("# Execution Log for {}\n\nTask ID: {}\nNode Type: {}\nStatus: Success\n", task_id, task_id, node.node_type()); + + let commit_res = tokio::task::spawn_blocking(move || -> Result { + let mut ctx = WorkerContext::new(&root_path_clone, &req_id_clone, &base_commit_clone); + + // Define output directory convention + let base_dir = format!("raw/{}/{}", node_clone.node_type(), symbol); + + // Write Artifacts + for (filename, content) in exec_result_artifacts { + let full_path = format!("{}/{}", base_dir, filename); + let bytes = content.as_bytes()?; + // WorkerContext write_file takes &str for now + ctx.write_file(&full_path, std::str::from_utf8(&bytes).unwrap_or(""))?; + } + + // Write Report + let report_path = format!("{}/report.md", base_dir); + ctx.write_file(&report_path, &report_md_clone)?; + + // Write Execution Log + let log_path = format!("{}/_execution.md", base_dir); + ctx.write_file(&log_path, &execution_log)?; + + // Commit + let commit_msg = format!("Task {} ({}) completed", task_id_clone, node_clone.node_type()); + let new_commit = ctx.commit(&commit_msg)?; + + Ok(new_commit) + }).await; + + let new_commit = match commit_res { + Ok(Ok(commit)) => commit, + Ok(Err(e)) => return self.handle_failure(&cmd, &format!("VGCS error: {}", e)).await, + Err(e) => return self.handle_failure(&cmd, &format!("Task join error: {}", e)).await, + }; + + // 5. Publish Stream Update + let stream_event = CommonWorkflowEvent::TaskStreamUpdate { + task_id: task_id.clone(), + content_delta: report_md.clone(), + index: 0, + }; + self.publish_common(&cmd.request_id, stream_event).await?; + + // 6. 
Publish Completion Event + let event = WorkflowTaskEvent { + request_id: cmd.request_id, + task_id: task_id, + status: TaskStatus::Completed, + result: Some(TaskResult { + new_commit: Some(new_commit), + error: None, + summary: exec_result.meta_summary, + }), + }; + self.publish_event(event).await?; + + info!("Task {} finished successfully", cmd.task_id); + Ok(()) + } + + async fn handle_failure(&self, cmd: &WorkflowTaskCommand, error_msg: &str) -> Result<()> { + error!("Task {} failed: {}", cmd.task_id, error_msg); + let event = WorkflowTaskEvent { + request_id: cmd.request_id, + task_id: cmd.task_id.clone(), + status: TaskStatus::Failed, + result: Some(TaskResult { + new_commit: None, + error: Some(error_msg.to_string()), + summary: None, + }), + }; + self.publish_event(event).await + } + + async fn publish_event(&self, event: WorkflowTaskEvent) -> Result<()> { + let subject = event.subject().to_string(); + let payload = serde_json::to_vec(&event)?; + self.nats.publish(subject, payload.into()).await?; + Ok(()) + } + + async fn publish_common(&self, req_id: &uuid::Uuid, event: CommonWorkflowEvent) -> Result<()> { + let subject = crate::subjects::NatsSubject::WorkflowProgress(*req_id).to_string(); + let payload = serde_json::to_vec(&event)?; + self.nats.publish(subject, payload.into()).await?; + Ok(()) + } +} + diff --git a/services/data-persistence-service/Cargo.lock b/services/data-persistence-service/Cargo.lock index 062bcb2..8e9f970 100644 --- a/services/data-persistence-service/Cargo.lock +++ b/services/data-persistence-service/Cargo.lock @@ -284,6 +284,16 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -334,6 +344,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -382,6 +394,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] @@ -929,6 +942,34 @@ dependencies = [ "wasip2", ] +[[package]] +name = "git2" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.4.12" @@ -1324,6 +1365,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -1349,6 +1400,20 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libgit2-sys" +version = "0.16.2+1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + [[package]] name = "libm" version = "0.2.15" @@ -1376,6 +1441,20 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libssh2-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + [[package]] name = "libz-rs-sys" version = "0.5.2" @@ -1385,6 +1464,18 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -1620,6 +1711,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-src" +version = "300.5.4+3.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.111" @@ -1628,6 +1728,7 @@ checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -3895,6 +3996,22 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "workflow-context" +version = "0.1.0" +dependencies = [ + "anyhow", + "git2", + "globset", + "hex", + "regex", + "serde", + "serde_json", + "sha2", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/services/data-persistence-service/Dockerfile b/services/data-persistence-service/Dockerfile index e4c1e9c..799685f 100644 --- a/services/data-persistence-service/Dockerfile +++ b/services/data-persistence-service/Dockerfile @@ -6,6 +6,7 @@ FROM chef AS planner WORKDIR /app/services/data-persistence-service # 仅复制必要的 Cargo 清单,避免大体积上下文 COPY services/common-contracts/Cargo.toml /app/services/common-contracts/Cargo.toml +COPY crates/workflow-context/Cargo.toml /app/crates/workflow-context/Cargo.toml COPY services/data-persistence-service/Cargo.toml /app/services/data-persistence-service/Cargo.toml # Copy service_kit mirror for dependency resolution COPY ref/service_kit_mirror /app/ref/service_kit_mirror @@ -19,6 +20,7 @@ COPY --from=planner /app/services/data-persistence-service/recipe.json /app/serv # 为了支持 path 依赖,先拷贝依赖源码再 cook ENV FORCE_REBUILD=2 COPY services/common-contracts /app/services/common-contracts +COPY crates/workflow-context /app/crates/workflow-context # Copy service_kit mirror again for build COPY ref/service_kit_mirror /app/ref/service_kit_mirror diff --git a/services/data-persistence-service/src/api/analysis.rs b/services/data-persistence-service/src/api/analysis.rs deleted file mode 100644 index b40f6d6..0000000 --- 
a/services/data-persistence-service/src/api/analysis.rs +++ /dev/null @@ -1,110 +0,0 @@ -use crate::models::AnalysisResult; -use crate::{AppState, ServerError}; -use axum::{ - extract::{Path, Query, State}, - http::StatusCode, - response::IntoResponse, - Json, -}; -use common_contracts::dtos::{AnalysisResultDto, NewAnalysisResult}; -use serde::Deserialize; -use service_kit::api; -use tracing::{instrument, error}; -use anyhow::Error as AnyhowError; -use uuid::Uuid; - -#[derive(Debug, Deserialize, utoipa::IntoParams, utoipa::ToSchema)] -pub struct AnalysisQuery { - pub symbol: String, - pub module_id: Option, -} - -/// Creates a new analysis result and returns the created record. -#[instrument(skip(state, payload), fields(request_id = %payload.request_id, symbol = %payload.symbol, module_id = %payload.module_id))] -#[api(POST, "/api/v1/analysis-results")] -pub async fn create_analysis_result( - State(state): State, - Json(payload): Json, -) -> Result { - let result = crate::db::create_analysis_result(state.pool(), &payload) - .await - .map_err(|e| { - error!("Database error inserting analysis result: {}", e); - AnyhowError::from(e) - })?; - - let dto = AnalysisResultDto { - id: result.id, - request_id: result.request_id, - symbol: result.symbol, - template_id: result.template_id, - module_id: result.module_id, - content: result.content, - meta_data: result.meta_data, - created_at: result.created_at, - }; - - Ok((StatusCode::CREATED, Json(dto))) -} - -/// Retrieves all analysis results for a given symbol. -#[instrument(skip(state))] -#[api(GET, "/api/v1/analysis-results", output(list = "AnalysisResultDto"))] -pub async fn get_analysis_results( - State(state): State, - Query(query): Query, -) -> Result>, ServerError> { - let results = crate::db::get_analysis_results(state.pool(), &query.symbol, query.module_id.as_deref()) - .await - .map_err(|e| { - error!("Database error fetching analysis results: {}", e); - AnyhowError::from(e) - })?; - - let dtos = results - .into_iter() - .map(|r| AnalysisResultDto { - id: r.id, - request_id: r.request_id, - symbol: r.symbol, - template_id: r.template_id, - module_id: r.module_id, - content: r.content, - meta_data: r.meta_data, - created_at: r.created_at, - }) - .collect(); - - Ok(Json(dtos)) -} - -/// Retrieves a single analysis result by its primary ID. -#[instrument(skip(state))] -#[api(GET, "/api/v1/analysis-results/{id}", output(detail = "AnalysisResultDto"))] -pub async fn get_analysis_result_by_id( - State(state): State, - Path(id): Path, -) -> Result, ServerError> { - let id = Uuid::parse_str(&id).map_err(|_| ServerError::NotFound(format!("Invalid UUID: {}", id)))?; - - let result = crate::db::get_analysis_result_by_id(state.pool(), id) - .await - .map_err(|e| { - error!("Database error fetching analysis result by id: {}", e); - AnyhowError::from(e) - })? 
- .ok_or_else(|| ServerError::NotFound(format!("Analysis result not found: {}", id)))?; - - let dto = AnalysisResultDto { - id: result.id, - request_id: result.request_id, - symbol: result.symbol, - template_id: result.template_id, - module_id: result.module_id, - content: result.content, - meta_data: result.meta_data, - created_at: result.created_at, - }; - - Ok(Json(dto)) -} diff --git a/services/data-persistence-service/src/api/history.rs b/services/data-persistence-service/src/api/history.rs new file mode 100644 index 0000000..2d734aa --- /dev/null +++ b/services/data-persistence-service/src/api/history.rs @@ -0,0 +1,110 @@ +use crate::{AppState, ServerError}; +use axum::{ + extract::{Path, Query, State}, + http::StatusCode, + response::IntoResponse, + Json, +}; +use common_contracts::dtos::{NewWorkflowHistory, WorkflowHistoryDto, WorkflowHistorySummaryDto}; +// use serde::Deserialize; +use service_kit::{api, api_dto}; +use tracing::{error, instrument}; +use anyhow::Error as AnyhowError; +use uuid::Uuid; + +#[api_dto] +#[derive(utoipa::IntoParams)] +pub struct HistoryQuery { + pub symbol: Option, + pub limit: Option, +} + +#[instrument(skip(state, payload))] +#[api(POST, "/api/v1/history")] +pub async fn create_workflow_history( + State(state): State, + Json(payload): Json, +) -> Result { + let result = crate::db::history::create_workflow_history(state.pool(), &payload) + .await + .map_err(|e| { + error!("Database error inserting workflow history: {}", e); + AnyhowError::from(e) + })?; + + let dto = WorkflowHistoryDto { + request_id: result.request_id, + symbol: result.symbol, + market: result.market, + template_id: result.template_id, + status: result.status, + start_time: result.start_time, + end_time: result.end_time, + snapshot_data: result.snapshot_data, + created_at: result.created_at, + }; + + Ok((StatusCode::CREATED, Json(dto))) +} + +#[instrument(skip(state))] +#[api(GET, "/api/v1/history")] +pub async fn get_workflow_histories( + State(state): State, + Query(query): Query, +) -> Result>, ServerError> { + let limit = query.limit.unwrap_or(20).min(100); + let results = crate::db::history::get_workflow_histories(state.pool(), query.symbol.as_deref(), limit) + .await + .map_err(|e| { + error!("Database error fetching workflow histories: {}", e); + AnyhowError::from(e) + })?; + + let dtos = results + .into_iter() + .map(|r| WorkflowHistorySummaryDto { + request_id: r.request_id, + symbol: r.symbol, + market: r.market, + template_id: r.template_id, + status: r.status, + start_time: r.start_time, + end_time: r.end_time, + }) + .collect(); + + Ok(Json(dtos)) +} + +#[instrument(skip(state))] +#[api(GET, "/api/v1/history/{request_id}")] +pub async fn get_workflow_history_by_id( + State(state): State, + Path(request_id_str): Path, +) -> Result, ServerError> { + let request_id = Uuid::parse_str(&request_id_str).map_err(|_| ServerError::NotFound(format!("Invalid UUID: {}", request_id_str)))?; + + let result = crate::db::history::get_workflow_history_by_id(state.pool(), request_id) + .await + .map_err(|e| { + error!("Database error fetching workflow history by id: {}", e); + AnyhowError::from(e) + })? 
+ .ok_or_else(|| ServerError::NotFound(format!("History not found: {}", request_id)))?; + + let dto = WorkflowHistoryDto { + request_id: result.request_id, + symbol: result.symbol, + market: result.market, + template_id: result.template_id, + status: result.status, + start_time: result.start_time, + end_time: result.end_time, + snapshot_data: result.snapshot_data, + created_at: result.created_at, + }; + + Ok(Json(dto)) +} + diff --git a/services/data-persistence-service/src/api/mod.rs b/services/data-persistence-service/src/api/mod.rs index 73e5118..20f3ab7 100644 --- a/services/data-persistence-service/src/api/mod.rs +++ b/services/data-persistence-service/src/api/mod.rs @@ -1,10 +1,10 @@ -mod analysis; +mod provider_cache; +mod history; mod companies; mod configs; mod market_data; mod system; mod session_data; -mod provider_cache; use crate::AppState; use axum::{ @@ -16,6 +16,7 @@ pub fn create_router(_state: AppState) -> Router { let router: Router = Router::new() // System .route("/health", get(system::get_health)) + .route("/api/v1/system/history", axum::routing::delete(system::clear_history)) // Configs .route( "/configs/llm_providers", @@ -40,15 +41,15 @@ pub fn create_router(_state: AppState) -> Router { "/market-data/financial-statements/{symbol}", get(market_data::get_financials_by_symbol), ) - // Analysis Results - .route( - "/analysis-results", - post(analysis::create_analysis_result).get(analysis::get_analysis_results), - ) - .route( - "/analysis-results/{id}", - get(analysis::get_analysis_result_by_id), - ) + // Analysis Results (REMOVED) + // .route( + // "/analysis-results", + // post(analysis::create_analysis_result).get(analysis::get_analysis_results), + // ) + // .route( + // "/analysis-results/{id}", + // get(analysis::get_analysis_result_by_id), + // ) // Session Data .route( "/session-data", @@ -62,6 +63,15 @@ pub fn create_router(_state: AppState) -> Router { .route( "/provider-cache", get(provider_cache::get_cache).post(provider_cache::set_cache), + ) + // Workflow History (NEW) + .route( + "/history", + post(history::create_workflow_history).get(history::get_workflow_histories), + ) + .route( + "/history/{request_id}", + get(history::get_workflow_history_by_id), ); router diff --git a/services/data-persistence-service/src/api/system.rs b/services/data-persistence-service/src/api/system.rs index 71a5b3d..ba52654 100644 --- a/services/data-persistence-service/src/api/system.rs +++ b/services/data-persistence-service/src/api/system.rs @@ -27,6 +27,19 @@ pub async fn get_health(State(state): State) -> Result) -> Result { + // Clear session data + crate::db::session_data::clear_all_session_data(state.pool()) + .await + .map_err(|e| { + tracing::error!("Failed to clear session data: {}", e); + ServerError::from(anyhow::anyhow!(e)) + })?; + + Ok(axum::http::StatusCode::NO_CONTENT) +} + #[api(GET, "/tasks", output(list = "TaskProgress"))] pub async fn get_tasks(_state: State) -> Result>, ServerError> { // data-persistence-service 当前不进行异步任务处理,返回空列表 diff --git a/services/data-persistence-service/src/db/analysis_results.rs b/services/data-persistence-service/src/db/analysis_results.rs deleted file mode 100644 index 50a944e..0000000 --- a/services/data-persistence-service/src/db/analysis_results.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::models::AnalysisResult; -use common_contracts::dtos::NewAnalysisResult; -use sqlx::PgPool; -use uuid::Uuid; - -pub async fn create_analysis_result( - pool: &PgPool, - payload: &NewAnalysisResult, -) -> Result { - sqlx::query_as::<_, 
AnalysisResult>( - r#" - INSERT INTO analysis_results (request_id, symbol, template_id, module_id, content, meta_data) - VALUES ($1, $2, $3, $4, $5, $6) - RETURNING id, request_id, symbol, template_id, module_id, content, meta_data, created_at - "# - ) - .bind(&payload.request_id) - .bind(&payload.symbol) - .bind(&payload.template_id) - .bind(&payload.module_id) - .bind(&payload.content) - .bind(&payload.meta_data) - .fetch_one(pool) - .await -} - -pub async fn get_analysis_results( - pool: &PgPool, - symbol: &str, - module_id: Option<&str>, -) -> Result, sqlx::Error> { - if let Some(mid) = module_id { - sqlx::query_as::<_, AnalysisResult>( - r#" - SELECT id, request_id, symbol, template_id, module_id, content, meta_data, created_at - FROM analysis_results - WHERE symbol = $1 AND module_id = $2 - ORDER BY created_at DESC - "# - ) - .bind(symbol) - .bind(mid) - .fetch_all(pool) - .await - } else { - sqlx::query_as::<_, AnalysisResult>( - r#" - SELECT id, request_id, symbol, template_id, module_id, content, meta_data, created_at - FROM analysis_results - WHERE symbol = $1 - ORDER BY created_at DESC - "# - ) - .bind(symbol) - .fetch_all(pool) - .await - } -} - -pub async fn get_analysis_result_by_id( - pool: &PgPool, - id: Uuid, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, AnalysisResult>( - r#" - SELECT id, request_id, symbol, template_id, module_id, content, meta_data, created_at - FROM analysis_results - WHERE id = $1 - "# - ) - .bind(&id) - .fetch_optional(pool) - .await -} - diff --git a/services/data-persistence-service/src/db/history.rs b/services/data-persistence-service/src/db/history.rs new file mode 100644 index 0000000..c2db85e --- /dev/null +++ b/services/data-persistence-service/src/db/history.rs @@ -0,0 +1,81 @@ +use crate::models::WorkflowHistory; +use common_contracts::dtos::NewWorkflowHistory; +use sqlx::PgPool; +use uuid::Uuid; + +pub async fn create_workflow_history( + pool: &PgPool, + payload: &NewWorkflowHistory, +) -> Result { + sqlx::query_as::<_, WorkflowHistory>( + r#" + INSERT INTO workflow_history (request_id, symbol, market, template_id, status, start_time, end_time, snapshot_data) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) + RETURNING request_id, symbol, market, template_id, status, start_time, end_time, snapshot_data, created_at + "# + ) + .bind(&payload.request_id) + .bind(&payload.symbol) + .bind(&payload.market) + .bind(&payload.template_id) + .bind(&payload.status) + .bind(&payload.start_time) + .bind(&payload.end_time) + .bind(&payload.snapshot_data) + .fetch_one(pool) + .await +} + +pub async fn get_workflow_history_by_id( + pool: &PgPool, + request_id: Uuid, +) -> Result, sqlx::Error> { + sqlx::query_as::<_, WorkflowHistory>( + r#" + SELECT request_id, symbol, market, template_id, status, start_time, end_time, snapshot_data, created_at + FROM workflow_history + WHERE request_id = $1 + "# + ) + .bind(request_id) + .fetch_optional(pool) + .await +} + +pub async fn get_workflow_histories( + pool: &PgPool, + symbol: Option<&str>, + limit: i64, +) -> Result, sqlx::Error> { + match symbol { + Some(s) => { + sqlx::query_as::<_, WorkflowHistory>( + r#" + SELECT request_id, symbol, market, template_id, status, start_time, end_time, snapshot_data, created_at + FROM workflow_history + WHERE symbol = $1 + ORDER BY created_at DESC + LIMIT $2 + "# + ) + .bind(s) + .bind(limit) + .fetch_all(pool) + .await + }, + None => { + sqlx::query_as::<_, WorkflowHistory>( + r#" + SELECT request_id, symbol, market, template_id, status, start_time, end_time, snapshot_data, 
created_at + FROM workflow_history + ORDER BY created_at DESC + LIMIT $1 + "# + ) + .bind(limit) + .fetch_all(pool) + .await + } + } +} + diff --git a/services/data-persistence-service/src/db/mod.rs b/services/data-persistence-service/src/db/mod.rs index 3e9f847..9ebcc1d 100644 --- a/services/data-persistence-service/src/db/mod.rs +++ b/services/data-persistence-service/src/db/mod.rs @@ -9,7 +9,7 @@ pub mod companies; pub mod market_data; pub mod session_data; pub mod provider_cache; -pub mod analysis_results; +pub mod history; pub use companies::{get_company_by_symbol, upsert_company}; pub use market_data::{ @@ -18,4 +18,3 @@ pub use market_data::{ }; pub use session_data::{insert_session_data, get_session_data, delete_session_data}; pub use provider_cache::{get_cache, set_cache}; -pub use analysis_results::{create_analysis_result, get_analysis_results, get_analysis_result_by_id}; diff --git a/services/data-persistence-service/src/db/session_data.rs b/services/data-persistence-service/src/db/session_data.rs index d72e115..2308c7f 100644 --- a/services/data-persistence-service/src/db/session_data.rs +++ b/services/data-persistence-service/src/db/session_data.rs @@ -66,3 +66,12 @@ pub async fn delete_session_data( Ok(()) } +pub async fn clear_all_session_data( + pool: &PgPool, +) -> Result<(), sqlx::Error> { + sqlx::query("TRUNCATE TABLE session_raw_data") + .execute(pool) + .await?; + Ok(()) +} + diff --git a/services/data-persistence-service/src/models.rs b/services/data-persistence-service/src/models.rs index 73cfe3d..b52b876 100644 --- a/services/data-persistence-service/src/models.rs +++ b/services/data-persistence-service/src/models.rs @@ -13,15 +13,16 @@ pub struct SystemConfig { } #[derive(Debug, Clone, Serialize, sqlx::FromRow)] -pub struct AnalysisResult { - pub id: Uuid, - #[sqlx(default)] // request_id is missing in some schema versions, handle gracefully or ensure migration runs +pub struct WorkflowHistory { pub request_id: Uuid, pub symbol: String, - // template_id/module_id might be missing if schema is very old, but we rely on migrations - pub template_id: String, - pub module_id: String, - pub content: String, - pub meta_data: serde_json::Value, + pub market: String, + pub template_id: Option, + pub status: String, + pub start_time: chrono::DateTime, + pub end_time: Option>, + pub snapshot_data: serde_json::Value, pub created_at: chrono::DateTime, } + + diff --git a/services/data-persistence-service/tests/api_tests.rs b/services/data-persistence-service/tests/api_tests.rs index 6bf2d47..8741051 100644 --- a/services/data-persistence-service/tests/api_tests.rs +++ b/services/data-persistence-service/tests/api_tests.rs @@ -8,8 +8,8 @@ use axum::{ use data_persistence_service::{ self as app, dtos::{ - AnalysisResultDto, CompanyProfileDto, DailyMarketDataBatchDto, DailyMarketDataDto, - NewAnalysisResult, TimeSeriesFinancialBatchDto, TimeSeriesFinancialDto, + CompanyProfileDto, DailyMarketDataBatchDto, DailyMarketDataDto, + TimeSeriesFinancialBatchDto, TimeSeriesFinancialDto, }, AppState, }; @@ -168,63 +168,3 @@ async fn test_api_batch_insert_and_get_daily(pool: PgPool) { assert_eq!(fetched_data[0].close_price, Some(250.5)); } -#[sqlx::test] -async fn test_api_create_and_get_analysis(pool: PgPool) { - let state = AppState::new(pool); - let openapi = app::build_openapi_spec(); - let app = app::build_rest_router_with_state(openapi, state).unwrap(); - - // 1. 
Act: Create a new analysis result - let new_analysis = app::dtos::NewAnalysisResult { - request_id: Uuid::new_v4(), - symbol: "API.AI".to_string(), - template_id: "api_template".to_string(), - module_id: "bull_case".to_string(), - content: "This is a test analysis from an API test.".to_string(), - meta_data: serde_json::json!({ "model": "test-gpt" }), - }; - - let request = Request::builder() - .method("POST") - .uri("/api/v1/analysis-results") - .header("content-type", "application/json") - .body(Body::from(serde_json::to_string(&new_analysis).unwrap())) - .unwrap(); - - let response = ServiceExt::oneshot(app.clone().into_service(), request).await.unwrap(); - assert_eq!(response.status(), StatusCode::CREATED); - - let body = response.into_body().collect().await.unwrap().to_bytes(); - let created_analysis: app::dtos::AnalysisResultDto = serde_json::from_slice(&body).unwrap(); - - // 2. Act: Get the analysis by ID - let request_get = Request::builder() - .method("GET") - .uri(format!("/api/v1/analysis-results/{}", created_analysis.id)) - .body(Body::empty()) - .unwrap(); - - let response_get = ServiceExt::oneshot(app.clone().into_service(), request_get).await.unwrap(); - assert_eq!(response_get.status(), StatusCode::OK); - - // 3. Assert: Check the response body - let body_get = response_get.into_body().collect().await.unwrap().to_bytes(); - let fetched_analysis: app::dtos::AnalysisResultDto = serde_json::from_slice(&body_get).unwrap(); - - assert_eq!(fetched_analysis.id, created_analysis.id); - assert_eq!(fetched_analysis.symbol, "API.AI"); - - // 4. Act: Get by query params - let request_query = Request::builder() - .method("GET") - .uri("/api/v1/analysis-results?symbol=API.AI&module_id=bull_case") - .body(Body::empty()) - .unwrap(); - - let response_query = ServiceExt::oneshot(app.clone().into_service(), request_query).await.unwrap(); - assert_eq!(response_query.status(), StatusCode::OK); - let body_query = response_query.into_body().collect().await.unwrap().to_bytes(); - let fetched_list: Vec = serde_json::from_slice(&body_query).unwrap(); - assert_eq!(fetched_list.len(), 1); - assert_eq!(fetched_list[0].id, created_analysis.id); -} diff --git a/services/data-persistence-service/tests/db_tests.rs b/services/data-persistence-service/tests/db_tests.rs index 20487e9..ba8b921 100644 --- a/services/data-persistence-service/tests/db_tests.rs +++ b/services/data-persistence-service/tests/db_tests.rs @@ -7,7 +7,7 @@ use data_persistence_service::{ db, - dtos::{CompanyProfileDto, TimeSeriesFinancialDto, DailyMarketDataDto, NewAnalysisResult}, + dtos::{CompanyProfileDto, TimeSeriesFinancialDto, DailyMarketDataDto}, models, }; use sqlx::{postgres::PgPoolOptions, PgPool}; @@ -153,35 +153,3 @@ async fn test_batch_insert_and_get_daily_data(pool: PgPool) { assert!(close_str == "102.0" || close_str == "102"); } -#[sqlx::test] -async fn test_create_and_get_analysis_results(pool: PgPool) { - // 1. Setup: Create a test analysis result DTO - let new_analysis = NewAnalysisResult { - request_id: Uuid::new_v4(), - symbol: "TEST.AI".to_string(), - template_id: "default_template".to_string(), - module_id: "bull_case".to_string(), - content: "This is a bullish analysis.".to_string(), - meta_data: serde_json::json!({ "tokens": 123 }), - }; - - // 2. Act: Call the create function - let created_result = db::create_analysis_result(&pool, &new_analysis).await.unwrap(); - assert_eq!(created_result.symbol, "TEST.AI"); - assert_eq!(created_result.module_id, "bull_case"); - - // 3. 
Assert: Get by symbol and module_id - let fetched_by_symbol = db::get_analysis_results(&pool, "TEST.AI", Some("bull_case")).await.unwrap(); - assert_eq!(fetched_by_symbol.len(), 1); - assert_eq!(fetched_by_symbol[0].id, created_result.id); - assert_eq!(fetched_by_symbol[0].content, new_analysis.content); - - // 4. Assert: Get by ID - let fetched_by_id = db::get_analysis_result_by_id(&pool, created_result.id).await.unwrap().unwrap(); - assert_eq!(fetched_by_id.symbol, "TEST.AI"); - assert_eq!(fetched_by_id.content, new_analysis.content); - - // 5. Assert: Get by symbol only - let fetched_all_for_symbol = db::get_analysis_results(&pool, "TEST.AI", None).await.unwrap(); - assert_eq!(fetched_all_for_symbol.len(), 1); -} diff --git a/services/finnhub-provider-service/Cargo.lock b/services/finnhub-provider-service/Cargo.lock index 5ab7c58..71c6ab2 100644 --- a/services/finnhub-provider-service/Cargo.lock +++ b/services/finnhub-provider-service/Cargo.lock @@ -226,6 +226,16 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -270,6 +280,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -317,6 +329,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] @@ -754,6 +767,34 @@ dependencies = [ "wasip2", ] +[[package]] +name = "git2" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" +dependencies = [ + "bitflags", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.4.12" @@ -818,6 +859,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.3.1" @@ -1114,6 +1161,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -1147,6 +1204,46 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libgit2-sys" +version = "0.16.2+1.7.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" +dependencies = [ + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", +] + +[[package]] +name = "libssh2-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -1315,6 +1412,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-src" +version = "300.5.4+3.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507b3792995dae9b0df8a1c1e3771e8418b7c2d9f0baeba32e6fe8b06c7cb72" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.111" @@ -1323,6 +1429,7 @@ checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -1891,6 +1998,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2801,6 +2917,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2911,6 +3037,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -3152,6 +3287,22 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "workflow-context" +version = "0.1.0" +dependencies = [ + "anyhow", + "git2", + "globset", + "hex", + "regex", + "serde", + "serde_json", + "sha2", + "thiserror 1.0.69", + "walkdir", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/services/finnhub-provider-service/src/worker.rs b/services/finnhub-provider-service/src/worker.rs index 42dd826..482e450 100644 --- a/services/finnhub-provider-service/src/worker.rs +++ b/services/finnhub-provider-service/src/worker.rs @@ -246,6 +246,7 @@ mod integration_tests { 
symbol: CanonicalSymbol::new("AAPL", &Market::US), market: "US".to_string(), template_id: Some("default".to_string()), + output_path: None, }; // 4. NATS diff --git a/services/mock-provider-service/Cargo.lock b/services/mock-provider-service/Cargo.lock index cf86873..597849e 100644 --- a/services/mock-provider-service/Cargo.lock +++ b/services/mock-provider-service/Cargo.lock @@ -329,6 +329,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] diff --git a/services/mock-provider-service/src/main.rs b/services/mock-provider-service/src/main.rs index 0a06de1..da6e3c1 100644 --- a/services/mock-provider-service/src/main.rs +++ b/services/mock-provider-service/src/main.rs @@ -1,4 +1,5 @@ mod worker; +mod workflow_adapter; mod config; mod error; mod state; diff --git a/services/mock-provider-service/src/worker.rs b/services/mock-provider-service/src/worker.rs index 88967e3..2922dcd 100644 --- a/services/mock-provider-service/src/worker.rs +++ b/services/mock-provider-service/src/worker.rs @@ -1,13 +1,12 @@ -use anyhow::{Result, anyhow, Context}; +use anyhow::Result; use tracing::{info, error}; -use common_contracts::workflow_types::{WorkflowTaskCommand, WorkflowTaskEvent, TaskStatus, TaskResult}; +use common_contracts::workflow_types::WorkflowTaskCommand; use common_contracts::subjects::{NatsSubject, SubjectMessage}; -use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto}; -use workflow_context::WorkerContext; use crate::state::AppState; -use serde_json::json; use futures_util::StreamExt; -use chrono::NaiveDate; +use std::sync::Arc; +use common_contracts::workflow_runner::WorkflowNodeRunner; +use crate::workflow_adapter::MockNode; pub async fn run_consumer(state: AppState) -> Result<()> { info!("Starting Mock Provider message consumer..."); @@ -40,173 +39,8 @@ pub async fn run_consumer(state: AppState) -> Result<()> { Ok(()) } -async fn handle_workflow_command(_state: AppState, nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> { - info!("Processing mock workflow command: task_id={}", cmd.task_id); - - let symbol_code = cmd.config.get("symbol").and_then(|s| s.as_str()).unwrap_or("MOCK").to_string(); - - // Generate Dummy Data - let profile = CompanyProfileDto { - symbol: symbol_code.clone(), - name: format!("Mock Company {}", symbol_code), - industry: Some("Testing".to_string()), - list_date: Some(NaiveDate::from_ymd_opt(2000, 1, 1).unwrap()), - additional_info: Some(json!({ - "description": "This is a mock company for testing purposes.", - "market_cap": 1000000000.0, - "currency": "USD", - "country": "US" - })), - updated_at: Some(chrono::Utc::now()), - }; - - // Create Normalized Financials - let date = NaiveDate::from_ymd_opt(2023, 12, 31).unwrap(); - let financials = vec![ - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "revenue".to_string(), - period_date: date, - value: 1000000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "net_income".to_string(), - period_date: date, - value: 500000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "gross_profit".to_string(), - period_date: date, - value: 800000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "operating_income".to_string(), - period_date: date, - value: 600000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: 
symbol_code.clone(), - metric_name: "eps".to_string(), - period_date: date, - value: 5.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "total_assets".to_string(), - period_date: date, - value: 2000000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "total_liabilities".to_string(), - period_date: date, - value: 1000000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "total_equity".to_string(), - period_date: date, - value: 1000000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "operating_cash_flow".to_string(), - period_date: date, - value: 550000.0, - source: Some("mock".to_string()), - }, - TimeSeriesFinancialDto { - symbol: symbol_code.clone(), - metric_name: "free_cash_flow".to_string(), - period_date: date, - value: 450000.0, - source: Some("mock".to_string()), - } - ]; - - // Write to VGCS - let root_path = cmd.storage.root_path.clone(); - let req_id = cmd.request_id.to_string(); - let base_commit = cmd.context.base_commit.clone().unwrap_or_default(); - let symbol_code_clone = symbol_code.clone(); - - // Clone data for the blocking task - let profile_clone = profile.clone(); - let financials_clone = financials.clone(); - - let commit_result = tokio::task::spawn_blocking(move || -> Result { - let mut ctx = WorkerContext::new(&root_path, &req_id, &base_commit); - - let base_dir = format!("raw/mock/{}", symbol_code_clone); - - let profile_json = serde_json::to_string_pretty(&profile_clone) - .context("Failed to serialize profile")?; - ctx.write_file(&format!("{}/profile.json", base_dir), &profile_json)?; - - let financials_json = serde_json::to_string_pretty(&financials_clone) - .context("Failed to serialize financials")?; - ctx.write_file(&format!("{}/financials.json", base_dir), &financials_json)?; - - ctx.commit(&format!("Fetched Mock data for {}", symbol_code_clone)) - }).await; - - let new_commit = match commit_result { - Ok(res) => match res { - Ok(c) => c, - Err(e) => return send_failure(&nats, &cmd, &format!("VGCS failed: {}", e)).await, - }, - Err(e) => return send_failure(&nats, &cmd, &format!("Task join error: {}", e)).await, - }; - - info!("Task {} completed. 
New commit: {}", cmd.task_id, new_commit); - - // Send Success Event - let event = WorkflowTaskEvent { - request_id: cmd.request_id, - task_id: cmd.task_id, - status: TaskStatus::Completed, - result: Some(TaskResult { - new_commit: Some(new_commit), - error: None, - summary: Some(json!({ - "symbol": symbol_code, - "records": financials.len() - })), - }), - }; - - publish_event(&nats, event).await -} - -async fn send_failure(nats: &async_nats::Client, cmd: &WorkflowTaskCommand, error_msg: &str) -> Result<()> { - error!("Task {} failed: {}", cmd.task_id, error_msg); - let event = WorkflowTaskEvent { - request_id: cmd.request_id, - task_id: cmd.task_id.clone(), - status: TaskStatus::Failed, - result: Some(TaskResult { - new_commit: None, - error: Some(error_msg.to_string()), - summary: None, - }), - }; - publish_event(nats, event).await -} - -async fn publish_event(nats: &async_nats::Client, event: WorkflowTaskEvent) -> Result<()> { - let subject = event.subject().to_string(); - let payload = serde_json::to_vec(&event)?; - nats.publish(subject, payload.into()).await?; - Ok(()) +async fn handle_workflow_command(state: AppState, nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> { + let node = Arc::new(MockNode::new(state)); + let runner = WorkflowNodeRunner::new(nats); + runner.run(node, cmd).await } diff --git a/services/mock-provider-service/src/workflow_adapter.rs b/services/mock-provider-service/src/workflow_adapter.rs new file mode 100644 index 0000000..526d253 --- /dev/null +++ b/services/mock-provider-service/src/workflow_adapter.rs @@ -0,0 +1,118 @@ +use std::sync::Arc; +use async_trait::async_trait; +use anyhow::{Result, anyhow}; +use serde_json::{json, Value}; +use std::collections::HashMap; +use chrono::NaiveDate; + +use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent}; +use common_contracts::data_formatting; +use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto}; +use crate::state::AppState; + +pub struct MockNode { + state: AppState, +} + +impl MockNode { + pub fn new(state: AppState) -> Self { + Self { state } + } +} + +#[async_trait] +impl WorkflowNode for MockNode { + fn node_type(&self) -> &str { + "mock" + } + + async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result { + let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("MOCK").to_string(); + + // Generate Dummy Data + let profile = CompanyProfileDto { + symbol: symbol.clone(), + name: format!("Mock Company {}", symbol), + industry: Some("Testing".to_string()), + list_date: Some(NaiveDate::from_ymd_opt(2000, 1, 1).unwrap()), + additional_info: Some(json!({ + "description": "This is a mock company for testing purposes.", + "market_cap": 1000000000.0, + "currency": "USD", + "country": "US" + })), + updated_at: Some(chrono::Utc::now()), + }; + + let date = NaiveDate::from_ymd_opt(2023, 12, 31).unwrap(); + let financials = vec![ + TimeSeriesFinancialDto { + symbol: symbol.clone(), + metric_name: "revenue".to_string(), + period_date: date, + value: 1000000.0, + source: Some("mock".to_string()), + }, + TimeSeriesFinancialDto { + symbol: symbol.clone(), + metric_name: "net_income".to_string(), + period_date: date, + value: 500000.0, + source: Some("mock".to_string()), + }, + TimeSeriesFinancialDto { + symbol: symbol.clone(), + metric_name: "total_assets".to_string(), + period_date: date, + value: 2000000.0, + source: Some("mock".to_string()), + }, + TimeSeriesFinancialDto { + symbol: symbol.clone(), + metric_name: 
"operating_cash_flow".to_string(), + period_date: date, + value: 550000.0, + source: Some("mock".to_string()), + }, + ]; + + // Prepare Artifacts + let mut artifacts = HashMap::new(); + artifacts.insert("profile.json".to_string(), json!(profile).into()); + artifacts.insert("financials.json".to_string(), json!(financials).into()); + + Ok(NodeExecutionResult { + artifacts, + meta_summary: Some(json!({ + "symbol": symbol, + "records": financials.len() + })), + }) + } + + fn render_report(&self, result: &NodeExecutionResult) -> Result { + let profile_json = match result.artifacts.get("profile.json") { + Some(ArtifactContent::Json(v)) => v, + _ => return Err(anyhow!("Missing profile.json")), + }; + let financials_json = match result.artifacts.get("financials.json") { + Some(ArtifactContent::Json(v)) => v, + _ => return Err(anyhow!("Missing financials.json")), + }; + + let symbol = profile_json["symbol"].as_str().unwrap_or("Unknown"); + + let mut report_md = String::new(); + report_md.push_str(&format!("# Mock Data Report: {}\n\n", symbol)); + + report_md.push_str("## Company Profile\n\n"); + report_md.push_str(&data_formatting::format_data(profile_json)); + report_md.push_str("\n\n"); + + report_md.push_str("## Financial Statements\n\n"); + report_md.push_str(&data_formatting::format_data(financials_json)); + + Ok(report_md) + } +} + diff --git a/services/report-generator-service/Cargo.lock b/services/report-generator-service/Cargo.lock index ef89e04..2345295 100644 --- a/services/report-generator-service/Cargo.lock +++ b/services/report-generator-service/Cargo.lock @@ -424,6 +424,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] diff --git a/services/report-generator-service/src/persistence.rs b/services/report-generator-service/src/persistence.rs index c4d88c5..c45e89b 100644 --- a/services/report-generator-service/src/persistence.rs +++ b/services/report-generator-service/src/persistence.rs @@ -8,7 +8,7 @@ use crate::error::Result; use common_contracts::{ config_models::{AnalysisTemplateSets, LlmProvidersConfig}, dtos::{ - AnalysisResultDto, CompanyProfileDto, NewAnalysisResult, RealtimeQuoteDto, SessionDataDto, + CompanyProfileDto, RealtimeQuoteDto, SessionDataDto, TimeSeriesFinancialBatchDto, TimeSeriesFinancialDto, }, }; @@ -141,37 +141,6 @@ impl PersistenceClient { // --- Data Writing Methods --- - pub async fn create_analysis_result(&self, result: NewAnalysisResult) -> Result<()> { - let url = format!("{}/analysis-results", self.base_url); - info!( - "Persisting analysis result for symbol '{}', module '{}' to {}", - result.symbol, result.module_id, url - ); - self.client - .post(&url) - .json(&result) - .send() - .await? - .error_for_status()?; - Ok(()) - } - - pub async fn get_analysis_results(&self, symbol: &str) -> Result> { - let url = format!("{}/analysis-results", self.base_url); - let params = [("symbol", symbol)]; - info!("Fetching analysis results for {} from {}", symbol, url); - let dtos = self - .client - .get(&url) - .query(¶ms) - .send() - .await? - .error_for_status()? 
- .json::>() - .await?; - Ok(dtos) - } - pub async fn upsert_company_profile(&self, profile: CompanyProfileDto) -> Result<()> { let url = format!("{}/companies", self.base_url); info!("Upserting company profile for {} to {}", profile.symbol, url); diff --git a/services/report-generator-service/src/worker.rs b/services/report-generator-service/src/worker.rs index 97b9db1..aad9e52 100644 --- a/services/report-generator-service/src/worker.rs +++ b/services/report-generator-service/src/worker.rs @@ -228,17 +228,49 @@ async fn run_vgcs_based_generation( }; let execution_log_clone = execution_log.clone(); + // Clone variables for the closure to avoid moving the original values + let log_path_for_closure = log_path.clone(); + let task_id_for_closure = task_id_clone.clone(); + // We need to commit on top of base commit let commit_res = tokio::task::spawn_blocking(move || -> Result { let mut ctx = WorkerContext::new(&root_path, &req_id_clone, &commit_clone); ctx.write_file(&output_path_clone, &full_content_clone).map_err(|e| ProviderError::Internal(e))?; // Write the sidecar log - ctx.write_file(&log_path, &execution_log_clone).map_err(|e| ProviderError::Internal(e))?; + ctx.write_file(&log_path_for_closure, &execution_log_clone).map_err(|e| ProviderError::Internal(e))?; - ctx.commit(&format!("Analysis Result for {}", task_id_clone)).map_err(|e| ProviderError::Internal(e)) + ctx.commit(&format!("Analysis Result for {}", task_id_for_closure)).map_err(|e| ProviderError::Internal(e)) }).await.map_err(|e| ProviderError::Internal(anyhow::anyhow!("Join Error: {}", e)))??; info!("Generated report committed: {}", commit_res); + + // Send completion event with metadata to Orchestrator + if let Some(task_id) = &command.task_id { + let result_summary = serde_json::json!({ + "output_path": output_path, + "execution_log_path": log_path + }); + + let completion_evt = common_contracts::workflow_types::WorkflowTaskEvent { + request_id: command.request_id, + task_id: task_id.clone(), + status: common_contracts::workflow_types::TaskStatus::Completed, + result: Some(common_contracts::workflow_types::TaskResult { + new_commit: Some(commit_res.clone()), + error: None, + summary: Some(result_summary), + }), + }; + + let subject = common_contracts::subjects::NatsSubject::WorkflowEventTaskCompleted.to_string(); + if let Ok(payload) = serde_json::to_vec(&completion_evt) { + let _ = state.nats.publish(subject, payload.into()).await; + } + } + + // 7. Persist to Database for History (REMOVED) + // analysis_results table is dropped. We rely on VGCS + Orchestrator Workflow History. + info!("Analysis result saved to VGCS (commit: {}). 
Database persistence skipped (table removed).", commit_res); Ok(commit_res) } diff --git a/services/tushare-provider-service/Cargo.lock b/services/tushare-provider-service/Cargo.lock index aad008b..4c4d0ad 100644 --- a/services/tushare-provider-service/Cargo.lock +++ b/services/tushare-provider-service/Cargo.lock @@ -329,6 +329,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] diff --git a/services/tushare-provider-service/src/generic_worker.rs b/services/tushare-provider-service/src/generic_worker.rs index 264da23..6c91a6d 100644 --- a/services/tushare-provider-service/src/generic_worker.rs +++ b/services/tushare-provider-service/src/generic_worker.rs @@ -1,140 +1,12 @@ -use anyhow::{Result, anyhow, Context}; -use tracing::{info, error, warn}; -use common_contracts::workflow_types::{WorkflowTaskCommand, WorkflowTaskEvent, TaskStatus, TaskResult}; -use common_contracts::subjects::SubjectMessage; -use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto}; -use workflow_context::WorkerContext; +use anyhow::Result; +use common_contracts::workflow_types::WorkflowTaskCommand; use crate::state::AppState; -use serde_json::json; +use crate::workflow_adapter::TushareNode; +use common_contracts::workflow_runner::WorkflowNodeRunner; +use std::sync::Arc; pub async fn handle_workflow_command(state: AppState, nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> { - info!("Processing generic workflow command: task_id={}", cmd.task_id); - - // 1. Parse Config - let symbol_code = cmd.config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string(); - let market = cmd.config.get("market").and_then(|s| s.as_str()).unwrap_or("CN").to_string(); - - if symbol_code.is_empty() { - return send_failure(&nats, &cmd, "Missing symbol in config").await; - } - - // 3. Fetch Data (with Cache) - let fetch_result = fetch_and_cache(&state, &symbol_code, &market).await; - - let (profile, financials) = match fetch_result { - Ok(data) => data, - Err(e) => return send_failure(&nats, &cmd, &format!("Fetch failed: {}", e)).await, - }; - - // 4. 
Write to VGCS (Spawn blocking task for Git operations) - let root_path = cmd.storage.root_path.clone(); - let req_id = cmd.request_id.to_string(); - let base_commit = cmd.context.base_commit.clone().unwrap_or_default(); - let task_id = cmd.task_id.clone(); - - // Clone data needed for closure - let profile_clone = profile.clone(); - let financials_clone = financials.clone(); - let symbol_code_clone = symbol_code.clone(); - - // Check for output path override from Orchestrator - let output_path_override = cmd.config.get("output_path").and_then(|s| s.as_str()).map(|s| s.to_string()); - - let commit_result = tokio::task::spawn_blocking(move || -> Result { - let mut ctx = WorkerContext::new(&root_path, &req_id, &base_commit); - - // Use resolved output path or fallback to default convention - let base_dir = output_path_override.unwrap_or_else(|| format!("raw/tushare/{}", symbol_code_clone)); - - let profile_json = serde_json::to_string_pretty(&profile_clone) - .context("Failed to serialize profile")?; - ctx.write_file(&format!("{}/profile.json", base_dir), &profile_json)?; - - let financials_json = serde_json::to_string_pretty(&financials_clone) - .context("Failed to serialize financials")?; - ctx.write_file(&format!("{}/financials.json", base_dir), &financials_json)?; - - ctx.commit(&format!("Fetched Tushare data for {}", symbol_code_clone)) - }).await; - - let new_commit = match commit_result { - Ok(res) => match res { - Ok(c) => c, - Err(e) => return send_failure(&nats, &cmd, &format!("VGCS failed: {}", e)).await, - }, - Err(e) => return send_failure(&nats, &cmd, &format!("Task join error: {}", e)).await, - }; - - info!("Task {} completed. New commit: {}", cmd.task_id, new_commit); - - // 6. Send Success Event - let event = WorkflowTaskEvent { - request_id: cmd.request_id, - task_id: cmd.task_id, - status: TaskStatus::Completed, - result: Some(TaskResult { - new_commit: Some(new_commit), - error: None, - summary: Some(json!({ - "symbol": symbol_code, - "records": financials.len() - })), - }), - }; - - publish_event(&nats, event).await + let node = Arc::new(TushareNode::new(state)); + let runner = WorkflowNodeRunner::new(nats); + runner.run(node, cmd).await } - -async fn fetch_and_cache(state: &AppState, symbol: &str, _market: &str) -> Result<(CompanyProfileDto, Vec)> { - // 1. Get Provider (which holds the API token) - let provider = state.get_provider().await - .ok_or_else(|| anyhow!("Tushare Provider not initialized (missing API Token?)"))?; - - // 2. Call fetch - let (profile, financials) = provider.fetch_all_data(symbol).await - .context("Failed to fetch data from Tushare")?; - - // 3. Write to DB Cache - // Note: PersistenceClient is not directly in AppState struct definition in `state.rs` I read. - // Let's check `state.rs` again. It implements TaskState which has `get_persistence_url`. - // We should instantiate PersistenceClient on the fly or add it to AppState. - - // For now, let's create a client on the fly to avoid changing AppState struct everywhere. - use common_contracts::persistence_client::PersistenceClient; - use common_contracts::workflow_harness::TaskState; // For get_persistence_url - - let persistence_url = state.get_persistence_url(); - let p_client = PersistenceClient::new(persistence_url); - - if let Err(e) = p_client.upsert_company_profile(profile.clone()).await { - warn!("Failed to cache company profile: {}", e); - } - - // Batch save financials logic is missing in PersistenceClient (based on context). - // If it existed, we would call it here. 
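The mock and tushare workers above no longer hand-roll VGCS writes and NATS events per provider; each now wraps its fetch logic in a small adapter and hands it to the shared `WorkflowNodeRunner`. The trait and runner live in common-contracts and are not part of this diff, so the following is only a minimal sketch of the contract the adapters appear to implement, with names inferred from `MockNode`/`TushareNode`; the `NodeContext` internals and the `From<Value>` impl are assumptions.

```rust
// Minimal sketch of the shared contract the new adapters implement. The real
// definitions live in common_contracts::workflow_node / workflow_runner and
// are not shown in this diff; anything not visible in MockNode/TushareNode
// (NodeContext fields, extra ArtifactContent variants) is an assumption.
use std::collections::HashMap;

use anyhow::Result;
use async_trait::async_trait;
use serde_json::Value;

/// Opaque here: in the real crate this carries request/base-commit details.
pub struct NodeContext;

pub enum ArtifactContent {
    /// The only variant exercised in this diff (via `json!(..).into()`).
    Json(Value),
}

impl From<Value> for ArtifactContent {
    fn from(v: Value) -> Self {
        ArtifactContent::Json(v)
    }
}

pub struct NodeExecutionResult {
    /// Artifact name -> content, e.g. "profile.json", "financials.json".
    pub artifacts: HashMap<String, ArtifactContent>,
    /// Small structured summary forwarded to the orchestrator in TaskResult.
    pub meta_summary: Option<Value>,
}

#[async_trait]
pub trait WorkflowNode: Send + Sync {
    /// Short identifier such as "tushare" or "mock".
    fn node_type(&self) -> &str;
    /// Pure data production: fetch/compute and return named artifacts.
    async fn execute(&self, ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult>;
    /// Turn the artifacts into the Markdown report committed alongside them.
    fn render_report(&self, result: &NodeExecutionResult) -> Result<String>;
}
```

The runner is then expected to do what the deleted per-service code did inline: check out `base_commit` via `WorkerContext`, write the artifacts plus the rendered report, commit, and publish a Completed or Failed `WorkflowTaskEvent` carrying the new commit hash and `meta_summary`.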
- - Ok((profile, financials)) -} - -async fn send_failure(nats: &async_nats::Client, cmd: &WorkflowTaskCommand, error_msg: &str) -> Result<()> { - error!("Task {} failed: {}", cmd.task_id, error_msg); - let event = WorkflowTaskEvent { - request_id: cmd.request_id, - task_id: cmd.task_id.clone(), - status: TaskStatus::Failed, - result: Some(TaskResult { - new_commit: None, - error: Some(error_msg.to_string()), - summary: None, - }), - }; - publish_event(nats, event).await -} - -async fn publish_event(nats: &async_nats::Client, event: WorkflowTaskEvent) -> Result<()> { - let subject = event.subject().to_string(); - let payload = serde_json::to_vec(&event)?; - nats.publish(subject, payload.into()).await?; - Ok(()) -} - diff --git a/services/tushare-provider-service/src/main.rs b/services/tushare-provider-service/src/main.rs index cdb49fd..5631e81 100644 --- a/services/tushare-provider-service/src/main.rs +++ b/services/tushare-provider-service/src/main.rs @@ -9,6 +9,7 @@ mod ts_client; mod tushare; mod worker; mod generic_worker; +mod workflow_adapter; mod config_poller; use crate::config::AppConfig; diff --git a/services/tushare-provider-service/src/worker.rs b/services/tushare-provider-service/src/worker.rs index 3caba38..1b73bbd 100644 --- a/services/tushare-provider-service/src/worker.rs +++ b/services/tushare-provider-service/src/worker.rs @@ -102,6 +102,7 @@ mod integration_tests { symbol: CanonicalSymbol::new("600519", &Market::CN), market: "CN".to_string(), template_id: Some("default".to_string()), + output_path: None, }; // Init task diff --git a/services/tushare-provider-service/src/workflow_adapter.rs b/services/tushare-provider-service/src/workflow_adapter.rs new file mode 100644 index 0000000..6ef4881 --- /dev/null +++ b/services/tushare-provider-service/src/workflow_adapter.rs @@ -0,0 +1,90 @@ +use std::sync::Arc; +use async_trait::async_trait; +use anyhow::{Result, anyhow, Context}; +use serde_json::{json, Value}; +use std::collections::HashMap; + +use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent}; +use common_contracts::data_formatting; +use common_contracts::persistence_client::PersistenceClient; +use common_contracts::workflow_harness::TaskState; +use crate::state::AppState; + +pub struct TushareNode { + state: AppState, +} + +impl TushareNode { + pub fn new(state: AppState) -> Self { + Self { state } + } +} + +#[async_trait] +impl WorkflowNode for TushareNode { + fn node_type(&self) -> &str { + "tushare" + } + + async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result { + let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string(); + let _market = config.get("market").and_then(|s| s.as_str()).unwrap_or("CN").to_string(); + + if symbol.is_empty() { + return Err(anyhow!("Missing symbol in config")); + } + + // 1. Fetch Data + let provider = self.state.get_provider().await + .ok_or_else(|| anyhow!("Tushare Provider not initialized"))?; + + let (profile, financials) = provider.fetch_all_data(&symbol).await + .context("Failed to fetch data from Tushare")?; + + // 2. DB Cache (Side effect) + let persistence_url = self.state.get_persistence_url(); + let p_client = PersistenceClient::new(persistence_url); + if let Err(e) = p_client.upsert_company_profile(profile.clone()).await { + tracing::warn!("Failed to cache company profile: {}", e); + } + + // 3. 
Artifacts + let mut artifacts = HashMap::new(); + artifacts.insert("profile.json".to_string(), json!(profile).into()); + artifacts.insert("financials.json".to_string(), json!(financials).into()); + + Ok(NodeExecutionResult { + artifacts, + meta_summary: Some(json!({ + "symbol": symbol, + "records": financials.len() + })), + }) + } + + fn render_report(&self, result: &NodeExecutionResult) -> Result { + let profile_json = match result.artifacts.get("profile.json") { + Some(ArtifactContent::Json(v)) => v, + _ => return Err(anyhow!("Missing profile.json")), + }; + let financials_json = match result.artifacts.get("financials.json") { + Some(ArtifactContent::Json(v)) => v, + _ => return Err(anyhow!("Missing financials.json")), + }; + + let symbol = profile_json["symbol"].as_str().unwrap_or("Unknown"); + + let mut report_md = String::new(); + report_md.push_str(&format!("# Tushare Data Report: {}\n\n", symbol)); + + report_md.push_str("## Company Profile\n\n"); + report_md.push_str(&data_formatting::format_data(profile_json)); + report_md.push_str("\n\n"); + + report_md.push_str("## Financial Statements\n\n"); + report_md.push_str(&data_formatting::format_data(financials_json)); + + Ok(report_md) + } +} + diff --git a/services/workflow-orchestrator-service/Cargo.lock b/services/workflow-orchestrator-service/Cargo.lock index eb11908..b569f4a 100644 --- a/services/workflow-orchestrator-service/Cargo.lock +++ b/services/workflow-orchestrator-service/Cargo.lock @@ -320,6 +320,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] diff --git a/services/workflow-orchestrator-service/src/dag_scheduler.rs b/services/workflow-orchestrator-service/src/dag_scheduler.rs index 3048baa..a36f590 100644 --- a/services/workflow-orchestrator-service/src/dag_scheduler.rs +++ b/services/workflow-orchestrator-service/src/dag_scheduler.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use uuid::Uuid; use common_contracts::workflow_types::{TaskStatus, TaskContext}; -use common_contracts::messages::TaskType; +use common_contracts::messages::{TaskType, TaskMetadata}; use workflow_context::{Vgcs, ContextStore}; use anyhow::Result; use tracing::info; @@ -14,6 +14,8 @@ pub struct CommitTracker { /// The latest merged commit for the whole workflow (if linear). /// Or just a reference to the "main" branch tip. pub head_commit: String, + /// Maps task_id to generic metadata (e.g. output_path) (New) + pub task_metadata: HashMap, } impl CommitTracker { @@ -21,6 +23,7 @@ impl CommitTracker { Self { task_commits: HashMap::new(), head_commit: initial_commit, + task_metadata: HashMap::new(), } } @@ -30,6 +33,28 @@ impl CommitTracker { // a single "main" branch or just use task_commits for DAG resolution. // For now, we don't eagerly update head_commit unless it's a final task. 
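On the orchestrator side, those `meta_summary` values are folded into the `CommitTracker`: the `record_metadata` helper added just below first tries to parse a summary as a typed `TaskMetadata` and otherwise stores the raw keys in `extra`. A small round-trip sketch of that flow, assuming a serde shape for `TaskMetadata` (the real struct lives in `common_contracts::messages` and may differ):

```rust
// Hedged sketch of the summary -> TaskMetadata flow. The field names match the
// fallback constructor in record_metadata; the serde attributes are assumptions.
use std::collections::HashMap;

use serde::{Deserialize, Serialize};
use serde_json::{json, Value};

#[derive(Debug, Serialize, Deserialize)]
struct TaskMetadata {
    #[serde(default)]
    output_path: Option<String>,
    #[serde(default)]
    execution_log_path: Option<String>,
    /// Any keys the orchestrator does not model explicitly.
    #[serde(flatten)]
    extra: HashMap<String, Value>,
}

fn main() {
    // What report-generator publishes in TaskResult.summary (see worker.rs above).
    let summary = json!({
        "output_path": "analysis/bull_case/AAPL.md",
        "execution_log_path": "analysis/bull_case/AAPL_trace.md"
    });
    let meta: TaskMetadata = serde_json::from_value(summary).expect("well-formed summary");
    assert_eq!(meta.output_path.as_deref(), Some("analysis/bull_case/AAPL.md"));

    // Provider summaries like {"symbol": "...", "records": 10} carry no known
    // fields, so their keys end up in `extra` (or in the fallback branch of
    // record_metadata, which copies them over manually).
    let provider: TaskMetadata =
        serde_json::from_value(json!({"symbol": "600519", "records": 10})).unwrap();
    assert!(provider.output_path.is_none());
    assert_eq!(provider.extra.len(), 2);
}
```

Report-generation tasks therefore surface `output_path`/`execution_log_path` directly, while provider summaries survive as opaque extras instead of being dropped.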
} + + pub fn record_metadata(&mut self, task_id: &str, meta: serde_json::Value) { + // Convert generic JSON to TaskMetadata + if let Ok(parsed) = serde_json::from_value::(meta.clone()) { + self.task_metadata.insert(task_id.to_string(), parsed); + } else { + // Fallback: store raw JSON in extra fields of a new TaskMetadata + let mut extra = HashMap::new(); + if let Some(obj) = meta.as_object() { + for (k, v) in obj { + extra.insert(k.clone(), v.clone()); + } + } + + let metadata = TaskMetadata { + output_path: None, + execution_log_path: None, + extra, + }; + self.task_metadata.insert(task_id.to_string(), metadata); + } + } } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -42,6 +67,17 @@ pub struct DagScheduler { pub reverse_deps: HashMap>, pub commit_tracker: CommitTracker, + + /// Flag to track if the workflow completion event has already been emitted. + #[serde(default)] + pub workflow_finished_flag: bool, + + #[serde(default = "default_start_time")] + pub start_time: chrono::DateTime, +} + +fn default_start_time() -> chrono::DateTime { + chrono::Utc::now() } impl DagScheduler { @@ -96,6 +132,8 @@ impl DagScheduler { forward_deps: HashMap::new(), reverse_deps: HashMap::new(), commit_tracker: CommitTracker::new(initial_commit), + workflow_finished_flag: false, + start_time: chrono::Utc::now(), } } @@ -146,6 +184,10 @@ impl DagScheduler { } } + pub fn record_metadata(&mut self, task_id: &str, meta: serde_json::Value) { + self.commit_tracker.record_metadata(task_id, meta); + } + /// Check if all tasks in the DAG have reached a terminal state. pub fn is_workflow_finished(&self) -> bool { self.nodes.values().all(|n| matches!(n.status, @@ -156,6 +198,22 @@ impl DagScheduler { )) } + /// Checks if the workflow is finished AND not yet marked as finished. + /// Returns true only if this is the FIRST time the workflow is detected as finished. + /// Updates the internal flag. + pub fn try_finish_workflow(&mut self) -> bool { + if self.workflow_finished_flag { + return false; + } + + if self.is_workflow_finished() { + self.workflow_finished_flag = true; + return true; + } + + false + } + /// Check if any task has failed, indicating the workflow is partially or fully failed. /// Note: Depending on requirements, some failures might be tolerant. /// Here we assume any failure means the workflow has failed components. @@ -270,10 +328,33 @@ impl DagScheduler { mod tests { use super::*; use tempfile::TempDir; - use workflow_context::{Vgcs, ContextStore, Transaction}; + use workflow_context::{Vgcs, ContextStore}; use common_contracts::messages::TaskType; use serde_json::json; + #[test] + fn test_dag_double_completion_prevention() { + let req_id = Uuid::new_v4(); + let mut dag = DagScheduler::new(req_id, "init".to_string()); + + dag.add_node("A".to_string(), None, TaskType::DataFetch, "key".into(), json!({})); + + // 1. Not finished + assert!(!dag.is_workflow_finished()); + assert!(!dag.try_finish_workflow()); + + // 2. Complete task + dag.update_status("A", TaskStatus::Completed); + assert!(dag.is_workflow_finished()); + + // 3. Try finish - First time should be true + assert!(dag.try_finish_workflow()); + assert!(dag.workflow_finished_flag); + + // 4. 
Try finish - Second time should be false (Idempotency check) + assert!(!dag.try_finish_workflow()); + } + #[test] fn test_dag_merge_strategy() -> Result<()> { let temp_dir = TempDir::new()?; diff --git a/services/workflow-orchestrator-service/src/io_binder.rs b/services/workflow-orchestrator-service/src/io_binder.rs index b2b1dd8..aba390c 100644 --- a/services/workflow-orchestrator-service/src/io_binder.rs +++ b/services/workflow-orchestrator-service/src/io_binder.rs @@ -12,18 +12,26 @@ impl IOBinder { task_type: TaskType, symbol: &str, task_id: &str, + display_name: Option<&str>, ) -> String { // Convention based paths: // DataFetch: raw/{provider_id}/{symbol} // DataProcessing: processed/{processor_id}/{symbol} - // Analysis: analysis/{module_id}/{symbol}.md + // Analysis: analysis/{module_name_or_id}/{symbol}.md let clean_task_id = task_id.split(':').last().unwrap_or(task_id); match task_type { TaskType::DataFetch => format!("raw/{}/{}", clean_task_id, symbol), TaskType::DataProcessing => format!("processed/{}/{}", clean_task_id, symbol), - TaskType::Analysis => format!("analysis/{}/{}.md", clean_task_id, symbol), + TaskType::Analysis => { + let folder_name = if let Some(name) = display_name { + self.sanitize_path_segment(name) + } else { + clean_task_id.to_string() + }; + format!("analysis/{}/{}.md", folder_name, symbol) + }, } } @@ -32,12 +40,34 @@ impl IOBinder { task_type: TaskType, symbol: &str, task_id: &str, + display_name: Option<&str>, ) -> String { let clean_task_id = task_id.split(':').last().unwrap_or(task_id); match task_type { - TaskType::Analysis => format!("analysis/{}/{}_trace.md", clean_task_id, symbol), + TaskType::Analysis => { + let folder_name = if let Some(name) = display_name { + self.sanitize_path_segment(name) + } else { + clean_task_id.to_string() + }; + format!("analysis/{}/{}_trace.md", folder_name, symbol) + }, _ => format!("debug/{}/{}_trace.md", clean_task_id, symbol), } } + + fn sanitize_path_segment(&self, name: &str) -> String { + name.replace('/', "_") + .replace('\\', "_") + .replace(':', "_") + .replace('"', "_") + .replace('<', "_") + .replace('>', "_") + .replace('|', "_") + .replace('?', "_") + .replace('*', "_") + .trim() + .to_string() + } } diff --git a/services/workflow-orchestrator-service/src/workflow.rs b/services/workflow-orchestrator-service/src/workflow.rs index 6d158f0..3465032 100644 --- a/services/workflow-orchestrator-service/src/workflow.rs +++ b/services/workflow-orchestrator-service/src/workflow.rs @@ -7,6 +7,7 @@ use common_contracts::messages::{ }; use common_contracts::subjects::SubjectMessage; use common_contracts::symbol_utils::CanonicalSymbol; +use common_contracts::dtos::{SessionDataDto, NewWorkflowHistory}; use tracing::{info, warn, error}; use anyhow::Result; use serde_json::json; @@ -132,6 +133,7 @@ impl WorkflowEngine { task_graph: dag.to_dto(), tasks_status, tasks_output: dag.commit_tracker.task_commits.clone().into_iter().map(|(k, v)| (k, Some(v))).collect(), + tasks_metadata: dag.commit_tracker.task_metadata.clone(), }; let subject = common_contracts::subjects::NatsSubject::WorkflowProgress(req_id).to_string(); @@ -209,6 +211,9 @@ impl WorkflowEngine { info!("Task {} produced commit {}", evt.task_id, commit); dag.record_result(&evt.task_id, Some(commit)); } + if let Some(summary) = result.summary { + dag.record_metadata(&evt.task_id, summary); + } if let Some(err) = result.error { warn!("Task {} failed with error: {}", evt.task_id, err); } @@ -225,9 +230,104 @@ impl WorkflowEngine { } } - // 3. 
Check Workflow Completion
-        if dag.is_workflow_finished() {
-            let timestamp = chrono::Utc::now().timestamp_millis();
+        // 3. Check Workflow Completion
+        if dag.try_finish_workflow() {
+            let end_time = chrono::Utc::now();
+            let timestamp = end_time.timestamp_millis();
+
+            // --- Snapshot Persistence ---
+            let tasks_status_map = dag.nodes.iter().map(|(k, n)| {
+                let status = match n.status {
+                    TaskStatus::Pending => MsgTaskStatus::Pending,
+                    TaskStatus::Scheduled => MsgTaskStatus::Scheduled,
+                    TaskStatus::Running => MsgTaskStatus::Running,
+                    TaskStatus::Completed => MsgTaskStatus::Completed,
+                    TaskStatus::Failed => MsgTaskStatus::Failed,
+                    TaskStatus::Skipped => MsgTaskStatus::Skipped,
+                    TaskStatus::Cancelled => MsgTaskStatus::Skipped,
+                };
+                (k.clone(), status)
+            }).collect::<HashMap<_, _>>();
+
+            let tasks_output_map = dag.commit_tracker.task_commits.clone().into_iter().map(|(k, v)| (k, Some(v))).collect::<HashMap<_, _>>();
+            let tasks_metadata_map = dag.commit_tracker.task_metadata.clone();
+
+            let snapshot_event = WorkflowEvent::WorkflowStateSnapshot {
+                timestamp,
+                task_graph: dag.to_dto(),
+                tasks_status: tasks_status_map,
+                tasks_output: tasks_output_map,
+                tasks_metadata: tasks_metadata_map,
+            };
+
+            // Extract symbol & market from any node
+            let symbol = dag.nodes.values().next()
+                .and_then(|n| n.config.get("symbol"))
+                .and_then(|v| v.as_str())
+                .unwrap_or("unknown")
+                .to_string();
+            let market = dag.nodes.values().next()
+                .and_then(|n| n.config.get("market"))
+                .and_then(|v| v.as_str())
+                .unwrap_or("unknown")
+                .to_string();
+            let template_id = dag.nodes.values().next()
+                .and_then(|n| n.config.get("template_id"))
+                .and_then(|v| v.as_str())
+                .map(|s| s.to_string());
+
+            if let Ok(payload) = serde_json::to_value(&snapshot_event) {
+                // 1. Save Legacy Session Data (Workflow Snapshot)
+                let session_data = SessionDataDto {
+                    request_id: req_id,
+                    symbol: symbol.clone(),
+                    provider: "orchestrator".to_string(),
+                    data_type: "workflow_snapshot".to_string(),
+                    data_payload: payload.clone(),
+                    created_at: None,
+                };
+
+                // 2. Save New Workflow History
+                // The DAG records its own creation time as `start_time`, so use it
+                // directly as the workflow start timestamp for the history record.
+                let start_time = dag.start_time;
+
+                let has_failures = dag.has_failures();
+                let status_str = if has_failures { "Failed" } else { "Completed" }.to_string();
+
+                let history = NewWorkflowHistory {
+                    request_id: req_id,
+                    symbol: symbol.clone(),
+                    market: market.clone(),
+                    template_id,
+                    status: status_str,
+                    start_time,
+                    end_time: Some(end_time),
+                    snapshot_data: payload,
+                };
+
+                let persistence = self.state.persistence_client.clone();
+                let req_id_clone = req_id;
+                tokio::spawn(async move {
+                    // Save session data (Legacy/Raw)
+                    if let Err(e) = persistence.insert_session_data(&session_data).await {
+                        error!("Failed to save workflow snapshot (session_data) for {}: {}", req_id_clone, e);
+                    }
+
+                    // Save Workflow History (New)
+                    if let Err(e) = persistence.create_workflow_history(&history).await {
+                        error!("Failed to save workflow history for {}: {}", req_id_clone, e);
+                    } else {
+                        info!("Workflow history saved for {}", req_id_clone);
+                    }
+                });
+            }
+
             let event = if dag.has_failures() {
                 info!("Workflow {} failed (some tasks failed)", req_id);
                 self.publish_log(req_id, "workflow", "ERROR", "Workflow finished with failures.").await;
@@ -270,9 +370,9 @@ impl WorkflowEngine {
         self.publish_log(dag.request_id, task_id, "INFO", "Task scheduled and dispatched.").await;
 
         // 3. Construct Command
-        let (routing_key, task_type, mut config) = {
+        let (routing_key, task_type, mut config, display_name) = {
             let node = dag.nodes.get(task_id).ok_or_else(|| anyhow::anyhow!("Node not found"))?;
-            (node.routing_key.clone(), node.task_type, node.config.clone())
+            (node.routing_key.clone(), node.task_type, node.config.clone(), node.display_name.clone())
         };
 
         // --- Resolution Phase ---
@@ -280,7 +380,7 @@
         // 3.1 IO Binding
         let io_binder = IOBinder::new();
-        let output_path = io_binder.allocate_output_path(task_type, &symbol, task_id);
+        let output_path = io_binder.allocate_output_path(task_type, &symbol, task_id, display_name.as_deref());
 
         if let Some(obj) = config.as_object_mut() {
             obj.insert("output_path".to_string(), serde_json::Value::String(output_path.clone()));
@@ -318,7 +418,7 @@
         }
 
         // 2. 
Write Trace Sidecar to VGCS - let trace_path = io_binder.allocate_trace_path(task_type, &symbol, task_id); + let trace_path = io_binder.allocate_trace_path(task_type, &symbol, task_id, display_name.as_deref()); // Use a blocking task for VGCS write/commit to avoid async issues with standard IO let vgcs = self.state.vgcs.clone(); @@ -436,7 +536,8 @@ impl WorkflowEngine { routing_key, json!({ "symbol": symbol.as_str(), - "market": market + "market": market, + "template_id": template_id }) ); } diff --git a/services/yfinance-provider-service/Cargo.lock b/services/yfinance-provider-service/Cargo.lock index 47c457b..015554c 100644 --- a/services/yfinance-provider-service/Cargo.lock +++ b/services/yfinance-provider-service/Cargo.lock @@ -329,6 +329,7 @@ dependencies = [ "tracing", "utoipa", "uuid", + "workflow-context", ] [[package]] diff --git a/services/yfinance-provider-service/src/generic_worker.rs b/services/yfinance-provider-service/src/generic_worker.rs index cdb8d99..e141361 100644 --- a/services/yfinance-provider-service/src/generic_worker.rs +++ b/services/yfinance-provider-service/src/generic_worker.rs @@ -1,132 +1,12 @@ -use anyhow::{Result, Context}; -use tracing::{info, error, warn}; -use common_contracts::workflow_types::{WorkflowTaskCommand, WorkflowTaskEvent, TaskStatus, TaskResult}; -use common_contracts::subjects::SubjectMessage; -use common_contracts::dtos::{CompanyProfileDto, TimeSeriesFinancialDto}; -use workflow_context::WorkerContext; +use anyhow::Result; +use common_contracts::workflow_types::WorkflowTaskCommand; use crate::state::AppState; -use serde_json::json; +use crate::workflow_adapter::YFinanceNode; +use common_contracts::workflow_runner::WorkflowNodeRunner; +use std::sync::Arc; pub async fn handle_workflow_command(state: AppState, nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> { - info!("Processing generic workflow command: task_id={}", cmd.task_id); - - // 1. Parse Config - let symbol_code = cmd.config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string(); - let market = cmd.config.get("market").and_then(|s| s.as_str()).unwrap_or("US").to_string(); - - if symbol_code.is_empty() { - return send_failure(&nats, &cmd, "Missing symbol in config").await; - } - - // 2. Initialize Worker Context - // Note: We use the provided base_commit. If it's empty, it means start from scratch (or empty repo). - // We need to mount the volume. - let root_path = cmd.storage.root_path.clone(); - - // 3. Fetch Data (with Cache) - let fetch_result = fetch_and_cache(&state, &symbol_code, &market).await; - - let (profile, financials) = match fetch_result { - Ok(data) => data, - Err(e) => return send_failure(&nats, &cmd, &format!("Fetch failed: {}", e)).await, - }; - - // 4. 
Write to VGCS (Spawn blocking task for Git operations) - let req_id = cmd.request_id.to_string(); - let base_commit = cmd.context.base_commit.clone().unwrap_or_default(); - let _task_id = cmd.task_id.clone(); - - // Clone data needed for closure - let profile_clone = profile.clone(); - let financials_clone = financials.clone(); - let symbol_code_clone = symbol_code.clone(); - - let commit_result = tokio::task::spawn_blocking(move || -> Result { - let mut ctx = WorkerContext::new(&root_path, &req_id, &base_commit); - - let base_dir = format!("raw/yfinance/{}", symbol_code_clone); - - let profile_json = serde_json::to_string_pretty(&profile_clone) - .context("Failed to serialize profile")?; - ctx.write_file(&format!("{}/profile.json", base_dir), &profile_json)?; - - let financials_json = serde_json::to_string_pretty(&financials_clone) - .context("Failed to serialize financials")?; - ctx.write_file(&format!("{}/financials.json", base_dir), &financials_json)?; - - ctx.commit(&format!("Fetched YFinance data for {}", symbol_code_clone)) - }).await; - - let new_commit = match commit_result { - Ok(res) => match res { - Ok(c) => c, - Err(e) => return send_failure(&nats, &cmd, &format!("VGCS failed: {}", e)).await, - }, - Err(e) => return send_failure(&nats, &cmd, &format!("Task join error: {}", e)).await, - }; - - info!("Task {} completed. New commit: {}", cmd.task_id, new_commit); - - // 6. Send Success Event - let event = WorkflowTaskEvent { - request_id: cmd.request_id, - task_id: cmd.task_id, - status: TaskStatus::Completed, - result: Some(TaskResult { - new_commit: Some(new_commit), - error: None, - summary: Some(json!({ - "symbol": symbol_code, - "records": financials.len() - })), - }), - }; - - publish_event(&nats, event).await + let node = Arc::new(YFinanceNode::new(state)); + let runner = WorkflowNodeRunner::new(nats); + runner.run(node, cmd).await } - -async fn fetch_and_cache(state: &AppState, symbol: &str, _market: &str) -> Result<(CompanyProfileDto, Vec)> { - // 1. Get Provider - // yfinance_provider is likely Arc, and Provider is Clone/ThreadSafe. - let provider = state.yfinance_provider.clone(); - - // 2. Call fetch - let (profile, financials) = provider.fetch_all_data(symbol).await - .context("Failed to fetch data from YFinance")?; - - // 3. 
Write to DB Cache - use common_contracts::persistence_client::PersistenceClient; - use common_contracts::workflow_harness::TaskState; // For get_persistence_url - - let persistence_url = state.get_persistence_url(); - let p_client = PersistenceClient::new(persistence_url); - - if let Err(e) = p_client.upsert_company_profile(profile.clone()).await { - warn!("Failed to cache company profile: {}", e); - } - - Ok((profile, financials)) -} - -async fn send_failure(nats: &async_nats::Client, cmd: &WorkflowTaskCommand, error_msg: &str) -> Result<()> { - error!("Task {} failed: {}", cmd.task_id, error_msg); - let event = WorkflowTaskEvent { - request_id: cmd.request_id, - task_id: cmd.task_id.clone(), - status: TaskStatus::Failed, - result: Some(TaskResult { - new_commit: None, - error: Some(error_msg.to_string()), - summary: None, - }), - }; - publish_event(nats, event).await -} - -async fn publish_event(nats: &async_nats::Client, event: WorkflowTaskEvent) -> Result<()> { - let subject = event.subject().to_string(); - let payload = serde_json::to_vec(&event)?; - nats.publish(subject, payload.into()).await?; - Ok(()) -} - diff --git a/services/yfinance-provider-service/src/main.rs b/services/yfinance-provider-service/src/main.rs index 080d98e..039eb1a 100644 --- a/services/yfinance-provider-service/src/main.rs +++ b/services/yfinance-provider-service/src/main.rs @@ -7,6 +7,7 @@ mod message_consumer; mod state; mod worker; mod generic_worker; +mod workflow_adapter; mod yfinance; use crate::config::AppConfig; diff --git a/services/yfinance-provider-service/src/worker.rs b/services/yfinance-provider-service/src/worker.rs index 2d0d9d0..111ab15 100644 --- a/services/yfinance-provider-service/src/worker.rs +++ b/services/yfinance-provider-service/src/worker.rs @@ -109,6 +109,7 @@ mod integration_tests { symbol: CanonicalSymbol::new("MSFT", &Market::US), market: "US".to_string(), template_id: Some("default".to_string()), + output_path: None, }; // Init task diff --git a/services/yfinance-provider-service/src/workflow_adapter.rs b/services/yfinance-provider-service/src/workflow_adapter.rs new file mode 100644 index 0000000..50671d4 --- /dev/null +++ b/services/yfinance-provider-service/src/workflow_adapter.rs @@ -0,0 +1,87 @@ +use std::sync::Arc; +use async_trait::async_trait; +use anyhow::{Result, anyhow, Context}; +use serde_json::{json, Value}; +use std::collections::HashMap; + +use common_contracts::workflow_node::{WorkflowNode, NodeContext, NodeExecutionResult, ArtifactContent}; +use common_contracts::data_formatting; +use common_contracts::persistence_client::PersistenceClient; +use crate::state::AppState; + +pub struct YFinanceNode { + state: AppState, +} + +impl YFinanceNode { + pub fn new(state: AppState) -> Self { + Self { state } + } +} + +#[async_trait] +impl WorkflowNode for YFinanceNode { + fn node_type(&self) -> &str { + "yfinance" + } + + async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result { + let symbol = config.get("symbol").and_then(|s| s.as_str()).unwrap_or("").to_string(); + let _market = config.get("market").and_then(|s| s.as_str()).unwrap_or("US").to_string(); + + if symbol.is_empty() { + return Err(anyhow!("Missing symbol in config")); + } + + // 1. Fetch Data + let provider = self.state.yfinance_provider.clone(); + let (profile, financials) = provider.fetch_all_data(&symbol).await + .context("Failed to fetch data from YFinance")?; + + // 2. 
Cache to DB (Side effect)
+        let persistence_url = self.state.config.data_persistence_service_url.clone();
+        let p_client = PersistenceClient::new(persistence_url);
+        if let Err(e) = p_client.upsert_company_profile(profile.clone()).await {
+            tracing::warn!("Failed to cache company profile: {}", e);
+        }
+
+        // 3. Prepare Artifacts
+        let mut artifacts = HashMap::new();
+        artifacts.insert("profile.json".to_string(), json!(profile).into());
+        artifacts.insert("financials.json".to_string(), json!(financials).into());
+
+        Ok(NodeExecutionResult {
+            artifacts,
+            meta_summary: Some(json!({
+                "symbol": symbol,
+                "records": financials.len()
+            })),
+        })
+    }
+
+    fn render_report(&self, result: &NodeExecutionResult) -> Result<String> {
+        let profile_json = match result.artifacts.get("profile.json") {
+            Some(ArtifactContent::Json(v)) => v,
+            _ => return Err(anyhow!("Missing profile.json")),
+        };
+        let financials_json = match result.artifacts.get("financials.json") {
+            Some(ArtifactContent::Json(v)) => v,
+            _ => return Err(anyhow!("Missing financials.json")),
+        };
+
+        let symbol = profile_json["symbol"].as_str().unwrap_or("Unknown");
+
+        let mut report_md = String::new();
+        report_md.push_str(&format!("# YFinance Data Report: {}\n\n", symbol));
+
+        report_md.push_str("## Company Profile\n\n");
+        report_md.push_str(&data_formatting::format_data(profile_json));
+        report_md.push_str("\n\n");
+
+        report_md.push_str("## Financial Statements\n\n");
+        report_md.push_str(&data_formatting::format_data(financials_json));
+
+        Ok(report_md)
+    }
+}
+
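---

For reviewers adding further providers onto the new `WorkflowNode` / `WorkflowNodeRunner` abstraction, the pattern in this patch reduces to three steps: implement `execute` to return artifacts plus an optional `meta_summary`, implement `render_report` to turn those artifacts into the Markdown written at `output_path`, and hand an `Arc` of the node to the runner inside the service's NATS command handler. The sketch below is illustrative only: it assumes the trait and runner signatures introduced in this diff (`node_type`, `execute(&NodeContext, &Value)`, `render_report`, `WorkflowNodeRunner::new(nats).run(node, cmd)`), and `ExampleNode` together with its static payload is hypothetical, not part of the change set.

```rust
use std::collections::HashMap;
use std::sync::Arc;

use anyhow::{anyhow, Result};
use async_trait::async_trait;
use serde_json::{json, Value};

use common_contracts::workflow_node::{ArtifactContent, NodeContext, NodeExecutionResult, WorkflowNode};
use common_contracts::workflow_runner::WorkflowNodeRunner;
use common_contracts::workflow_types::WorkflowTaskCommand;

/// Hypothetical provider node, used only to illustrate the adapter pattern.
pub struct ExampleNode;

#[async_trait]
impl WorkflowNode for ExampleNode {
    fn node_type(&self) -> &str {
        "example"
    }

    async fn execute(&self, _ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult> {
        // Read required config the same way the YFinance/Tushare adapters do.
        let symbol = config
            .get("symbol")
            .and_then(|s| s.as_str())
            .ok_or_else(|| anyhow!("Missing symbol in config"))?;

        // Produce one JSON artifact; committing it to VGCS is the runner's job,
        // so the node stays free of Git and NATS concerns.
        let mut artifacts = HashMap::new();
        artifacts.insert(
            "quote.json".to_string(),
            ArtifactContent::Json(json!({ "symbol": symbol })),
        );

        Ok(NodeExecutionResult {
            artifacts,
            meta_summary: Some(json!({ "symbol": symbol })),
        })
    }

    fn render_report(&self, result: &NodeExecutionResult) -> Result<String> {
        // Render the artifact into the Markdown report that ends up at `output_path`.
        let quote = match result.artifacts.get("quote.json") {
            Some(ArtifactContent::Json(v)) => v,
            _ => return Err(anyhow!("Missing quote.json")),
        };
        Ok(format!("# Example Report\n\nQuote payload: {}\n", quote))
    }
}

/// NATS command handler: delegate execution, VGCS commit and event publishing to the runner,
/// mirroring generic_worker.rs in the yfinance provider.
pub async fn handle_workflow_command(nats: async_nats::Client, cmd: WorkflowTaskCommand) -> Result<()> {
    let node = Arc::new(ExampleNode);
    WorkflowNodeRunner::new(nats).run(node, cmd).await
}
```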