Compare commits
20 Commits
d28f3c5266
...
70b30b39d8
| Author | SHA1 | Date |
|---|---|---|
|  | 70b30b39d8 |  |
|  | eee1eb8b3f |  |
|  | 0c975bb8f1 |  |
|  | 03b53aed71 |  |
|  | 91a6dfc4c1 |  |
|  | b90388b76e |  |
|  | fbfb820853 |  |
|  | b8eab4dfd5 |  |
|  | a68a95338b |  |
|  | a59b994a92 |  |
|  | ca1eddd244 |  |
|  | efd2c42775 |  |
|  | fcadb1ff6a |  |
|  | 48e45faffb |  |
|  | 70b1a27978 |  |
|  | b43817919d |  |
|  | b41eaf8b99 |  |
|  | 4881ac8603 |  |
|  | 6c880f51dd |  |
|  | 0cb31e363e |  |
@@ -21,7 +21,10 @@ __pycache__
```
*.pyc

# Large reference/resources not needed in images
ref/
# ref/ is usually ignored, but we need service_kit_mirror for build context
# We use exclusion pattern (!) to allow specific subdirectories
ref/*
!ref/service_kit_mirror
archive/
docs/
```
.tiltignore (new file, 6 lines)
@@ -0,0 +1,6 @@
```
# Ignore Rust source changes to prevent Tilt from rebuilding/restarting containers.
# We rely on cargo-watch inside the container for hot reload (via volume mounts).
**/*.rs
**/Cargo.toml
**/Cargo.lock
```
services/report-generator-service/Cargo.lock → Cargo.lock (generated, 1403 lines)
Cargo.toml (new file, 33 lines)
@@ -0,0 +1,33 @@
```toml
[workspace]
resolver = "2"
members = [
    "services/alphavantage-provider-service",
    "services/api-gateway",
    "services/common-contracts",
    "services/data-persistence-service",
    "services/finnhub-provider-service",
    "services/mock-provider-service",
    "services/report-generator-service",
    "services/tushare-provider-service",
    "services/workflow-orchestrator-service",
    "services/yfinance-provider-service",
    "crates/workflow-context",
]

[workspace.package]
edition = "2024"
version = "0.1.0"
authors = ["Lv, Qi <lvsoft@gmail.com>"]
license = "MIT"
repository = "https://github.com/lvsoft/Fundamental_Analysis"
homepage = "https://github.com/lvsoft/Fundamental_Analysis"
readme = "README.md"

[workspace.dependencies]
rmcp = "0.9.1"
rmcp-macros = "0.9.1"

[patch.crates-io]
service_kit = { path = "ref/service_kit_mirror/service_kit/service_kit" }
service-kit-macros = { path = "ref/service_kit_mirror/service_kit/service_kit/service-kit-macros" }
```
crates/workflow-context/Cargo.toml (new file, 23 lines)
@@ -0,0 +1,23 @@
```toml
[package]
name = "workflow-context"
version = "0.1.0"
edition = "2024"

[lib]
name = "workflow_context"
path = "src/lib.rs"

[dependencies]
git2 = { version = "0.18", features = ["vendored-openssl"] }
sha2 = "0.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
anyhow = "1.0"
thiserror = "1.0"
hex = "0.4"
walkdir = "2.3"
regex = "1.10"
globset = "0.4"

[dev-dependencies]
tempfile = "3.8"
```
crates/workflow-context/src/docos.rs (new file, 320 lines)
@@ -0,0 +1,320 @@
```rust
use anyhow::{Result, anyhow, Context};
use std::io::Read;
use std::sync::Arc;
use regex::Regex;
use crate::types::{DocNode, DocNodeKind, EntryKind};
use crate::traits::{ContextStore, Transaction};

pub trait DocManager {
    /// Reload state based on the latest Commit
    fn reload(&mut self, commit_hash: &str) -> Result<()>;

    /// Get the current document tree outline
    fn get_outline(&self) -> Result<DocNode>;

    /// Read node content
    fn read_content(&self, path: &str) -> Result<String>;

    /// Write content (Upsert)
    fn write_content(&mut self, path: &str, content: &str) -> Result<()>;

    /// Insert subsection (Implies Promotion)
    fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()>;

    /// Demote Composite to Leaf (Aggregation)
    fn demote(&mut self, path: &str) -> Result<()>;

    /// Commit changes
    fn save(&mut self, message: &str) -> Result<String>;
}

pub struct DocOS<S: ContextStore> {
    store: Arc<S>,
    req_id: String,
    commit_hash: String,
    transaction: Option<Box<dyn Transaction>>,
}

impl<S: ContextStore> DocOS<S> {
    pub fn new(store: Arc<S>, req_id: &str, commit_hash: &str) -> Self {
        Self {
            store,
            req_id: req_id.to_string(),
            commit_hash: commit_hash.to_string(),
            transaction: None,
        }
    }

    fn ensure_transaction(&mut self) -> Result<&mut Box<dyn Transaction>> {
        if self.transaction.is_none() {
            let tx = self.store.begin_transaction(&self.req_id, &self.commit_hash)?;
            self.transaction = Some(tx);
        }
        Ok(self.transaction.as_mut().unwrap())
    }

    fn is_leaf(&self, path: &str) -> Result<bool> {
        match self.store.read_file(&self.req_id, &self.commit_hash, path) {
            Ok(_) => Ok(true),
            Err(_) => Ok(false),
        }
    }

    fn is_composite(&self, path: &str) -> Result<bool> {
        match self.store.list_dir(&self.req_id, &self.commit_hash, path) {
            Ok(_) => Ok(true),
            Err(_) => Ok(false),
        }
    }

    /// Parse Markdown headers to find subsections
    fn parse_markdown_headers(&self, content: &str) -> Vec<DocNode> {
        let re = Regex::new(r"(?m)^(#{1,6})\s+(.+)").unwrap();
        let mut sections = Vec::new();

        for cap in re.captures_iter(content) {
            let _level = cap[1].len();
            let name = cap[2].trim().to_string();

            // Simplified logic: All headers are children of the file node
            // In a real rich outline, we would build a tree based on level.
            // For this MVP, we treat found sections as direct children in the outline view.
            sections.push(DocNode {
                name: name.clone(),
                path: "".to_string(), // Virtual path, no direct file address
                kind: DocNodeKind::Section,
                children: vec![],
            });
        }
        sections
    }

    fn build_node(&self, name: String, path: String, kind: DocNodeKind) -> Result<DocNode> {
        let mut node = DocNode {
            name,
            path: path.clone(),
            kind: kind.clone(),
            children: vec![],
        };

        match kind {
            DocNodeKind::Composite => {
                let entries = self.store.list_dir(&self.req_id, &self.commit_hash, &path)?;

                // 1. Process index.md first if exists (content of this composite node)
                let mut index_content = String::new();
                if let Ok(mut reader) = self.store.read_file(&self.req_id, &self.commit_hash, &format!("{}/index.md", path)) {
                    reader.read_to_string(&mut index_content).unwrap_or_default();
                    let sections = self.parse_markdown_headers(&index_content);
                    node.children.extend(sections);
                }

                // 2. Process children files/dirs
                let mut children_nodes = Vec::new();
                for entry in entries {
                    if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") {
                        continue;
                    }

                    let child_path = if path == "/" {
                        entry.name.clone()
                    } else {
                        format!("{}/{}", path, entry.name)
                    };

                    let child_kind = match entry.kind {
                        EntryKind::Dir => DocNodeKind::Composite,
                        EntryKind::File => DocNodeKind::Leaf,
                    };

                    let child_node = self.build_node(entry.name, child_path, child_kind)?;
                    children_nodes.push(child_node);
                }
                // Sort children by name (simple default)
                children_nodes.sort_by(|a, b| a.name.cmp(&b.name));
                node.children.extend(children_nodes);
            }
            DocNodeKind::Leaf => {
                // Parse content for sections
                if let Ok(mut reader) = self.store.read_file(&self.req_id, &self.commit_hash, &path) {
                    let mut content = String::new();
                    reader.read_to_string(&mut content).unwrap_or_default();
                    let sections = self.parse_markdown_headers(&content);
                    node.children.extend(sections);
                }
            }
            DocNodeKind::Section => {
                // Sections don't have children in this simplified view
            }
        }

        Ok(node)
    }
}

impl<S: ContextStore> DocManager for DocOS<S> {
    fn reload(&mut self, commit_hash: &str) -> Result<()> {
        self.commit_hash = commit_hash.to_string();
        self.transaction = None;
        Ok(())
    }

    fn get_outline(&self) -> Result<DocNode> {
        self.build_node("Root".to_string(), "/".to_string(), DocNodeKind::Composite)
    }

    fn read_content(&self, path: &str) -> Result<String> {
        let target_path = if path == "/" {
            "index.md".to_string()
        } else if self.is_composite(path)? {
            format!("{}/index.md", path)
        } else {
            path.to_string()
        };

        let mut reader = self.store.read_file(&self.req_id, &self.commit_hash, &target_path)
            .context("Failed to read content")?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        Ok(content)
    }

    fn write_content(&mut self, path: &str, content: &str) -> Result<()> {
        let is_comp = self.is_composite(path)?;
        let target_path = if is_comp {
            format!("{}/index.md", path)
        } else {
            path.to_string()
        };

        let tx = self.ensure_transaction()?;
        tx.write(&target_path, content.as_bytes())?;
        Ok(())
    }

    fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()> {
        let is_leaf = self.is_leaf(parent_path)?;
        let is_composite = self.is_composite(parent_path)?;

        if !is_leaf && !is_composite && parent_path != "/" {
            return Err(anyhow!("Parent path '{}' does not exist", parent_path));
        }

        if is_leaf {
            // Promote: Leaf -> Composite
            let old_content = self.read_content(parent_path)?;

            let tx = self.ensure_transaction()?;
            tx.remove(parent_path)?;

            let index_path = format!("{}/index.md", parent_path);
            tx.write(&index_path, old_content.as_bytes())?;

            let child_path = format!("{}/{}", parent_path, name);
            tx.write(&child_path, content.as_bytes())?;
        } else {
            let child_path = if parent_path == "/" {
                name.to_string()
            } else {
                format!("{}/{}", parent_path, name)
            };

            let tx = self.ensure_transaction()?;
            tx.write(&child_path, content.as_bytes())?;
        }

        Ok(())
    }

    fn demote(&mut self, path: &str) -> Result<()> {
        if !self.is_composite(path)? {
            return Err(anyhow!("Path '{}' is not a composite node (directory)", path));
        }
        if path == "/" {
            return Err(anyhow!("Cannot demote root"));
        }

        // 1. Read index.md (Main content)
        let mut main_content = String::new();
        if let Ok(content) = self.read_content(path) {
            main_content = content;
        }

        // Reading directory entries
        let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path)?;

        // Sort entries to have deterministic order
        let mut sorted_entries = entries;
        sorted_entries.sort_by(|a, b| a.name.cmp(&b.name));

        let mut combined_content = main_content;

        // Iterate for content reading (Borrowing self immutably)
        for entry in &sorted_entries {
            if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") {
                continue;
            }

            let child_rel_path = format!("{}/{}", path, entry.name);
            let child_content = self.read_content(&child_rel_path)?;
            combined_content.push_str(&format!("\n\n# {}\n\n", entry.name));
            combined_content.push_str(&child_content);
        }

        // Get list of items to remove before starting transaction (to avoid double borrow)
        // We need a recursive list of paths to remove from git index.
        let paths_to_remove = self.collect_recursive_paths(path)?;

        let tx = self.ensure_transaction()?;

        // 3. Remove everything recursively
        for p in paths_to_remove {
            tx.remove(&p)?;
        }
        // Also remove the directory path itself (conceptually, or handled by git index cleanup)
        // In our simplified VGCS, remove(dir) is not enough if not empty.
        // But we just cleaned up recursively.

        // 4. Write new file
        tx.write(path, combined_content.as_bytes())?;

        Ok(())
    }

    fn save(&mut self, message: &str) -> Result<String> {
        if let Some(tx) = self.transaction.take() {
            let new_oid = tx.commit(message, "DocOS User")?;
            self.commit_hash = new_oid.clone();
            Ok(new_oid)
        } else {
            Ok(self.commit_hash.clone())
        }
    }
}

impl<S: ContextStore> DocOS<S> {
    // Helper: Collect paths recursively (reading from store, immutable self)
    fn collect_recursive_paths(&self, path: &str) -> Result<Vec<String>> {
        let mut paths = Vec::new();

        let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path);
        if let Ok(entries) = entries {
            for entry in entries {
                let child_path = format!("{}/{}", path, entry.name);
                match entry.kind {
                    EntryKind::File => {
                        paths.push(child_path);
                    },
                    EntryKind::Dir => {
                        // Add children of dir first
                        let mut sub_paths = self.collect_recursive_paths(&child_path)?;
                        paths.append(&mut sub_paths);
                        // No need to remove dir itself in git, but we might track it?
                    }
                }
            }
        }
        Ok(paths)
    }
}
```
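To make the promote/demote lifecycle above concrete, here is a minimal usage sketch (not part of the diff). It mirrors the flow exercised in `docos_tests.rs` further down; the `./data` path, the request id and the commit messages are placeholders, and the all-zero OID denotes an empty base as in the tests.

```rust
use std::sync::Arc;
use workflow_context::{ContextStore, DocManager, DocOS, Vgcs};

fn docos_lifecycle() -> anyhow::Result<()> {
    // Placeholder data directory and request id.
    let store = Arc::new(Vgcs::new("./data"));
    store.init_repo("req-demo")?;
    let mut doc = DocOS::new(store, "req-demo", "0000000000000000000000000000000000000000");

    // Leaf: a single file named "Analysis".
    doc.write_content("Analysis", "General Analysis")?;
    let c1 = doc.save("init")?;

    // Promotion: inserting a subsection turns "Analysis" into a directory
    // holding "Analysis/index.md" plus the new "Analysis/Revenue" leaf.
    doc.reload(&c1)?;
    doc.insert_subsection("Analysis", "Revenue", "Revenue Data")?;
    let c2 = doc.save("split")?;

    // Demotion: fold the directory back into one file; child content is
    // appended under "# Revenue"-style headers by demote().
    doc.reload(&c2)?;
    doc.demote("Analysis")?;
    doc.save("fuse")?;
    Ok(())
}
```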
crates/workflow-context/src/lib.rs (new file, 11 lines)
@@ -0,0 +1,11 @@
```rust
pub mod types;
pub mod traits;
pub mod vgcs;
pub mod docos;
pub mod worker_runtime;

pub use types::*;
pub use traits::*;
pub use vgcs::Vgcs;
pub use docos::{DocOS, DocManager};
pub use worker_runtime::{WorkerContext, ContextShell, OutputFormat, FindOptions, NodeMetadata, GrepMatch, FileStats};
```
crates/workflow-context/src/traits.rs (new file, 39 lines)
@@ -0,0 +1,39 @@
```rust
use anyhow::Result;
use std::io::Read;
use crate::types::{DirEntry, FileChange};

pub trait ContextStore {
    /// Initialize a new repository for the request
    fn init_repo(&self, req_id: &str) -> Result<()>;

    /// Read file content. Transparently handles BlobRef redirection.
    fn read_file(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Box<dyn Read + Send>>;

    /// List directory contents
    fn list_dir(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Vec<DirEntry>>;

    /// Get changes between two commits
    fn diff(&self, req_id: &str, from_commit: &str, to_commit: &str) -> Result<Vec<FileChange>>;

    /// Three-way merge (In-Memory), returns new Tree OID
    fn merge_trees(&self, req_id: &str, base: &str, ours: &str, theirs: &str) -> Result<String>;

    /// Smart merge two commits, automatically finding the best common ancestor.
    /// Returns the OID of the new merge commit.
    fn merge_commits(&self, req_id: &str, our_commit: &str, their_commit: &str) -> Result<String>;

    /// Start a write transaction
    fn begin_transaction(&self, req_id: &str, base_commit: &str) -> Result<Box<dyn Transaction>>;
}

pub trait Transaction {
    /// Write file content
    fn write(&mut self, path: &str, content: &[u8]) -> Result<()>;

    /// Remove file
    fn remove(&mut self, path: &str) -> Result<()>;

    /// Commit changes
    fn commit(self: Box<Self>, message: &str, author: &str) -> Result<String>;
}
```
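A minimal write-then-read round trip against these traits (not part of the diff), following the pattern used by `vgcs_tests.rs` below; the data directory, request id, author string and file name are placeholders.

```rust
use std::io::Read;
use workflow_context::{ContextStore, Vgcs};

fn round_trip() -> anyhow::Result<()> {
    // Placeholder store location; the all-zero OID means "no parent commit".
    let store = Vgcs::new("./data");
    store.init_repo("req-demo")?;

    let mut tx = store.begin_transaction("req-demo", "0000000000000000000000000000000000000000")?;
    tx.write("notes.txt", b"hello")?;
    let commit = tx.commit("first commit", "Example Author")?;

    // Reads resolve against the returned commit OID, not a branch ref.
    let mut reader = store.read_file("req-demo", &commit, "notes.txt")?;
    let mut content = String::new();
    reader.read_to_string(&mut content)?;
    assert_eq!(content, "hello");
    Ok(())
}
```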
crates/workflow-context/src/types.rs (new file, 50 lines)
@@ -0,0 +1,50 @@
```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum EntryKind {
    File,
    Dir,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirEntry {
    pub name: String,
    pub kind: EntryKind,
    pub object_id: String,
    // New metadata fields
    pub size: Option<u64>,
    pub line_count: Option<usize>,
    pub word_count: Option<usize>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FileChange {
    Added(String),
    Modified(String),
    Deleted(String),
}

#[derive(Debug, Serialize, Deserialize)]
pub struct BlobRef {
    #[serde(rename = "$vgcs_ref")]
    pub vgcs_ref: String, // "v1"
    pub sha256: String,
    pub size: u64,
    pub mime_type: String,
    pub original_name: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum DocNodeKind {
    Leaf,      // Pure content node (file)
    Composite, // Composite node (dir with index.md)
    Section,   // Virtual node (Markdown Header inside a file)
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DocNode {
    pub name: String,
    pub path: String, // Logical path e.g., "Analysis/Revenue"
    pub kind: DocNodeKind,
    pub children: Vec<DocNode>, // Only for Composite or Section-bearing Leaf
}
```
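For orientation, this is roughly the pointer object that `Vgcs` stores in the git tree for large files and later detects via the `$vgcs_ref` key (a sketch, not part of the diff; the hash, size and file name below are placeholder values).

```rust
use workflow_context::BlobRef;

fn blob_ref_shape() -> anyhow::Result<()> {
    // Placeholder values; in practice Vgcs fills these in on write.
    let r = BlobRef {
        vgcs_ref: "v1".to_string(),
        sha256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
        size: 2 * 1024 * 1024,
        mime_type: "application/octet-stream".to_string(),
        original_name: "large.bin".to_string(),
    };
    let json = serde_json::to_string_pretty(&r)?;
    // The serde rename exposes the version marker as "$vgcs_ref".
    assert!(json.contains("\"$vgcs_ref\": \"v1\""));
    Ok(())
}
```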
crates/workflow-context/src/vgcs.rs (new file, 361 lines)
@@ -0,0 +1,361 @@
```rust
use std::path::{Path, PathBuf};
use std::fs::{self, File};
use std::io::{Cursor, Read, Write};
use anyhow::{Context, Result, anyhow};
use git2::{Repository, Oid, ObjectType, Signature, Index, IndexEntry, IndexTime};
use sha2::{Sha256, Digest};

use crate::traits::{ContextStore, Transaction};
use crate::types::{DirEntry, EntryKind, FileChange, BlobRef};

pub struct Vgcs {
    root_path: PathBuf,
}

impl Vgcs {
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        Self {
            root_path: path.as_ref().to_path_buf(),
        }
    }

    fn get_repo_path(&self, req_id: &str) -> PathBuf {
        self.root_path.join("repos").join(format!("{}.git", req_id))
    }

    fn get_blob_store_root(&self, req_id: &str) -> PathBuf {
        self.root_path.join("blobs").join(req_id)
    }

    fn get_blob_path(&self, req_id: &str, sha256: &str) -> PathBuf {
        self.get_blob_store_root(req_id)
            .join(&sha256[0..2])
            .join(sha256)
    }
}

impl ContextStore for Vgcs {
    fn init_repo(&self, req_id: &str) -> Result<()> {
        let repo_path = self.get_repo_path(req_id);
        if !repo_path.exists() {
            fs::create_dir_all(&repo_path).context("Failed to create repo dir")?;
            Repository::init_bare(&repo_path).context("Failed to init bare repo")?;
        }
        Ok(())
    }

    fn read_file(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Box<dyn Read + Send>> {
        let repo_path = self.get_repo_path(req_id);
        let repo = Repository::open(&repo_path).context("Failed to open repo")?;

        let oid = Oid::from_str(commit_hash).context("Invalid commit hash")?;
        let commit = repo.find_commit(oid).context("Commit not found")?;
        let tree = commit.tree().context("Tree not found")?;

        let entry = tree.get_path(Path::new(path)).context("File not found in tree")?;
        let object = entry.to_object(&repo).context("Object not found")?;

        if let Some(blob) = object.as_blob() {
            let content = blob.content();
            // Try parsing as BlobRef
            if let Ok(blob_ref) = serde_json::from_slice::<BlobRef>(content) {
                if blob_ref.vgcs_ref == "v1" {
                    let blob_path = self.get_blob_path(req_id, &blob_ref.sha256);
                    let file = File::open(blob_path).context("Failed to open blob file from store")?;
                    return Ok(Box::new(file));
                }
            }
            // Return raw content
            return Ok(Box::new(Cursor::new(content.to_vec())));
        }

        Err(anyhow!("Path is not a file"))
    }

    fn list_dir(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Vec<DirEntry>> {
        let repo_path = self.get_repo_path(req_id);
        let repo = Repository::open(&repo_path).context("Failed to open repo")?;

        let oid = Oid::from_str(commit_hash).context("Invalid commit hash")?;
        let commit = repo.find_commit(oid).context("Commit not found")?;
        let root_tree = commit.tree().context("Tree not found")?;

        let tree = if path.is_empty() || path == "/" || path == "." {
            root_tree
        } else {
            let entry = root_tree.get_path(Path::new(path)).context("Path not found")?;
            let object = entry.to_object(&repo).context("Object not found")?;
            object.into_tree().map_err(|_| anyhow!("Path is not a directory"))?
        };

        let mut entries = Vec::new();
        for entry in tree.iter() {
            let name = entry.name().unwrap_or("").to_string();
            let kind = match entry.kind() {
                Some(ObjectType::Tree) => EntryKind::Dir,
                _ => EntryKind::File,
            };
            let object_id = entry.id().to_string();

            // Metadata extraction (Expensive but necessary for the prompt)
            let mut size = None;
            let mut line_count = None;
            let mut word_count = None;

            if kind == EntryKind::File {
                if let Ok(object) = entry.to_object(&repo) {
                    if let Some(blob) = object.as_blob() {
                        let content = blob.content();
                        size = Some(content.len() as u64);

                        // Check for binary content or just use heuristic
                        if !content.contains(&0) {
                            let s = String::from_utf8_lossy(content);
                            line_count = Some(s.lines().count());
                            word_count = Some(s.split_whitespace().count());
                        }
                    }
                }
            }

            entries.push(DirEntry { name, kind, object_id, size, line_count, word_count });
        }

        Ok(entries)
    }

    fn diff(&self, req_id: &str, from_commit: &str, to_commit: &str) -> Result<Vec<FileChange>> {
        let repo_path = self.get_repo_path(req_id);
        let repo = Repository::open(&repo_path).context("Failed to open repo")?;

        let from_oid = Oid::from_str(from_commit).context("Invalid from_commit")?;
        let to_oid = Oid::from_str(to_commit).context("Invalid to_commit")?;

        let from_tree = repo.find_commit(from_oid)?.tree()?;
        let to_tree = repo.find_commit(to_oid)?.tree()?;

        let diff = repo.diff_tree_to_tree(Some(&from_tree), Some(&to_tree), None)?;

        let mut changes = Vec::new();
        diff.foreach(&mut |delta, _| {
            let path = delta.new_file().path().or(delta.old_file().path()).unwrap();
            let path_str = path.to_string_lossy().to_string();

            match delta.status() {
                git2::Delta::Added => changes.push(FileChange::Added(path_str)),
                git2::Delta::Deleted => changes.push(FileChange::Deleted(path_str)),
                git2::Delta::Modified => changes.push(FileChange::Modified(path_str)),
                _ => {}
            }
            true
        }, None, None, None)?;

        Ok(changes)
    }

    fn merge_trees(&self, req_id: &str, base: &str, ours: &str, theirs: &str) -> Result<String> {
        let repo_path = self.get_repo_path(req_id);
        let repo = Repository::open(&repo_path).context("Failed to open repo")?;

        let base_tree = repo.find_commit(Oid::from_str(base)?)?.tree()?;
        let our_tree = repo.find_commit(Oid::from_str(ours)?)?.tree()?;
        let their_tree = repo.find_commit(Oid::from_str(theirs)?)?.tree()?;

        let mut index = repo.merge_trees(&base_tree, &our_tree, &their_tree, None)?;

        if index.has_conflicts() {
            return Err(anyhow!("Merge conflict detected"));
        }

        let oid = index.write_tree_to(&repo)?;
        Ok(oid.to_string())
    }

    fn merge_commits(&self, req_id: &str, our_commit: &str, their_commit: &str) -> Result<String> {
        let repo_path = self.get_repo_path(req_id);
        let repo = Repository::open(&repo_path).context("Failed to open repo")?;

        let our_oid = Oid::from_str(our_commit).context("Invalid our_commit")?;
        let their_oid = Oid::from_str(their_commit).context("Invalid their_commit")?;

        let base_oid = repo.merge_base(our_oid, their_oid).context("Failed to find merge base")?;

        let base_commit = repo.find_commit(base_oid)?;
        let our_commit_obj = repo.find_commit(our_oid)?;
        let their_commit_obj = repo.find_commit(their_oid)?;

        // If base equals one of the commits, it's a fast-forward
        if base_oid == our_oid {
            return Ok(their_commit.to_string());
        }
        if base_oid == their_oid {
            return Ok(our_commit.to_string());
        }

        let base_tree = base_commit.tree()?;
        let our_tree = our_commit_obj.tree()?;
        let their_tree = their_commit_obj.tree()?;

        let mut index = repo.merge_trees(&base_tree, &our_tree, &their_tree, None)?;

        if index.has_conflicts() {
            return Err(anyhow!("Merge conflict detected between {} and {}", our_commit, their_commit));
        }

        let tree_oid = index.write_tree_to(&repo)?;
        let tree = repo.find_tree(tree_oid)?;

        let sig = Signature::now("vgcs-merge", "system")?;

        let merge_commit_oid = repo.commit(
            None, // Detached
            &sig,
            &sig,
            &format!("Merge commit {} into {}", their_commit, our_commit),
            &tree,
            &[&our_commit_obj, &their_commit_obj],
        )?;

        Ok(merge_commit_oid.to_string())
    }

    fn begin_transaction(&self, req_id: &str, base_commit: &str) -> Result<Box<dyn Transaction>> {
        let repo_path = self.get_repo_path(req_id);
        let repo = Repository::open(&repo_path).context("Failed to open repo")?;

        let mut index = Index::new()?;
        let mut base_commit_oid = None;

        if !base_commit.is_empty() {
            let base_oid = Oid::from_str(base_commit).context("Invalid base_commit")?;
            if !base_oid.is_zero() {
                // Scope the borrow of repo
                {
                    let commit = repo.find_commit(base_oid).context("Base commit not found")?;
                    let tree = commit.tree()?;
                    index.read_tree(&tree)?;
                }
                base_commit_oid = Some(base_oid);
            }
        }

        Ok(Box::new(VgcsTransaction {
            repo,
            req_id: req_id.to_string(),
            root_path: self.root_path.clone(),
            base_commit: base_commit_oid,
            index,
        }))
    }
}

pub struct VgcsTransaction {
    repo: Repository,
    req_id: String,
    root_path: PathBuf,
    base_commit: Option<Oid>,
    index: Index,
}

impl Transaction for VgcsTransaction {
    fn write(&mut self, path: &str, content: &[u8]) -> Result<()> {
        let final_content = if content.len() > 1024 * 1024 { // 1MB
            // Calculate SHA256
            let mut hasher = Sha256::new();
            hasher.update(content);
            let result = hasher.finalize();
            let sha256 = hex::encode(result);

            // Write to Blob Store
            let blob_path = self.root_path
                .join("blobs")
                .join(&self.req_id)
                .join(&sha256[0..2])
                .join(&sha256);

            if !blob_path.exists() {
                if let Some(parent) = blob_path.parent() {
                    fs::create_dir_all(parent)?;
                }
                let mut file = File::create(&blob_path)?;
                file.write_all(content)?;
            }

            // Create BlobRef JSON
            let blob_ref = BlobRef {
                vgcs_ref: "v1".to_string(),
                sha256: sha256,
                size: content.len() as u64,
                mime_type: "application/octet-stream".to_string(), // Simplified
                original_name: Path::new(path).file_name().unwrap_or_default().to_string_lossy().to_string(),
            };

            serde_json::to_vec(&blob_ref)?
        } else {
            content.to_vec()
        };

        // Write to ODB manually
        let oid = self.repo.blob(&final_content)?;

        let mut entry = create_index_entry(path, 0o100644);
        entry.id = oid;
        entry.file_size = final_content.len() as u32;

        self.index.add(&entry).context("Failed to add entry to index")?;

        Ok(())
    }

    fn remove(&mut self, path: &str) -> Result<()> {
        self.index.remove_path(Path::new(path))?;
        Ok(())
    }

    fn commit(mut self: Box<Self>, message: &str, author: &str) -> Result<String> {
        let tree_oid = self.index.write_tree_to(&self.repo)?;
        let tree = self.repo.find_tree(tree_oid)?;

        let sig = Signature::now(author, "vgcs@system")?;

        let commit_oid = if let Some(base_oid) = self.base_commit {
            let parent_commit = self.repo.find_commit(base_oid)?;
            self.repo.commit(
                None, // Detached commit
                &sig,
                &sig,
                message,
                &tree,
                &[&parent_commit],
            )?
        } else {
            self.repo.commit(
                None, // Detached commit
                &sig,
                &sig,
                message,
                &tree,
                &[],
            )?
        };

        Ok(commit_oid.to_string())
    }
}

fn create_index_entry(path: &str, mode: u32) -> IndexEntry {
    IndexEntry {
        ctime: IndexTime::new(0, 0),
        mtime: IndexTime::new(0, 0),
        dev: 0,
        ino: 0,
        mode,
        uid: 0,
        gid: 0,
        file_size: 0,
        id: Oid::zero(),
        flags: 0,
        flags_extended: 0,
        path: path.as_bytes().to_vec(),
    }
}
```
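The detail worth calling out in `VgcsTransaction::write` is the 1 MiB threshold: larger payloads land in the side blob store and only a small BlobRef JSON enters the git tree, while `read_file` follows the pointer transparently. A short sketch of that round trip (not part of the diff, file name and commit message are placeholders; `test_large_file_support` below does the same end to end):

```rust
use std::io::Read;
use workflow_context::{ContextStore, Vgcs};

fn large_file_round_trip(store: &Vgcs, req_id: &str) -> anyhow::Result<()> {
    // 2 MiB payload: exceeds the 1 MiB cut-off, so it is redirected to blobs/<req_id>/.
    let mut tx = store.begin_transaction(req_id, "0000000000000000000000000000000000000000")?;
    tx.write("large.bin", &vec![b'a'; 2 * 1024 * 1024])?;
    let commit = tx.commit("add large file", "Example Author")?;

    // read_file detects the BlobRef and streams the real bytes back.
    let mut reader = store.read_file(req_id, &commit, "large.bin")?;
    let mut buf = Vec::new();
    reader.read_to_end(&mut buf)?;
    assert_eq!(buf.len(), 2 * 1024 * 1024);
    Ok(())
}
```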
crates/workflow-context/src/worker_runtime.rs (new file, 378 lines)
@@ -0,0 +1,378 @@
```rust
use std::path::Path;
use std::sync::Arc;
use std::env;
use anyhow::{Result, Context, anyhow};
use serde::{Serialize, Deserialize};
use serde::de::DeserializeOwned;
use globset::Glob;
use regex::Regex;
use crate::{DocOS, DocManager, Vgcs, DocNodeKind};

// --- Data Structures ---

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    Text,
    Json,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct NodeMetadata {
    pub path: String,
    pub kind: String, // "File" or "Dir"
    pub size: u64,
    // pub modified: bool, // TODO: Implement diff check against base
}

#[derive(Debug, Default, Clone)]
pub struct FindOptions {
    pub recursive: bool,
    pub max_depth: Option<usize>,
    pub type_filter: Option<String>, // "File" or "Dir"
    pub min_size: Option<u64>,
    pub max_size: Option<u64>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct GrepMatch {
    pub path: String,
    pub line_number: usize,
    pub content: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct FileStats {
    pub path: String,
    pub lines: usize,
    pub bytes: usize,
}

// --- Trait Definition ---

pub trait ContextShell {
    fn tree(&self, path: &str, depth: Option<usize>, format: OutputFormat) -> Result<String>;
    fn find(&self, name_pattern: &str, options: FindOptions) -> Result<Vec<NodeMetadata>>;
    fn grep(&self, pattern: &str, paths: Option<Vec<String>>) -> Result<Vec<GrepMatch>>;
    fn cat(&self, paths: &[String]) -> Result<String>;
    fn wc(&self, paths: &[String]) -> Result<Vec<FileStats>>;
    fn patch(&mut self, path: &str, original: &str, replacement: &str) -> Result<()>;
}

// --- WorkerContext Implementation ---

pub struct WorkerContext {
    doc: DocOS<Vgcs>,
}

impl WorkerContext {
    pub fn from_env() -> Result<Self> {
        let req_id = env::var("WORKFLOW_REQ_ID").context("Missing WORKFLOW_REQ_ID")?;
        let commit = env::var("WORKFLOW_BASE_COMMIT").context("Missing WORKFLOW_BASE_COMMIT")?;
        let data_path = env::var("WORKFLOW_DATA_PATH").context("Missing WORKFLOW_DATA_PATH")?;

        let vgcs = Vgcs::new(&data_path);
        let doc = DocOS::new(Arc::new(vgcs), &req_id, &commit);

        Ok(Self { doc })
    }

    pub fn new(data_path: &str, req_id: &str, commit: &str) -> Self {
        let vgcs = Vgcs::new(data_path);
        let doc = DocOS::new(Arc::new(vgcs), req_id, commit);
        Self { doc }
    }

    pub fn read_json<T: DeserializeOwned>(&self, path: impl AsRef<Path>) -> Result<T> {
        let path_str = path.as_ref().to_string_lossy();
        let content = self.doc.read_content(&path_str)?;
        let data = serde_json::from_str(&content)
            .with_context(|| format!("Failed to parse JSON from {}", path_str))?;
        Ok(data)
    }

    pub fn read_text(&self, path: impl AsRef<Path>) -> Result<String> {
        let path_str = path.as_ref().to_string_lossy();
        self.doc.read_content(&path_str)
    }

    pub fn write_file(&mut self, path: impl AsRef<Path>, content: &str) -> Result<()> {
        let path_str = path.as_ref().to_string_lossy();
        self.doc.write_content(&path_str, content)
    }

    pub fn attach_subsection(&mut self, parent: impl AsRef<Path>, name: &str, content: &str) -> Result<()> {
        let parent_str = parent.as_ref().to_string_lossy();
        self.doc.insert_subsection(&parent_str, name, content)
    }

    pub fn commit(&mut self, message: &str) -> Result<String> {
        self.doc.save(message)
    }

    pub fn get_tool_definitions() -> serde_json::Value {
        serde_json::json!([
            {
                "type": "function",
                "function": {
                    "name": "tree",
                    "description": "List directory structure to understand the file layout.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "path": { "type": "string", "description": "Root path to list (default: root)" },
                            "depth": { "type": "integer", "description": "Recursion depth" },
                            "format": { "type": "string", "enum": ["Text", "Json"], "default": "Text" }
                        }
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "find",
                    "description": "Find files by name pattern (glob). Fast metadata search.",
                    "parameters": {
                        "type": "object",
                        "required": ["pattern"],
                        "properties": {
                            "pattern": { "type": "string", "description": "Glob pattern (e.g. **/*.rs)" },
                            "recursive": { "type": "boolean", "default": true }
                        }
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "grep",
                    "description": "Search for content within files using regex.",
                    "parameters": {
                        "type": "object",
                        "required": ["pattern"],
                        "properties": {
                            "pattern": { "type": "string", "description": "Regex pattern" },
                            "paths": { "type": "array", "items": { "type": "string" }, "description": "Limit search to these paths" }
                        }
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "cat",
                    "description": "Read and assemble content of multiple files.",
                    "parameters": {
                        "type": "object",
                        "required": ["paths"],
                        "properties": {
                            "paths": { "type": "array", "items": { "type": "string" } }
                        }
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "patch",
                    "description": "Replace a specific text block in a file. Use this for small corrections.",
                    "parameters": {
                        "type": "object",
                        "required": ["path", "original", "replacement"],
                        "properties": {
                            "path": { "type": "string" },
                            "original": { "type": "string", "description": "Exact text to look for. Must be unique in file." },
                            "replacement": { "type": "string", "description": "New text to insert." }
                        }
                    }
                }
            }
        ])
    }
}

impl ContextShell for WorkerContext {
    fn tree(&self, path: &str, depth: Option<usize>, format: OutputFormat) -> Result<String> {
        let root_node = self.doc.get_outline()?;

        let target_node = if path == "/" || path == "." {
            Some(&root_node)
        } else {
            fn find_node<'a>(node: &'a crate::DocNode, path: &str) -> Option<&'a crate::DocNode> {
                if node.path == path {
                    return Some(node);
                }
                for child in &node.children {
                    if let Some(found) = find_node(child, path) {
                        return Some(found);
                    }
                }
                None
            }
            find_node(&root_node, path)
        };

        let node = target_node.ok_or_else(|| anyhow!("Path not found: {}", path))?;

        match format {
            OutputFormat::Json => {
                Ok(serde_json::to_string_pretty(node)?)
            },
            OutputFormat::Text => {
                let mut output = String::new();
                fn print_tree(node: &crate::DocNode, prefix: &str, is_last: bool, depth: usize, max_depth: Option<usize>, output: &mut String) {
                    if let Some(max) = max_depth {
                        if depth > max { return; }
                    }

                    let name = if node.path == "/" { "." } else { &node.name };

                    if depth > 0 {
                        let connector = if is_last { "└── " } else { "├── " };
                        output.push_str(&format!("{}{}{}\n", prefix, connector, name));
                    } else {
                        output.push_str(&format!("{}\n", name));
                    }

                    let child_prefix = if depth > 0 {
                        if is_last { format!("{} ", prefix) } else { format!("{}│ ", prefix) }
                    } else {
                        "".to_string()
                    };

                    for (i, child) in node.children.iter().enumerate() {
                        print_tree(child, &child_prefix, i == node.children.len() - 1, depth + 1, max_depth, output);
                    }
                }
                print_tree(node, "", true, 0, depth, &mut output);
                Ok(output)
            }
        }
    }

    fn find(&self, name_pattern: &str, options: FindOptions) -> Result<Vec<NodeMetadata>> {
        let root = self.doc.get_outline()?;
        let mut results = Vec::new();

        let glob = Glob::new(name_pattern)?.compile_matcher();

        fn traverse(node: &crate::DocNode, glob: &globset::GlobMatcher, opts: &FindOptions, depth: usize, results: &mut Vec<NodeMetadata>) {
            if let Some(max) = opts.max_depth {
                if depth > max { return; }
            }

            let match_name = glob.is_match(&node.name) || glob.is_match(&node.path);

            let kind_str = match node.kind {
                DocNodeKind::Composite => "Dir",
                DocNodeKind::Leaf => "File",
                DocNodeKind::Section => "Section",
            };

            let type_match = match &opts.type_filter {
                Some(t) => t.eq_ignore_ascii_case(kind_str),
                None => true,
            };

            if depth > 0 && match_name && type_match {
                results.push(NodeMetadata {
                    path: node.path.clone(),
                    kind: kind_str.to_string(),
                    size: 0,
                });
            }

            if opts.recursive || depth == 0 {
                for child in &node.children {
                    traverse(child, glob, opts, depth + 1, results);
                }
            }
        }

        traverse(&root, &glob, &options, 0, &mut results);
        Ok(results)
    }

    fn grep(&self, pattern: &str, paths: Option<Vec<String>>) -> Result<Vec<GrepMatch>> {
        let re = Regex::new(pattern).context("Invalid regex pattern")?;

        let target_paths = match paths {
            Some(p) => p,
            None => {
                let all_nodes = self.find("**/*", FindOptions {
                    recursive: true,
                    type_filter: Some("File".to_string()),
                    ..Default::default()
                })?;
                all_nodes.into_iter().map(|n| n.path).collect()
            }
        };

        let mut matches = Vec::new();

        for path in target_paths {
            if let Ok(content) = self.read_text(&path) {
                for (i, line) in content.lines().enumerate() {
                    if re.is_match(line) {
                        matches.push(GrepMatch {
                            path: path.clone(),
                            line_number: i + 1,
                            content: line.trim().to_string(),
                        });
                    }
                }
            }
        }

        Ok(matches)
    }

    fn cat(&self, paths: &[String]) -> Result<String> {
        let mut output = String::new();
        for path in paths {
            match self.read_text(path) {
                Ok(content) => {
                    output.push_str(&format!("<file path=\"{}\">\n", path));
                    output.push_str(&content);
                    if !content.ends_with('\n') {
                        output.push('\n');
                    }
                    output.push_str("</file>\n\n");
                },
                Err(e) => {
                    output.push_str(&format!("<!-- Failed to read {}: {} -->\n", path, e));
                }
            }
        }
        Ok(output)
    }

    fn wc(&self, paths: &[String]) -> Result<Vec<FileStats>> {
        let mut stats = Vec::new();
        for path in paths {
            if let Ok(content) = self.read_text(path) {
                stats.push(FileStats {
                    path: path.clone(),
                    lines: content.lines().count(),
                    bytes: content.len(),
                });
            }
        }
        Ok(stats)
    }

    fn patch(&mut self, path: &str, original: &str, replacement: &str) -> Result<()> {
        let content = self.read_text(path)?;

        let matches: Vec<_> = content.match_indices(original).collect();

        match matches.len() {
            0 => return Err(anyhow!("Original text not found in {}", path)),
            1 => {
                let new_content = content.replace(original, replacement);
                self.write_file(path, &new_content)?;
                Ok(())
            },
            _ => return Err(anyhow!("Ambiguous match: original text found {} times", matches.len())),
        }
    }
}
```
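How a worker process might wire this up (a sketch, not part of the diff): `from_env()` reads the `WORKFLOW_REQ_ID`, `WORKFLOW_BASE_COMMIT` and `WORKFLOW_DATA_PATH` variables defined above, and the `ContextShell` methods then work against that commit. The file names, search terms and patch arguments below are illustrative only (borrowed from the tests that follow).

```rust
use workflow_context::{ContextShell, FindOptions, OutputFormat, WorkerContext};

fn worker_main() -> anyhow::Result<()> {
    // Requires WORKFLOW_REQ_ID, WORKFLOW_BASE_COMMIT, WORKFLOW_DATA_PATH in the environment.
    let mut ctx = WorkerContext::from_env()?;

    // Orient: print the document outline as an ASCII tree.
    println!("{}", ctx.tree("/", None, OutputFormat::Text)?);

    // Locate markdown leaves, then search them for a keyword.
    let files = ctx.find("**/*.md", FindOptions { recursive: true, ..Default::default() })?;
    let hits = ctx.grep("Revenue", Some(files.into_iter().map(|f| f.path).collect()))?;
    for hit in &hits {
        println!("{}:{}: {}", hit.path, hit.line_number, hit.content);
    }

    // Targeted edit (must match exactly once), then persist a new commit.
    ctx.patch("README.md", "Introduction here.", "Intro v2.")?;
    let new_commit = ctx.commit("worker: patch README")?;
    println!("committed {}", new_commit);
    Ok(())
}
```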
crates/workflow-context/tests/docos_tests.rs (new file, 141 lines)
@@ -0,0 +1,141 @@
```rust
use workflow_context::{ContextStore, Vgcs, DocOS, DocManager, DocNodeKind};
use tempfile::TempDir;
use std::sync::Arc;

const ZERO_OID: &str = "0000000000000000000000000000000000000000";

#[test]
fn test_docos_basic() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Arc::new(Vgcs::new(temp_dir.path()));
    let req_id = "req-docos-1";

    store.init_repo(req_id)?;

    // 1. Init DocOS with empty repo
    let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);

    // 2. Create a file (Leaf)
    docos.write_content("Introduction", "Intro Content")?;
    let _commit_1 = docos.save("Add Intro")?;

    // 3. Verify outline
    let outline = docos.get_outline()?;
    // Root -> [Introduction (Leaf)]
    assert_eq!(outline.children.len(), 1);
    let intro_node = &outline.children[0];
    assert_eq!(intro_node.name, "Introduction");
    assert_eq!(intro_node.kind, DocNodeKind::Leaf);

    // 4. Read content
    let content = docos.read_content("Introduction")?;
    assert_eq!(content, "Intro Content");

    Ok(())
}

#[test]
fn test_docos_fission() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Arc::new(Vgcs::new(temp_dir.path()));
    let req_id = "req-docos-2";
    store.init_repo(req_id)?;

    let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);

    // 1. Start with a Leaf: "Analysis"
    docos.write_content("Analysis", "General Analysis")?;
    let commit_1 = docos.save("Init Analysis")?;

    // 2. Insert subsection "Revenue" into "Analysis"
    // This should promote "Analysis" to Composite
    docos.reload(&commit_1)?;
    docos.insert_subsection("Analysis", "Revenue", "Revenue Data")?;
    let commit_2 = docos.save("Split Analysis")?;

    // 3. Verify Structure
    docos.reload(&commit_2)?;
    let outline = docos.get_outline()?;

    // Root -> [Analysis (Composite)]
    assert_eq!(outline.children.len(), 1);
    let analysis_node = &outline.children[0];
    assert_eq!(analysis_node.name, "Analysis");
    assert_eq!(analysis_node.kind, DocNodeKind::Composite);

    // Analysis -> [Revenue (Leaf)] (index.md is hidden in outline)
    assert_eq!(analysis_node.children.len(), 1);
    let revenue_node = &analysis_node.children[0];
    assert_eq!(revenue_node.name, "Revenue");
    assert_eq!(revenue_node.kind, DocNodeKind::Leaf);

    // 4. Verify Content
    // Reading "Analysis" should now read "Analysis/index.md" which contains "General Analysis"
    let analysis_content = docos.read_content("Analysis")?;
    assert_eq!(analysis_content, "General Analysis");

    let revenue_content = docos.read_content("Analysis/Revenue")?;
    assert_eq!(revenue_content, "Revenue Data");

    Ok(())
}

#[test]
fn test_docos_fusion_and_outline() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Arc::new(Vgcs::new(temp_dir.path()));
    let req_id = "req-docos-3";
    store.init_repo(req_id)?;

    let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);

    // 1. Create a composite structure (Pre-fissioned state)
    // Root -> [Chapter1 (Composite)] -> [SectionA (Leaf), SectionB (Leaf)]
    docos.write_content("Chapter1/index.md", "Chapter 1 Intro")?;
    docos.write_content("Chapter1/SectionA", "Content A")?;
    docos.write_content("Chapter1/SectionB", "Content B")?;
    let commit_1 = docos.save("Setup Structure")?;

    docos.reload(&commit_1)?;

    // Verify Initial Outline
    let outline_1 = docos.get_outline()?;
    let ch1 = &outline_1.children[0];
    assert_eq!(ch1.kind, DocNodeKind::Composite);
    assert_eq!(ch1.children.len(), 2); // SectionA, SectionB

    // 2. Demote (Fusion)
    docos.demote("Chapter1")?;
    let commit_2 = docos.save("Demote Chapter 1")?;

    // 3. Verify Fusion Result
    docos.reload(&commit_2)?;
    let outline_2 = docos.get_outline()?;

    // Now Chapter1 should be a Leaf
    let ch1_fused = &outline_2.children[0];
    assert_eq!(ch1_fused.name, "Chapter1");
    assert_eq!(ch1_fused.kind, DocNodeKind::Leaf);

    // But wait! Because of our Outline Enhancement (Markdown Headers),
    // we expect the Fused file to have children (Sections) derived from headers!
    // The demote logic appends children with "# Name".
    // So "SectionA" became "# SectionA".

    // Let's inspect the children of the Fused node
    // We expect 2 children: "SectionA" and "SectionB" (as Sections)
    assert_eq!(ch1_fused.children.len(), 2);
    assert_eq!(ch1_fused.children[0].name, "SectionA");
    assert_eq!(ch1_fused.children[0].kind, DocNodeKind::Section);
    assert_eq!(ch1_fused.children[1].name, "SectionB");

    // 4. Verify Content of Fused File
    let content = docos.read_content("Chapter1")?;
    // Should contain Intro + # SectionA ... + # SectionB ...
    assert!(content.contains("Chapter 1 Intro"));
    assert!(content.contains("# SectionA"));
    assert!(content.contains("Content A"));
    assert!(content.contains("# SectionB"));

    Ok(())
}
```
crates/workflow-context/tests/vgcs_tests.rs (new file, 171 lines)
@@ -0,0 +1,171 @@
```rust
use workflow_context::{ContextStore, Vgcs};
use std::io::Read;
use tempfile::TempDir;
use std::sync::Arc;
use std::thread;

const ZERO_OID: &str = "0000000000000000000000000000000000000000";

#[test]
fn test_basic_workflow() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Vgcs::new(temp_dir.path());
    let req_id = "req-001";

    // 1. Init
    store.init_repo(req_id)?;

    // 2. Write Transaction (Initial Commit)
    let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
    tx.write("test.txt", b"Hello World")?;
    let commit_hash_1 = tx.commit("Initial commit", "Test User")?;

    // 3. Read
    let mut reader = store.read_file(req_id, &commit_hash_1, "test.txt")?;
    let mut content = String::new();
    reader.read_to_string(&mut content)?;
    assert_eq!(content, "Hello World");

    // 4. Modify file
    let mut tx = store.begin_transaction(req_id, &commit_hash_1)?;
    tx.write("test.txt", b"Hello World Modified")?;
    tx.write("new.txt", b"New File")?;
    let commit_hash_2 = tx.commit("Second commit", "Test User")?;

    // 5. Verify Diff
    let changes = store.diff(req_id, &commit_hash_1, &commit_hash_2)?;
    // Should have 1 Modified (test.txt) and 1 Added (new.txt)
    assert_eq!(changes.len(), 2);

    // 6. List Dir
    let entries = store.list_dir(req_id, &commit_hash_2, "")?;
    assert_eq!(entries.len(), 2); // test.txt, new.txt

    Ok(())
}

#[test]
fn test_large_file_support() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Vgcs::new(temp_dir.path());
    let req_id = "req-large";
    store.init_repo(req_id)?;

    // Create 2MB data
    let large_data = vec![b'a'; 2 * 1024 * 1024];

    let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
    tx.write("large.bin", &large_data)?;
    let commit_hash = tx.commit("Add large file", "Tester")?;

    // Read back
    let mut reader = store.read_file(req_id, &commit_hash, "large.bin")?;
    let mut read_data = Vec::new();
    reader.read_to_end(&mut read_data)?;

    assert_eq!(read_data.len(), large_data.len());
    // Checking first and last bytes to be reasonably sure without comparing 2MB in assertion message on failure
    assert_eq!(read_data[0], b'a');
    assert_eq!(read_data[read_data.len()-1], b'a');
    assert_eq!(read_data, large_data);

    // Check internal blob store
    // We don't calculate SHA256 here to verify path exactly, but we check if blobs dir has content
    let blobs_dir = temp_dir.path().join("blobs").join(req_id);
    assert!(blobs_dir.exists());
    // Should have subdirectories for SHA prefix
    let entries = std::fs::read_dir(blobs_dir)?.collect::<Vec<_>>();
    assert!(!entries.is_empty());

    Ok(())
}

#[test]
fn test_parallel_branching_and_merge() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    // Clone temp_path for threads
    let temp_path = temp_dir.path().to_path_buf();
    let store = Arc::new(Vgcs::new(&temp_path));
    let req_id = "req-parallel";

    store.init_repo(req_id)?;

    // Initial commit
    let base_commit = {
        let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
        tx.write("base.txt", b"Base Content")?;
        tx.commit("Base Commit", "System")?
    };

    // Fork 1: Modify base.txt
    let store1 = store.clone();
    let base1 = base_commit.clone();
    let handle1 = thread::spawn(move || -> anyhow::Result<String> {
        let mut tx = store1.begin_transaction(req_id, &base1)?;
        tx.write("base.txt", b"Base Content Modified by 1")?;
        tx.write("file1.txt", b"File 1 Content")?;
        Ok(tx.commit("Fork 1 Commit", "User 1")?)
    });

    // Fork 2: Add file2.txt (No conflict)
    let store2 = store.clone();
    let base2 = base_commit.clone();
    let handle2 = thread::spawn(move || -> anyhow::Result<String> {
        let mut tx = store2.begin_transaction(req_id, &base2)?;
        tx.write("file2.txt", b"File 2 Content")?;
        Ok(tx.commit("Fork 2 Commit", "User 2")?)
    });

    let commit1 = handle1.join().unwrap()?;
    let commit2 = handle2.join().unwrap()?;

    // Merge Fork 2 into Fork 1 (Memory Merge)
    // This merge should succeed as they touch different files/areas (mostly)
    // But wait, Fork 1 modified base.txt, Fork 2 kept it as is.
    // Git merge should take Fork 1's change and include Fork 2's new file.

    // We need to commit the merge result to verify it
    let merge_tree_oid = store.merge_trees(req_id, &base_commit, &commit1, &commit2)?;

    // Manually create a commit from the merge tree to verify content (optional but good)
    // In real system, Orchestrator would do this.
    // For test, we can just verify the tree contains what we expect or use a helper.
    // Or we can just trust merge_trees returns an OID on success.

    assert!(!merge_tree_oid.is_empty());

    Ok(())
}

#[test]
fn test_merge_conflict() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Vgcs::new(temp_dir.path());
    let req_id = "req-conflict";
    store.init_repo(req_id)?;

    // Base
    let mut tx = store.begin_transaction(req_id, ZERO_OID)?;
    tx.write("conflict.txt", b"Base Version")?;
    let base_commit = tx.commit("Base", "System")?;

    // Branch A: Edit conflict.txt
    let mut tx_a = store.begin_transaction(req_id, &base_commit)?;
    tx_a.write("conflict.txt", b"Version A")?;
    let commit_a = tx_a.commit("Commit A", "User A")?;

    // Branch B: Edit conflict.txt differently
    let mut tx_b = store.begin_transaction(req_id, &base_commit)?;
    tx_b.write("conflict.txt", b"Version B")?;
    let commit_b = tx_b.commit("Commit B", "User B")?;

    // Try Merge
    let result = store.merge_trees(req_id, &base_commit, &commit_a, &commit_b);

    // Should fail with conflict
    assert!(result.is_err());
    let err = result.unwrap_err();
    assert!(err.to_string().contains("Merge conflict"));

    Ok(())
}
```
142
crates/workflow-context/tests/worker_runtime_tests.rs
Normal file
@ -0,0 +1,142 @@
|
||||
use workflow_context::{WorkerContext, ContextShell, OutputFormat, FindOptions, Vgcs, ContextStore};
use tempfile::TempDir;

const ZERO_OID: &str = "0000000000000000000000000000000000000000";

fn setup_env() -> (TempDir, String, String) {
    let temp_dir = TempDir::new().unwrap();
    let data_path = temp_dir.path().to_str().unwrap().to_string();
    let req_id = "req-shell-test".to_string();

    // Init Repo
    let vgcs = Vgcs::new(&data_path);
    vgcs.init_repo(&req_id).unwrap();

    (temp_dir, data_path, req_id)
}

#[test]
fn test_shell_comprehensive() -> anyhow::Result<()> {
    let (_tmp, data_path, req_id) = setup_env();

    // 1. Setup Initial Context
    let mut ctx = WorkerContext::new(&data_path, &req_id, ZERO_OID);

    ctx.write_file("README.md", "Project Root\n\nIntroduction here.")?;
    ctx.write_file("src/main.rs", "fn main() {\n println!(\"Hello\");\n println!(\"Hello\");\n}")?; // Double Hello for ambiguity test
    ctx.write_file("src/util.rs", "pub fn util() -> i32 { 42 }")?;
    ctx.write_file("data/config.json", "{\n \"key\": \"value\",\n \"retries\": 3\n}")?;
    ctx.write_file("文档/说明.txt", "这是一个中文文件。")?; // Unicode Path & Content

    let commit_1 = ctx.commit("Init")?;
    let mut ctx = WorkerContext::new(&data_path, &req_id, &commit_1);

    // --- Find Tests ---
    println!("Testing Find...");

    // Test: Recursive vs Non-recursive
    // Note: Includes directories (src, data, 文档) + files (5) = 8
    let all_nodes = ctx.find("**/*", FindOptions { recursive: true, ..Default::default() })?;
    assert_eq!(all_nodes.len(), 8);

    // Test: Only Files
    let only_files = ctx.find("**/*", FindOptions {
        recursive: true,
        type_filter: Some("File".to_string()),
        ..Default::default()
    })?;
    assert_eq!(only_files.len(), 5);

    // Test: Non-recursive (Top level)
    let root_nodes = ctx.find("*", FindOptions { recursive: false, ..Default::default() })?;
    // Expect README.md, src(dir), data(dir), 文档(dir)
    assert!(root_nodes.iter().any(|f| f.path == "README.md"));
    assert!(root_nodes.iter().any(|f| f.path == "src"));

    // Test: Type Filter (Dir)
    let dirs = ctx.find("**/*", FindOptions {
        recursive: true,
        type_filter: Some("Dir".to_string()),
        ..Default::default()
    })?;
    assert!(dirs.iter().any(|d| d.path == "src"));
    assert!(dirs.iter().any(|d| d.path == "data"));
    assert!(dirs.iter().any(|d| d.path == "文档"));
    assert!(!dirs.iter().any(|d| d.path == "README.md"));

    // --- Grep Tests ---
    println!("Testing Grep...");

    // Test: Regex Match
    let matches = ctx.grep(r"fn \w+\(\)", None)?;
    assert_eq!(matches.len(), 2); // main() and util()

    // Test: Unicode Content
    let zh_matches = ctx.grep("中文", None)?;
    assert_eq!(zh_matches.len(), 1);
    assert_eq!(zh_matches[0].path, "文档/说明.txt");

    // Test: Invalid Regex
    let bad_regex = ctx.grep("(", None);
    assert!(bad_regex.is_err());

    // --- Patch Tests ---
    println!("Testing Patch...");

    // Test: Ambiguous Match (Safety Check)
    // src/main.rs has two "println!(\"Hello\");"
    let res = ctx.patch("src/main.rs", "println!(\"Hello\");", "println!(\"World\");");
    assert!(res.is_err(), "Should fail on ambiguous match");
    let err_msg = res.unwrap_err().to_string();
    assert!(err_msg.contains("Ambiguous match"), "Error message mismatch: {}", err_msg);

    // Test: Unique Match
    // Patch "Introduction here." to "Intro v2." in README.md
    ctx.patch("README.md", "Introduction here.", "Intro v2.")?;
    ctx.commit("Patch 1")?; // Must commit to verify via read (if read uses committed state)

    // Verify
    let readme = ctx.read_text("README.md")?;
    assert!(readme.contains("Intro v2."));

    // Test: Special Characters (Literal Match)
    // Let's try to patch JSON which has braces and quotes
    ctx.patch("data/config.json", "\"retries\": 3", "\"retries\": 5")?;
    ctx.commit("Patch 2")?;

    let config = ctx.read_text("data/config.json")?;
    assert!(config.contains("\"retries\": 5"));

    // Test: Cross-line Patch
    // Replace the whole function body in util.rs
    let old_block = "pub fn util() -> i32 { 42 }";
    let new_block = "pub fn util() -> i32 {\n return 100;\n}";
    ctx.patch("src/util.rs", old_block, new_block)?;
    ctx.commit("Patch 3")?;

    let util = ctx.read_text("src/util.rs")?;
    assert!(util.contains("return 100;"));

    // Test: Patch non-existent file
    let res = ctx.patch("ghost.txt", "foo", "bar");
    assert!(res.is_err());

    Ok(())
}

#[test]
fn test_tool_schema_validity() {
    let defs = WorkerContext::get_tool_definitions();
    assert!(defs.is_array());
    let arr = defs.as_array().unwrap();

    // Verify critical fields exist for OpenAI
    for tool in arr {
        let obj = tool.as_object().unwrap();
        assert_eq!(obj["type"], "function");
        let func = obj["function"].as_object().unwrap();
        assert!(func.contains_key("name"));
        assert!(func.contains_key("description"));
        assert!(func.contains_key("parameters"));
    }
}
69
docker-compose.e2e.yml
Normal file
@ -0,0 +1,69 @@
|
||||
services:
  api-gateway:
    ports:
      - "4000:4000"

  workflow-orchestrator-service:
    ports:
      - "8005:8005" # Expose for debugging if needed
    volumes:
      - workflow_data:/mnt/workflow_data
    environment:
      - WORKFLOW_DATA_PATH=/mnt/workflow_data

  alphavantage-provider-service:
    volumes:
      - workflow_data:/mnt/workflow_data
    environment:
      - WORKFLOW_DATA_PATH=/mnt/workflow_data

  tushare-provider-service:
    volumes:
      - workflow_data:/mnt/workflow_data
    environment:
      - WORKFLOW_DATA_PATH=/mnt/workflow_data

  finnhub-provider-service:
    volumes:
      - workflow_data:/mnt/workflow_data
    environment:
      - WORKFLOW_DATA_PATH=/mnt/workflow_data

  yfinance-provider-service:
    volumes:
      - workflow_data:/mnt/workflow_data
    environment:
      - WORKFLOW_DATA_PATH=/mnt/workflow_data

  report-generator-service:
    volumes:
      - workflow_data:/mnt/workflow_data
    environment:
      - WORKFLOW_DATA_PATH=/mnt/workflow_data

  mock-provider-service:
    build:
      context: .
      dockerfile: services/mock-provider-service/Dockerfile
    container_name: mock-provider-service
    environment:
      SERVER_PORT: 8006
      NATS_ADDR: nats://nats:4222
      API_GATEWAY_URL: http://api-gateway:4000
      SERVICE_HOST: mock-provider-service
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      RUST_LOG: info
    volumes:
      - workflow_data:/mnt/workflow_data
    depends_on:
      - nats
    networks:
      - app-network
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://localhost:8006/health >/dev/null || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 12

volumes:
  workflow_data:
51
docker-compose.test.yml
Normal file
@ -0,0 +1,51 @@
|
||||
services:
  postgres-test:
    image: timescale/timescaledb:2.15.2-pg16
    container_name: fundamental-postgres-test
    command: -c shared_preload_libraries=timescaledb
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: fundamental_test
    ports:
      - "5433:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres -d fundamental_test"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      - test-network

  nats-test:
    image: nats:2.9
    container_name: fundamental-nats-test
    ports:
      - "4223:4222"
    networks:
      - test-network

  data-persistence-test:
    build:
      context: .
      dockerfile: services/data-persistence-service/Dockerfile
    container_name: data-persistence-service-test
    environment:
      HOST: 0.0.0.0
      PORT: 3000
      # Connect to postgres-test using internal docker network alias
      DATABASE_URL: postgresql://postgres:postgres@postgres-test:5432/fundamental_test
      RUST_LOG: info
      RUST_BACKTRACE: "1"
    ports:
      - "3005:3000"
    depends_on:
      postgres-test:
        condition: service_healthy
    networks:
      - test-network

networks:
  test-network:
@ -16,6 +16,8 @@ services:
|
||||
      retries: 10
    networks:
      - app-network
    ports:
      - "5434:5432"
  nats:
    image: nats:2.9
    volumes:
@ -26,8 +28,10 @@ services:
  data-persistence-service:
    build:
      context: .
      dockerfile: services/data-persistence-service/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: data-persistence-service
    working_dir: /app/services/data-persistence-service
    command: ["cargo", "watch", "-x", "run --bin data-persistence-service-server"]
    environment:
      HOST: 0.0.0.0
      PORT: 3000
@ -38,9 +42,15 @@ services:
    depends_on:
      postgres-db:
        condition: service_healthy
    # If you prefer live-reload or local code mount, consider switching to a dev Dockerfile.
    # volumes:
    #   - ./:/workspace
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://localhost:3000/health >/dev/null || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 10
    volumes:
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    networks:
      - app-network

@ -51,8 +61,10 @@ services:
      dockerfile: frontend/Dockerfile
    container_name: fundamental-frontend
    working_dir: /workspace/frontend
    command: npm run dev
    command: ["/workspace/frontend/scripts/docker-dev-entrypoint.sh"]
    environment:
      # Vite Proxy Target
      VITE_API_TARGET: http://api-gateway:4000
      # Proxy Next's API routes to the new api-gateway
      NEXT_PUBLIC_BACKEND_URL: http://api-gateway:4000/v1
      # Internal URL for SSR to call its own API, avoiding x-forwarded-host resolving to a host port
@ -76,26 +88,35 @@ services:
  api-gateway:
    build:
      context: .
      dockerfile: services/api-gateway/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: api-gateway
    restart: unless-stopped
    working_dir: /app/services/api-gateway
    command: ["cargo", "watch", "-x", "run --bin api-gateway"]
    ports:
      - "4000:4000"
    environment:
      SERVER_PORT: 4000
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      REPORT_GENERATOR_SERVICE_URL: http://report-generator-service:8004
      # provider_services via explicit JSON for deterministic parsing
      PROVIDER_SERVICES: '["http://alphavantage-provider-service:8000", "http://tushare-provider-service:8001", "http://finnhub-provider-service:8002", "http://yfinance-provider-service:8003"]'
      RUST_LOG: info,axum=info
      RUST_BACKTRACE: "1"
    depends_on:
      - nats
      - data-persistence-service
      - alphavantage-provider-service
      - tushare-provider-service
      - finnhub-provider-service
      - yfinance-provider-service
      - report-generator-service
      nats:
        condition: service_started
      data-persistence-service:
        condition: service_healthy
      alphavantage-provider-service:
        condition: service_started
      tushare-provider-service:
        condition: service_started
      finnhub-provider-service:
        condition: service_started
      yfinance-provider-service:
        condition: service_started
      report-generator-service:
        condition: service_started
    networks:
      - app-network
    healthcheck:
@ -103,16 +124,30 @@ services:
      interval: 5s
      timeout: 5s
      retries: 12
    volumes:
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo

  alphavantage-provider-service:
    build:
      context: .
      dockerfile: services/alphavantage-provider-service/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: alphavantage-provider-service
    working_dir: /app/services/alphavantage-provider-service
    command: ["cargo", "watch", "-x", "run"]
    volumes:
      - workflow_data:/mnt/workflow_data
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    environment:
      SERVER_PORT: 8000
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      API_GATEWAY_URL: http://api-gateway:4000
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      SERVICE_HOST: alphavantage-provider-service
      RUST_LOG: info,axum=info
      RUST_BACKTRACE: "1"
    depends_on:
@ -129,13 +164,23 @@ services:
  tushare-provider-service:
    build:
      context: .
      dockerfile: services/tushare-provider-service/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: tushare-provider-service
    working_dir: /app/services/tushare-provider-service
    command: ["cargo", "watch", "-x", "run"]
    volumes:
      - workflow_data:/mnt/workflow_data
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    environment:
      SERVER_PORT: 8001
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      TUSHARE_API_URL: http://api.waditu.com
      API_GATEWAY_URL: http://api-gateway:4000
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      SERVICE_HOST: tushare-provider-service
      RUST_LOG: info,axum=info
      RUST_BACKTRACE: "1"
    depends_on:
@ -152,13 +197,23 @@ services:
  finnhub-provider-service:
    build:
      context: .
      dockerfile: services/finnhub-provider-service/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: finnhub-provider-service
    working_dir: /app/services/finnhub-provider-service
    command: ["cargo", "watch", "-x", "run"]
    volumes:
      - workflow_data:/mnt/workflow_data
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    environment:
      SERVER_PORT: 8002
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      FINNHUB_API_URL: https://finnhub.io/api/v1
      API_GATEWAY_URL: http://api-gateway:4000
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      SERVICE_HOST: finnhub-provider-service
      RUST_LOG: info,axum=info
      RUST_BACKTRACE: "1"
    depends_on:
@ -175,12 +230,22 @@ services:
  yfinance-provider-service:
    build:
      context: .
      dockerfile: services/yfinance-provider-service/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: yfinance-provider-service
    working_dir: /app/services/yfinance-provider-service
    command: ["cargo", "watch", "-x", "run"]
    volumes:
      - workflow_data:/mnt/workflow_data
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    environment:
      SERVER_PORT: 8003
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      API_GATEWAY_URL: http://api-gateway:4000
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      SERVICE_HOST: yfinance-provider-service
      RUST_LOG: info,axum=info
      RUST_BACKTRACE: "1"
    depends_on:
@ -200,12 +265,20 @@ services:
  report-generator-service:
    build:
      context: .
      dockerfile: services/report-generator-service/Dockerfile
      dockerfile: docker/Dockerfile.dev
    container_name: report-generator-service
    working_dir: /app/services/report-generator-service
    command: ["cargo", "watch", "-x", "run --bin report-generator-service"]
    volumes:
      - workflow_data:/mnt/workflow_data
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    environment:
      SERVER_PORT: 8004
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      RUST_LOG: info,axum=info
      RUST_BACKTRACE: "1"
    depends_on:
@ -219,16 +292,49 @@ services:
      timeout: 5s
      retries: 12

  workflow-orchestrator-service:
    build:
      context: .
      dockerfile: docker/Dockerfile.dev
    container_name: workflow-orchestrator-service
    working_dir: /app/services/workflow-orchestrator-service
    command: ["cargo", "watch", "-x", "run --bin workflow-orchestrator-service"]
    volumes:
      - workflow_data:/mnt/workflow_data
      - ./:/app
      - cargo-target:/app/target
      - cargo-cache:/usr/local/cargo
    environment:
      SERVER_PORT: 8005
      NATS_ADDR: nats://nats:4222
      DATA_PERSISTENCE_SERVICE_URL: http://data-persistence-service:3000/api/v1
      WORKFLOW_DATA_PATH: /mnt/workflow_data
      RUST_LOG: info
      RUST_BACKTRACE: "1"
    depends_on:
      - nats
      - data-persistence-service
    networks:
      - app-network
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://localhost:8005/health >/dev/null || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 12

# =================================================================
# Python Services (Legacy - to be replaced)
# =================================================================

volumes:
  workflow_data:
  pgdata:
  frontend_node_modules:
  nats_data:
  cargo-target:
    driver: local
  cargo-cache:
    driver: local

networks:
  app-network:
13
docker/Dockerfile.dev
Normal file
@ -0,0 +1,13 @@
|
||||
FROM rust:1.90-bookworm

# Install cargo-watch for hot reload
RUN cargo install cargo-watch

WORKDIR /app

# Create target and cache directories to ensure permissions
RUN mkdir -p /app/target && mkdir -p /usr/local/cargo

# Default command
CMD ["cargo", "watch", "-x", "run"]
63
docs/1_requirements/20251122_[Active]_user-guide_v2.md
Normal file
@ -0,0 +1,63 @@
|
||||
# Fundamental Analysis Platform 用户指南 (v2.0 - Vite Refactor)
|
||||
日期: 2025-11-22
|
||||
版本: 2.0
|
||||
|
||||
## 1. 简介
|
||||
Fundamental Analysis Platform 是一个基于 AI Agent 的深度基本面投研平台,旨在通过自动化工作流聚合多源金融数据,并利用 LLM(大语言模型)生成专业的财务分析报告。
|
||||
|
||||
v2.0 版本采用了全新的 Vite + React SPA 架构,提供了更流畅的交互体验和实时的分析状态可视化。
|
||||
|
||||
## 2. 核心功能
|
||||
|
||||
### 2.1 仪表盘 (Dashboard)
|
||||
平台首页,提供简洁的分析入口。
|
||||
* **股票代码**: 支持输入 A股 (如 `600519.SS`)、美股 (如 `AAPL`) 或港股代码。
|
||||
* **市场选择**: 下拉选择 CN (中国)、US (美国) 或 HK (香港)。
|
||||
* **开始分析**: 点击“生成分析报告”按钮即可启动分析流程。
|
||||
|
||||
### 2.2 分析报告页 (Report View)
|
||||
核心工作区,分为左侧状态栏和右侧详情区。
|
||||
|
||||
#### 左侧:工作流状态
|
||||
* **可视化 DAG**: 展示当前的分析任务依赖图。
|
||||
* **节点颜色**: 灰色(等待)、蓝色(运行中)、绿色(完成)、红色(失败)。
|
||||
* **动态连线**: 当任务运行时,连接线会有流光动画指示数据流向。
|
||||
* **实时日志**: 滚动展示所有后台任务的执行日志,支持实时查看数据抓取和分析进度。
|
||||
|
||||
#### 右侧:详情面板
|
||||
* **Analysis Report**: 展示由 AI 生成的最终分析报告。支持 Markdown 格式(标题、表格、加粗、引用),并带有打字机生成特效。
|
||||
* **Fundamental Data**: (开发中) 展示抓取到的原始财务数据表格。
|
||||
* **Stock Chart**: (开发中) 展示股价走势图。
|
||||
|
||||
### 2.3 系统配置 (Config)
|
||||
集中管理平台的所有外部连接和参数。
|
||||
|
||||
* **AI Provider**:
|
||||
* 管理 LLM 供应商 (OpenAI, Anthropic, Local Ollama 等)。
|
||||
* 配置 API Key 和 Base URL。
|
||||
* 刷新并选择可用的模型 (GPT-4o, Claude-3.5 等)。
|
||||
* **数据源配置**:
|
||||
* 启用/禁用金融数据源 (Tushare, Finnhub, AlphaVantage)。
|
||||
* 输入对应的 API Token。
|
||||
* 支持连接测试。
|
||||
* **分析模板**:
|
||||
* 查看当前的分析流程模板(如 "Quick Scan")。
|
||||
* 查看每个模块使用的 Prompt 模板及模型配置。
|
||||
* **系统状态**:
|
||||
* 监控微服务集群 (API Gateway, Orchestrator 等) 的健康状态。
|
||||
|
||||
## 3. 快速开始
|
||||
|
||||
1. 进入 **配置页** -> **AI Provider**,添加您的 OpenAI API Key。
|
||||
2. 进入 **配置页** -> **数据源配置**,启用 Tushare 并输入 Token。
|
||||
3. 回到 **首页**,输入 `600519.SS`,选择 `CN` 市场。
|
||||
4. 点击 **生成分析报告**,观察工作流运行及报告生成。
|
||||
|
||||
## 4. 常见问题
|
||||
|
||||
* **Q: 报告生成卡住怎么办?**
|
||||
* A: 检查左侧“实时日志”,查看是否有 API 连接超时或配额耗尽的错误。
|
||||
* **Q: 如何添加本地模型?**
|
||||
* A: 在 AI Provider 页添加新的 Provider,Base URL 填入 `http://localhost:11434/v1` (Ollama 默认地址)。
|
||||
|
||||
|
||||
112
docs/2_architecture/20251129_refactor_history_context.md
Normal file
@ -0,0 +1,112 @@
|
||||
# 设计文档:统一历史记录与上下文管理重构
|
||||
|
||||
## 1. 目标
|
||||
实现一个统一且一致的历史管理系统,达成以下目标:
|
||||
1. **原子化历史记录**:一个“历史记录”严格对应**一次 Workflow 执行**(由 `request_id` 标识),彻底解决历史列表重复/碎片化问题。
|
||||
2. **单一数据源**:全局上下文(VGCS/Git)作为所有文件产物(报告、日志、数据)的唯一真实存储源。
|
||||
3. **轻量化索引**:数据库(`session_data` 或新表)仅存储结构化的“索引”(Snapshot),指向 VGCS 中的 Commit 和文件路径。
|
||||
|
||||
## 2. 现状分析
|
||||
- **碎片化**:目前 `analysis_results` 表存储的是单个 Task 的结果。如果一个工作流包含 N 个分析步骤,历史列表中就会出现 N 条记录。
|
||||
- **数据冗余**:结果内容(Markdown 等)既作为文件存在 VGCS 中,又作为文本列存在 Postgres 数据库中。
|
||||
- **历史视图缺失**:缺乏一个能够代表整次执行状态(包含拓扑结构、状态、所有产物引用)的根对象,导致查询历史列表时困难。
|
||||
|
||||
## 3. 架构方案
|
||||
|
||||
### 3.1. 核心概念:工作流快照 (Workflow Snapshot)
|
||||
不再将每个 Task 视为独立的历史记录,我们定义 **Workflow Snapshot** 为历史的原子单位。
|
||||
|
||||
一个 Snapshot 包含:
|
||||
- **元数据**:`request_id`(请求ID), `symbol`(标的), `market`(市场), `template_id`(模板ID), `start_time`(开始时间), `end_time`(结束时间), `final_status`(最终状态)。
|
||||
- **拓扑结构**:DAG 结构(节点与边)。
|
||||
- **执行状态**:针对每个节点记录:
|
||||
- `status`:状态 (Completed, Failed, Skipped)
|
||||
- `output_commit`:该节点产生的 VGCS Commit Hash。
|
||||
- `artifacts`:产物映射表,Key 为产物名称,Value 为 VGCS 文件路径 (例如 `{"report": "analysis/summary.md", "log": "analysis/execution.log"}`)。
|
||||
|
||||
### 3.2. 数据存储变更
|
||||
|
||||
#### A. `workflow_history` 表 (或重构后的 `session_data`)
|
||||
我们将引入一张专用表(或规范化 `session_data` 的使用)来存储 **Workflow Manifest**。
|
||||
|
||||
```sql
|
||||
CREATE TABLE workflow_history (
|
||||
request_id UUID PRIMARY KEY,
|
||||
symbol VARCHAR(20) NOT NULL,
|
||||
market VARCHAR(10) NOT NULL,
|
||||
template_id VARCHAR(50),
|
||||
status VARCHAR(20) NOT NULL, -- 'Completed', 'Failed'
|
||||
start_time TIMESTAMPTZ NOT NULL,
|
||||
end_time TIMESTAMPTZ,
|
||||
|
||||
-- Snapshot JSON 数据
|
||||
snapshot_data JSONB NOT NULL
|
||||
-- {
|
||||
-- "dag": { ... },
|
||||
-- "tasks": {
|
||||
-- "task_id_1": { "status": "Completed", "commit": "abc1234", "paths": { "report": "..." } }
|
||||
-- }
|
||||
-- }
|
||||
);
|
||||
```
|
||||
|
||||
*注:为了减少迁移摩擦,我们可以继续使用 `session_data` 表,并指定 `data_type = 'workflow_snapshot'`,但建立专用表更有利于查询和维护。*
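As an illustration of the Manifest above, a minimal Rust sketch of the DTO that the Orchestrator might serialize into `snapshot_data` is given below. Field names mirror the JSON comment in the table definition; the concrete types and crate choices (`uuid`, `chrono`, `serde_json`) are assumptions, not the final contract.

```rust
use std::collections::HashMap;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;

/// Root object stored in `workflow_history.snapshot_data` (JSONB).
#[derive(Debug, Serialize, Deserialize)]
pub struct WorkflowSnapshot {
    pub request_id: Uuid,
    pub symbol: String,
    pub market: String,
    pub template_id: Option<String>,
    pub status: String, // "Completed" | "Failed"
    pub start_time: DateTime<Utc>,
    pub end_time: Option<DateTime<Utc>>,
    pub dag: Value, // topology: nodes + edges
    pub tasks: HashMap<String, TaskSnapshot>,
}

/// Per-node execution record inside the snapshot.
#[derive(Debug, Serialize, Deserialize)]
pub struct TaskSnapshot {
    pub status: String,                     // Completed / Failed / Skipped
    pub output_commit: Option<String>,      // VGCS commit hash produced by the task
    pub artifacts: HashMap<String, String>, // e.g. "report" -> "analysis/summary.md"
}
```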
|
||||
|
||||
#### B. VGCS (Git 上下文) 的使用规范
|
||||
- **输入**:初始 Commit 包含 `request.json`。
|
||||
- **过程**:每个 Task (Worker) 检出基础 Commit,执行工作,写入文件(报告、日志),并创建 **New Commit**。
|
||||
- **合并**:Orchestrator 负责追踪这些 Commit 的 DAG 关系。
|
||||
- **终态**:Orchestrator 创建最终的“Merge Commit”(可选,或仅引用各叶子节点的 Commit)并在 `workflow_history` 中记录。
|
||||
|
||||
### 3.3. 组件职责划分
|
||||
|
||||
#### 1. Worker 服务 (Report Gen, Providers)
|
||||
- **输入**:接收 `base_commit`, `task_id`, `output_path_config`。
|
||||
- **动作**:
|
||||
- 初始化 `WorkerContext` (VGCS)。
|
||||
- 将 `report.md` 写入 `output_path`。
|
||||
- 将 `_execution.md` (日志) 写入 `log_path`。
|
||||
- **Commit**:提交更改,生成 Commit Hash。
|
||||
- **输出**:返回 `new_commit_hash` 和 `artifact_paths` (Map<Name, Path>) 给 Orchestrator。
|
||||
- **禁止**:Worker 不再直接向数据库的 `analysis_results` 表写入数据。
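The Worker flow described above maps almost directly onto the `WorkerContext` API exercised in `crates/workflow-context/tests/worker_runtime_tests.rs`. A minimal sketch follows; the function name, output paths, and the returned tuple shape are illustrative assumptions rather than the final interface.

```rust
use std::collections::HashMap;
use workflow_context::WorkerContext;

/// Illustrative worker-side flow: write artifacts into the VGCS context,
/// commit, and hand the new commit hash plus artifact paths back to the orchestrator.
fn produce_artifacts(
    data_path: &str,
    request_id: &str,
    base_commit: &str,
    report_md: &str,
    execution_log: &str,
) -> anyhow::Result<(String, HashMap<String, String>)> {
    let mut ctx = WorkerContext::new(data_path, request_id, base_commit);

    // Write the report and the execution log into the configured output paths.
    ctx.write_file("analysis/summary.md", report_md)?;
    ctx.write_file("analysis/_execution.md", execution_log)?;

    // Commit the changes; the returned hash is what the orchestrator records.
    let new_commit = ctx.commit("report-generator: summary")?;

    let mut artifacts = HashMap::new();
    artifacts.insert("report".to_string(), "analysis/summary.md".to_string());
    artifacts.insert("log".to_string(), "analysis/_execution.md".to_string());

    Ok((new_commit, artifacts))
}
```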
|
||||
|
||||
#### 2. Workflow Orchestrator (编排器)
|
||||
- **协调**:从 `TaskCompleted` 事件中收集 `new_commit_hash` 和 `artifact_paths`。
|
||||
- **状态追踪**:更新内存中的 DAG 状态。
|
||||
- **完成处理**:
|
||||
- 当所有任务结束后,生成 **Workflow Snapshot**。
|
||||
- 调用 `persistence-service` 将 Snapshot 保存至 `workflow_history`。
|
||||
- 发送 `WorkflowCompleted` 事件。
|
||||
|
||||
#### 3. Data Persistence Service (持久化服务)
|
||||
- **新接口**:`GET /api/v1/history`
|
||||
- 返回 `workflow_history` 列表(摘要信息)。
|
||||
- **新接口**:`GET /api/v1/history/{request_id}`
|
||||
- 返回完整的 Snapshot(详情信息)。
|
||||
- **旧接口处理**:废弃 `GET /api/v1/analysis-results` 或将其重定向为查询 `workflow_history`。
|
||||
|
||||
#### 4. Frontend (前端)
|
||||
- **历史页**:调用 `/api/v1/history`。每个 `request_id` 只展示一行。
|
||||
- **报告页**:
|
||||
- 获取特定的历史详情。
|
||||
- 使用 `artifact_paths` + `commit_hash` 通过 VGCS API (或代理)以此获取文件内容。
|
||||
|
||||
## 4. 实施计划
|
||||
|
||||
1. **Schema 定义**:定义 `WorkflowSnapshot` 结构体及 SQL 迁移脚本 (`workflow_history`)。
|
||||
2. **Orchestrator 改造**:
|
||||
- 修改 `handle_task_completed` 以聚合 `artifact_paths`。
|
||||
- 实现 `finalize_workflow` 逻辑,用于构建并保存 Snapshot。
|
||||
3. **Worker 改造**:
|
||||
- 确保 `report-generator` 在 `TaskResult` 中返回结构化的 `artifact_paths`。
|
||||
- 移除 `report-generator` 中对 `create_analysis_result` 的数据库调用。
|
||||
4. **Persistence Service 改造**:
|
||||
- 实现 `workflow_history` 的 CRUD 操作。
|
||||
5. **Frontend 改造**:
|
||||
- 更新 API 调用以适配新的历史记录接口。
|
||||
|
||||
## 5. 核心收益
|
||||
- **单一事实来源**:文件存 Git,元数据存 DB,杜绝数据不同步。
|
||||
- **历史记录原子性**:一次运行 = 一条记录。
|
||||
- **可追溯性**:每个产物都精确关联到一个 Git Commit。
|
||||
|
||||
@ -0,0 +1,193 @@
|
||||
# 前端报告页面重构设计文档 (Frontend Refactoring Design Doc)
|
||||
|
||||
**日期**: 2025-11-19
|
||||
**状态**: 待评审 (Draft)
|
||||
**目标**: 重构 `app/report/[symbol]` 页面,消除历史技术债务,严格对齐 V2 后端微服务架构。
|
||||
|
||||
## 1. 核心原则
|
||||
|
||||
1. **单一数据源 (SSOT)**: 前端不再维护任务进度、依赖关系或倒计时。所有状态严格来自后端 API (`/api/tasks/{id}`, `/api/analysis-results`).
|
||||
2. **无隐式逻辑 (No Implicit Logic)**: 严格按照用户选择的 Template ID 渲染,后端未返回的数据即视为不存在,不进行客户端推断或 Fallback。
|
||||
3. **真·流式传输 (True Streaming)**: 废弃数据库轮询方案。采用 **Server-Sent Events (SSE)** 技术。
|
||||
* 后端在内存中维护 `tokio::sync::broadcast` 通道。
|
||||
* LLM 生成的 Token 实时推送到通道,直达前端。
|
||||
* 数据库只负责存储**最终完成**的分析结果 (Persistence),不参与流式传输过程。
|
||||
|
||||
## 2. 页面布局设计
|
||||
|
||||
页面采用“固定框架 + 动态内容”的布局模式。
|
||||
|
||||
```text
|
||||
+-----------------------------------------------------------------------+
|
||||
| [Header Area] |
|
||||
| Symbol: AAPL | Market: US | Price: $230.5 (Snapshot) | [Status Badge]|
|
||||
| Control: [ Template Select Dropdown [v] ] [ Trigger Analysis Button ]|
|
||||
+-----------------------------------------------------------------------+
|
||||
| |
|
||||
| [ Tab Navigation Bar ] |
|
||||
| +-----------+ +--------------+ +------------+ +------------+ +-----+ |
|
||||
| | 股价图表 | | 基本面数据 | | 分析模块A | | 分析模块B | | ... | |
|
||||
| +-----------+ +--------------+ +------------+ +------------+ +-----+ |
|
||||
| | |
|
||||
+-----------------------------------------------------------------------+
|
||||
| [ Main Content Area ] |
|
||||
| |
|
||||
| (Content changes based on selected Tab) |
|
||||
| |
|
||||
| SCENARIO 1: Stock Chart Tab |
|
||||
| +-------------------------------------------------+ |
|
||||
| | [ PLACEHOLDER: TradingView / K-Line Chart ] | |
|
||||
| | (Future: Connect to Time-Series DB) | |
|
||||
| +-------------------------------------------------+ |
|
||||
| |
|
||||
| SCENARIO 2: Fundamental Data Tab |
|
||||
| +-------------------------------------------------+ |
|
||||
| | Status: Waiting for Providers (2/3)... | |
|
||||
| | --------------------------------------------- | |
|
||||
| | [Tushare]: OK (JSON/Table Dump) | |
|
||||
| | [Finnhub]: OK (JSON/Table Dump) | |
|
||||
| | [AlphaV ]: Pending... | |
|
||||
| +-------------------------------------------------+ |
|
||||
| |
|
||||
| SCENARIO 3: Analysis Module Tab (e.g., Valuation) |
|
||||
| +-------------------------------------------------+ |
|
||||
| | [Markdown Renderer] | |
|
||||
| | ## Valuation Analysis | |
|
||||
| | Based on the PE ratio of 30... | |
|
||||
| | (Streaming Cursor) _ | |
|
||||
| +-------------------------------------------------+ |
|
||||
| |
|
||||
+-----------------------------------------------------------------------+
|
||||
| [ Execution Details Footer / Tab ] |
|
||||
| Total Time: 12s | Tokens: 4050 | Cost: $0.02 |
|
||||
+-----------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
## 3. 数据流与状态机
|
||||
|
||||
### 3.1 固定 Tab 定义
|
||||
无论选择何种模板,以下 Tab 始终存在(Fixed Tabs):
|
||||
|
||||
1. **股价图表 (Stock Chart)**
|
||||
* **数据源**: 独立的实时行情 API / 时间序列数据库。
|
||||
* **当前实现**: 占位符 (Placeholder)。
|
||||
2. **基本面数据 (Fundamental Data)**
|
||||
* **定义**: 所有已启用的 Data Providers 返回的原始数据聚合。
|
||||
* **状态逻辑**:
|
||||
* 此 Tab 代表“数据准备阶段”。
|
||||
* 必须等待后端 `FetchCompanyDataCommand` 对应的 Task 状态为 Completed/Partial/Failed。
|
||||
* UI 展示所有 Provider 的回执。只有当所有 Provider 都有定论(成功或失败),此阶段才算结束。
|
||||
* **作为后续分析的“门控”**: 此阶段未完成前,后续分析 Tab 处于“等待中”状态。
|
||||
3. **执行详情 (Execution Details)**
|
||||
* **定义**: 工作流的元数据汇总。
|
||||
* **内容**: 耗时统计、Token 消耗、API 调用清单。
|
||||
|
||||
### 3.2 动态 Tab 定义 (Analysis Modules)
|
||||
* **来源**: 根据当前选中的 `Template ID` 从后端获取 `AnalysisTemplateConfig`。
|
||||
* **生成逻辑**:
|
||||
* Template 中定义了 Modules: `[Module A, Module B, Module C]`.
|
||||
* 前端直接映射为 Tab A, Tab B, Tab C。
|
||||
* **渲染**:
|
||||
* **Loading**: 后端 `AnalysisResult` 状态为 `processing`。
|
||||
* **Streaming**: 通过 SSE (`/api/analysis-results/stream`) 接收增量内容。
|
||||
* **Done**: 后端流结束,或直接从 DB 读取完整内容。
|
||||
|
||||
### 3.3 状态机 (useReportEngine Hook)
|
||||
|
||||
我们将废弃旧的 Hook,实现一个纯粹的 `useReportEngine`。
|
||||
|
||||
```typescript
|
||||
interface ReportState {
|
||||
// 1. 配置上下文
|
||||
symbol: string;
|
||||
templateId: string;
|
||||
templateConfig: AnalysisTemplateSet | null; // 用于生成动态 Tab
|
||||
|
||||
// 2. 阶段状态
|
||||
fetchStatus: 'idle' | 'fetching' | 'complete' | 'error'; // 基本面数据阶段
|
||||
analysisStatus: 'idle' | 'running' | 'complete'; // 分析阶段
|
||||
|
||||
// 3. 数据持有
|
||||
fundamentalData: any[]; // 来自各个 Provider 的原始数据
|
||||
analysisResults: Record<string, AnalysisResultDto>; // Key: ModuleID
|
||||
|
||||
// 4. 进度
|
||||
executionMeta: {
|
||||
startTime: number;
|
||||
elapsed: number;
|
||||
tokens: number;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 交互流程
|
||||
|
||||
1. **初始化**:
|
||||
* 用户进入页面 -> 加载 `api/configs/analysis_template_sets` -> 填充下拉框。
|
||||
* 如果 URL 或历史数据中有 `template_id`,自动选中。
|
||||
|
||||
2. **触发 (Trigger)**:
|
||||
* 用户点击“开始分析”。
|
||||
* 前端 POST `/api/data-requests` (payload: `{ symbol, template_id }`)。
|
||||
* **前端重置所有动态 Tab 内容为空**。
|
||||
* 进入 `fetchStatus: fetching`。
|
||||
|
||||
3. **阶段一:基本面数据获取**:
|
||||
* 前端轮询 `/api/tasks/{request_id}`。
|
||||
* **基本面 Tab** 高亮/显示 Spinner。
|
||||
* 展示各个 Provider 的子任务进度。
|
||||
* 当 Task 状态 = Completed -> 进入阶段二。
|
||||
|
||||
4. **阶段二:流式分析 (SSE)**:
|
||||
* 前端建立 EventSource 连接 `/api/analysis-results/stream?request_id={id}`。
|
||||
* **智能切换 Tab**: (可选) 当某个 Module 开始生成 (收到 SSE 事件 `module_start`) 时,UI 可以自动切换到该 Tab。
|
||||
* **渲染**: 收到 `content` 事件,追加到对应 Module 的内容中。
|
||||
* **持久化**: 只有当 SSE 收到 `DONE` 事件时,后端才保证数据已落库。
|
||||
|
||||
5. **完成**:
|
||||
* SSE 连接关闭。
|
||||
* 状态转为 `complete`。
|
||||
|
||||
## 5. 架构设计 (Architecture Design)
|
||||
|
||||
为了实现真流式传输,后端架构调整如下:
|
||||
|
||||
1. **内存状态管理 (In-Memory State)**:
|
||||
* `AppState` 中增加 `stream_manager: StreamManager`。
|
||||
* `StreamManager` 维护 `HashMap<RequestId, BroadcastSender<StreamEvent>>`。
|
||||
* 这消除了对数据库的中间状态写入压力。
|
||||
2. **Worker 职责**:
|
||||
* Worker 执行 LLM 请求。
|
||||
* 收到 Token -> 写入 `BroadcastSender` (Fire and forget)。
|
||||
* 同时将 Token 累积在内存 Buffer 中。
|
||||
* 生成结束 -> 将完整 Buffer 写入数据库 (PostgreSQL) -> 广播 `ModuleDone` 事件。
|
||||
3. **API 职责**:
|
||||
* `GET /stream`:
|
||||
* 检查内存中是否有对应的 `BroadcastSender`?
|
||||
* **有**: 建立 SSE 连接,订阅并转发事件。
|
||||
* **无**: 检查数据库是否已完成?
|
||||
* **已完成**: 一次性返回完整内容 (模拟 SSE 或直接返回 JSON)。
|
||||
* **未开始/不存在**: 返回 404 或等待。
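A minimal sketch of the in-memory `StreamManager` described above, assuming `tokio::sync::broadcast`; the `StreamEvent` variants and channel capacity are illustrative, not a fixed contract.

```rust
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;
use uuid::Uuid;

/// Event pushed to SSE subscribers; the exact variants are illustrative.
#[derive(Clone, Debug)]
pub enum StreamEvent {
    ModuleStart { module_id: String },
    Content { module_id: String, delta: String },
    ModuleDone { module_id: String },
}

/// In-memory registry of per-request broadcast channels (no intermediate DB writes).
#[derive(Clone, Default)]
pub struct StreamManager {
    channels: Arc<Mutex<HashMap<Uuid, broadcast::Sender<StreamEvent>>>>,
}

impl StreamManager {
    /// Workers call this to obtain (or lazily create) the sender for a request.
    pub fn sender(&self, request_id: Uuid) -> broadcast::Sender<StreamEvent> {
        let mut map = self.channels.lock().unwrap();
        map.entry(request_id)
            .or_insert_with(|| broadcast::channel(256).0)
            .clone()
    }

    /// The SSE handler subscribes here; `None` means there is no live stream
    /// and the caller should fall back to the database.
    pub fn subscribe(&self, request_id: &Uuid) -> Option<broadcast::Receiver<StreamEvent>> {
        self.channels
            .lock()
            .unwrap()
            .get(request_id)
            .map(|tx| tx.subscribe())
    }
}
```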
|
||||
|
||||
## 6. 迁移计划 (Action Items)
|
||||
|
||||
### 6.1 清理与归档 (Cleanup)
|
||||
- [x] 创建 `frontend/archive/v1_report` 目录。
|
||||
- [x] 移动 `app/report/[symbol]/components` 下的旧组件(`ExecutionDetails.tsx`, `TaskStatus.tsx`, `ReportHeader.tsx`, `AnalysisContent.tsx`)到 archive。
|
||||
- [x] 移动 `app/report/[symbol]/hooks` 下的 `useAnalysisRunner.ts` 和 `useReportData.ts` 到 archive。
|
||||
|
||||
### 6.2 核心构建 (Core Scaffolding)
|
||||
- [x] 创建 `hooks/useReportEngine.ts`: 实现上述状态机,严格对接后端 API。
|
||||
- [x] 创建 `components/ReportLayout.tsx`: 实现新的布局框架(Header + Tabs + Content)。
|
||||
- [x] 创建 `components/RawDataViewer.tsx`: 用于展示基本面原始数据(JSON View)。
|
||||
- [x] 创建 `components/AnalysisViewer.tsx`: 用于展示分析结果(Markdown Streaming)。
|
||||
|
||||
### 6.3 页面集成 (Integration)
|
||||
- [x] 重写 `app/report/[symbol]/page.tsx`: 引入 `useReportEngine` 和新组件。
|
||||
- [ ] 验证全流程:Trigger -> Task Fetching -> Analysis Streaming -> Finish。
|
||||
|
||||
### 6.4 后端重构 (Backend Refactoring) - NEW
|
||||
- [x] **State Upgrade**: 更新 `AppState` 引入 `tokio::sync::broadcast` 用于流式广播。
|
||||
- [x] **Worker Update**: 修改 `run_report_generation_workflow`,不再生成完才写库,也不中间写库,而是**中间发广播,最后写库**。
|
||||
- [x] **API Update**: 新增 `GET /api/analysis-results/stream` (SSE Endpoint),对接广播通道。
|
||||
- [x] **Frontend Update**: 修改 `useReportEngine.ts`,将轮询 `analysis-results` 改为 `EventSource` 连接。
|
||||
@ -0,0 +1,148 @@
|
||||
# 供应商隔离的数据新鲜度与缓存设计方案
|
||||
|
||||
## 1. 背景 (Background)
|
||||
|
||||
当前系统使用 `company_profiles` 表中的全局 `updated_at` 时间戳来判断某个股票的数据是否“新鲜”(例如:过去 24 小时内更新过)。
|
||||
|
||||
**现有问题:**
|
||||
这种方法在多供应商(Multi-Provider)环境中会导致严重的竞态条件(Race Condition):
|
||||
1. **Tushare**(A股数据源)通常响应较快,获取数据并更新了 `company_profiles` 表的 `updated_at`。
|
||||
2. `updated_at` 时间戳被更新为 `NOW()`。
|
||||
3. **YFinance** 或 **AlphaVantage**(全球数据源)稍后启动任务。
|
||||
4. 它们检查 `company_profiles` 表,发现 `updated_at` 非常新,因此错误地认为**自己的**数据也是最新的。
|
||||
5. 结果:YFinance/AlphaVantage 跳过执行,导致这些特定字段的数据为空或陈旧。
|
||||
|
||||
## 2. 目标 (Objective)
|
||||
|
||||
实现一个**供应商隔离的缓存机制**,允许每个数据供应商(Tushare, YFinance, AlphaVantage, Finnhub)能够:
|
||||
1. 独立追踪其最后一次成功更新数据的时间。
|
||||
2. 仅根据**自己的**数据新鲜度来决定是否执行任务。
|
||||
3. 避免干扰其他供应商的执行逻辑。
|
||||
|
||||
## 3. 设计原则 (Design Principles)
|
||||
|
||||
1. **不新增数据表**:利用数据库现有的文档-关系混合特性(Document-Relational)。具体来说,使用 `company_profiles` 表中的 `additional_info` (JSONB) 字段。
|
||||
2. **服务层抽象**:解析和管理这些元数据的复杂性应封装在 `Data Persistence Service` 内部,向各 Provider Service 暴露简洁的 API。
|
||||
3. **并发安全**:确保不同供应商的并发更新不会覆盖彼此的元数据状态。
|
||||
|
||||
## 4. 数据结构设计 (Data Structure Design)
|
||||
|
||||
我们将利用现有的 `company_profiles.additional_info` 字段(类型:`JSONB`)来存储一个供应商状态字典。
|
||||
|
||||
### `additional_info` JSON Schema 设计
|
||||
|
||||
```json
|
||||
{
|
||||
"provider_status": {
|
||||
"tushare": {
|
||||
"last_updated": "2025-11-19T10:00:00Z",
|
||||
"data_version": "v1",
|
||||
"status": "success"
|
||||
},
|
||||
"yfinance": {
|
||||
"last_updated": "2025-11-18T09:30:00Z",
|
||||
"status": "success"
|
||||
},
|
||||
"alphavantage": {
|
||||
"last_updated": "2025-11-15T14:00:00Z",
|
||||
"status": "partial_success" // 例如:触发了速率限制
|
||||
}
|
||||
},
|
||||
"other_metadata": "..." // 保留其他现有元数据
|
||||
}
|
||||
```
|
||||
|
||||
## 5. 实施计划 (Implementation Plan)
|
||||
|
||||
### 5.1. 数据持久化服务更新 (Data Persistence Service)
|
||||
|
||||
我们需要扩展 `PersistenceClient` 及其底层 API,以支持细粒度的元数据更新。
|
||||
|
||||
**新增/更新 API 端点:**
|
||||
|
||||
1. **`PUT /companies/{symbol}/providers/{provider_id}/status`** (新增)
|
||||
* **目的**:原子更新特定供应商的状态,无需读取/写入完整的 profile。
|
||||
* **实现**:使用 Postgres 的 `jsonb_set` 函数,直接更新 JSON 路径 `['provider_status', provider_id]`。
|
||||
* **Payload**:
|
||||
```json
|
||||
{
|
||||
"last_updated": "2025-11-19T12:00:00Z",
|
||||
"status": "success"
|
||||
}
|
||||
```
|
||||
|
||||
2. **`GET /companies/{symbol}/providers/{provider_id}/status`** (新增)
|
||||
* **目的**:辅助接口,用于获取特定供应商的当前缓存状态。
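A sketch of how the persistence layer could back the `PUT` endpoint with a single `jsonb_set` partial update is shown below, assuming `sqlx` with the Postgres and JSON features. The table and column names come from this document; the query text and function signature are illustrative.

```rust
use chrono::{DateTime, Utc};
use serde_json::json;
use sqlx::PgPool;

/// Illustrative DB logic for `PUT /companies/{symbol}/providers/{provider_id}/status`:
/// patch only `additional_info -> provider_status -> {provider_id}` and leave the
/// rest of the JSONB document untouched (avoids read-modify-write races).
pub async fn update_provider_status(
    pool: &PgPool,
    symbol: &str,
    provider_id: &str,
    last_updated: DateTime<Utc>,
    status: &str,
) -> Result<(), sqlx::Error> {
    let entry = json!({ "last_updated": last_updated, "status": status });

    sqlx::query(
        r#"
        UPDATE company_profiles
        SET additional_info = jsonb_set(
            COALESCE(additional_info, '{}'::jsonb),
            ARRAY['provider_status', $2],
            $3,
            true
        )
        WHERE symbol = $1
        "#,
    )
    .bind(symbol)
    .bind(provider_id)
    .bind(entry)
    .execute(pool)
    .await?;

    Ok(())
}
```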
|
||||
|
||||
### 5.2. 供应商服务工作流更新 (Provider Service)
|
||||
|
||||
每个 Provider Service(例如 `yfinance-provider-service`)将修改其 `worker.rs` 中的逻辑:
|
||||
|
||||
**现有逻辑(有缺陷):**
|
||||
```rust
|
||||
let profile = client.get_company_profile(symbol).await?;
|
||||
if profile.updated_at > 24h_ago { return; } // 全局检查
|
||||
```
|
||||
|
||||
**新逻辑:**
|
||||
```rust
// 1. Check the provider-specific cache entry first
let status = client.get_provider_status(symbol, "yfinance").await?;
if let Some(s) = status {
    if s.last_updated > Utc::now() - chrono::Duration::hours(24) {
        info!("YFinance data is fresh enough; skipping fetch.");
        return Ok(());
    }
}

// 2. Fetch and persist the data
// ... fetch ...
client.upsert_company_profile(profile).await?; // update base profile
client.batch_insert_financials(financials).await?;

// 3. Record this provider's own freshness state
client.update_provider_status(symbol, "yfinance", ProviderStatus {
    last_updated: Utc::now(),
    status: "success"
}).await?;
```
|
||||
|
||||
## 6. 风险管理与迁移 (Risk Management & Migration)
|
||||
|
||||
* **竞态条件 (Race Conditions)**:通过在数据库层使用 `jsonb_set` 进行部分更新,我们避免了“读-改-写”的竞态条件,确保 Provider A 的更新不会覆盖 Provider B 同时写入的状态。
|
||||
* **数据迁移 (Migration)**:
|
||||
* **策略**:**Lazy Migration (懒迁移)**。
|
||||
* 现有数据中没有 `provider_status` 字段。代码将优雅地处理 `null` 或缺失键的情况(将其视为“陈旧/从未运行”,触发重新获取)。
|
||||
* **无需**编写专门的 SQL 迁移脚本去清洗历史数据。旧数据会随着新的抓取任务运行而自动补充上状态信息。
|
||||
* 如果必须清理,可以直接执行 `UPDATE company_profiles SET additional_info = additional_info - 'provider_status';` 来重置所有缓存状态。
|
||||
|
||||
## 7. 实施清单 (Implementation Checklist)
|
||||
|
||||
- [x] **Phase 1: Common Contracts & DTOs**
|
||||
- [x] 在 `services/common-contracts/src/dtos.rs` 中定义 `ProviderStatusDto`.
|
||||
|
||||
- [x] **Phase 2: Data Persistence Service API**
|
||||
- [x] 实现 DB 层逻辑: `get_provider_status` (读取 JSONB).
|
||||
- [x] 实现 DB 层逻辑: `update_provider_status` (使用 `jsonb_set`).
|
||||
- [x] 添加 API Handler: `GET /companies/{symbol}/providers/{provider_id}/status`.
|
||||
- [x] 添加 API Handler: `PUT /companies/{symbol}/providers/{provider_id}/status`.
|
||||
- [x] 注册路由并测试接口.
|
||||
|
||||
- [x] **Phase 3: Client Logic Update**
|
||||
- [x] 更新各服务中的 `PersistenceClient` (如 `services/yfinance-provider-service/src/persistence.rs` 等),增加 `get_provider_status` 和 `update_provider_status` 方法.
|
||||
|
||||
- [x] **Phase 4: Provider Services Integration**
|
||||
- [x] **Tushare Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
- [x] **YFinance Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
- [x] **AlphaVantage Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
- [x] **Finnhub Service**: 更新 `worker.rs`,集成新的缓存检查逻辑.
|
||||
|
||||
- [ ] **Phase 5: Verification (验证)**
|
||||
- [ ] 运行 `scripts/test_data_fetch.py` 验证全流程.
|
||||
- [ ] 验证不同 Provider 的状态互不干扰.
|
||||
|
||||
- [ ] **Phase 6: Caching Logic Abstraction (缓存逻辑抽象 - 智能客户端)**
|
||||
- [ ] 将 `PersistenceClient` 迁移至 `services/common-contracts/src/persistence_client.rs`(或新建 `service-sdk` 库),消除重复代码。
|
||||
- [ ] 在共享客户端中实现高层方法 `should_fetch_data(symbol, provider, ttl)`。
|
||||
- [ ] 重构所有 Provider Service 以使用共享的 `PersistenceClient`。
|
||||
- [ ] 验证所有 Provider 的缓存逻辑是否一致且无需手动实现。
|
||||
@ -0,0 +1,128 @@
|
||||
# 报告生成优化与 UI 状态反馈改进设计文档
|
||||
|
||||
**状态**: Draft
|
||||
**日期**: 2025-11-19
|
||||
**涉及模块**: Report Generator Service (Backend), Frontend (UI)
|
||||
|
||||
## 1. 背景与问题分析
|
||||
|
||||
当前系统的报告生成流程存在两个主要痛点,导致用户体验不佳且生成内容质量低下:
|
||||
|
||||
1. **数据注入缺失 (Data Injection Gap)**:
|
||||
* 后端在执行 Prompt 渲染时,`financial_data` 被硬编码为 `"..."`。
|
||||
* 大模型(LLM)缺乏上下文输入,导致输出“幻觉”内容(如自我介绍、复读指令)或通用废话。
|
||||
* 依赖链条虽然在拓扑排序上是正确的,但由于上游(如“基本面分析”)输出无效内容,下游(如“最终结论”)的输入也随之失效。
|
||||
|
||||
2. **UI 状态反馈缺失 (UI/UX Gap)**:
|
||||
* 前端仅有简单的“有数据/无数据”判断。
|
||||
* 点击“重新生成”时,UI 往往显示旧的缓存数据,缺乏“生成中”或“进度更新”的实时反馈。
|
||||
* 用户无法区分“旧报告”和“正在生成的新报告”。
|
||||
|
||||
## 2. 后端优化设计 (Report Generator Service)
|
||||
|
||||
### 2.1 数据注入逻辑修复 (Fixing Financial Data Injection)
|
||||
|
||||
我们将把当前的“基本面数据获取”视为一个**内置的基础工具(Native Tool)**。
|
||||
|
||||
* **当前逻辑**: 直接透传数据库 Raw Data。
|
||||
* **改进逻辑**: 在 `worker.rs` 中实现一个数据格式化器,将 `Vec<TimeSeriesFinancialDto>` 转换为 LLM 易读的 Markdown 表格或结构化文本。
|
||||
|
||||
**实现细节**:
|
||||
1. **格式化函数**: 实现 `format_financials_to_markdown(financials: &[TimeSeriesFinancialDto]) -> String`。
|
||||
* 按年份/季度降序排列。
|
||||
* 提取关键指标(营收、净利润、ROE、毛利率等)。
|
||||
* 生成 Markdown Table。
|
||||
2. **注入 Context**:
|
||||
* 在 `Tera` 模板渲染前,调用上述函数。
|
||||
* 替换占位符: `context.insert("financial_data", &formatted_data);`。
|
||||
3. **上游依赖注入 (保持不变)**:
|
||||
* 继续保留现有的 `generated_results` 注入逻辑,确保上游模块(如 `market_analysis`)的输出能正确传递给下游(如 `final_conclusion`)。
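A minimal sketch of the formatter named above follows. The fields assumed on `TimeSeriesFinancialDto` are illustrative; the real DTO lives in `common-contracts` and may differ.

```rust
/// Illustrative stand-in for the shared DTO; only a few metrics are shown.
struct TimeSeriesFinancialDto {
    period: String, // e.g. "2024Q4"
    revenue: Option<f64>,
    net_income: Option<f64>,
    roe: Option<f64>,
}

/// Turn raw financial rows into an LLM-friendly Markdown table,
/// newest period first as the design calls for.
fn format_financials_to_markdown(financials: &[TimeSeriesFinancialDto]) -> String {
    let mut rows: Vec<&TimeSeriesFinancialDto> = financials.iter().collect();
    rows.sort_by(|a, b| b.period.cmp(&a.period));

    let fmt = |v: Option<f64>| v.map(|x| format!("{x:.2}")).unwrap_or_else(|| "-".into());
    let mut out = String::from("| Period | Revenue | Net Income | ROE |\n|---|---|---|---|\n");
    for f in rows {
        out.push_str(&format!(
            "| {} | {} | {} | {} |\n",
            f.period,
            fmt(f.revenue),
            fmt(f.net_income),
            fmt(f.roe)
        ));
    }
    out
}
```

The rendered string is then placed into the Tera context via the `context.insert("financial_data", &formatted_data);` call mentioned above.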
|
||||
|
||||
### 2.2 执行状态管理 (Execution Status Management)
|
||||
|
||||
为了支持前端的“实时状态”,后端需要能够区分“排队中”、“生成中”和“已完成”。
|
||||
|
||||
* **现状**: 只有生成完成后才写入 `analysis_results` 表。
|
||||
* **改进**: 引入任务状态流转。
|
||||
|
||||
**方案 A (基于数据库 - 推荐 MVP)**:
|
||||
利用现有的 `analysis_results` 表或新建 `analysis_tasks` 表。
|
||||
1. **任务开始时**:
|
||||
* Worker 开始处理某个 `module_id` 时,立即写入/更新一条记录。
|
||||
* `status`: `PROCESSING`
|
||||
* `content`: 空或 "Analysis in progress..."
|
||||
2. **任务完成时**:
|
||||
* 更新记录。
|
||||
* `status`: `COMPLETED`
|
||||
* `content`: 实际生成的 Markdown。
|
||||
3. **任务失败时**:
|
||||
* `status`: `FAILED`
|
||||
* `content`: 错误信息。
|
||||
|
||||
### 2.3 未来扩展性:工具模块 (Future Tool Module)
|
||||
|
||||
* 当前设计中,`financial_data` 是硬编码注入的。
|
||||
* **未来规划**: 在 Prompt 模板配置中,增加 `tools` 字段。
|
||||
```json
|
||||
"tools": ["financial_aggregator", "news_search", "calculator"]
|
||||
```
|
||||
* Worker 在渲染 Prompt 前,先解析 `tools` 配置,并行执行对应的工具函数(如 Python 数据清洗脚本),获取输出后注入 Context。当前修复的 `financial_data` 本质上就是 `financial_aggregator` 工具的默认实现。
|
||||
|
||||
## 3. 前端优化设计 (Frontend)
|
||||
|
||||
### 3.1 状态感知与交互
|
||||
|
||||
**目标**: 让用户清晰感知到“正在生成”。
|
||||
|
||||
1. **重新生成按钮行为**:
|
||||
* 点击“重新生成”后,**立即**将当前模块的 UI 状态置为 `GENERATING`。
|
||||
* **视觉反馈**:
|
||||
* 方案一(简单):清空旧内容,显示 Skeleton(骨架屏)+ 进度条/Spinner。
|
||||
* 方案二(平滑):保留旧内容,但在上方覆盖一层半透明遮罩,并显示“正在更新分析...”。(推荐方案二,避免内容跳动)。
|
||||
|
||||
2. **状态轮询 (Polling)**:
|
||||
* 由于后端暂未实现 SSE (Server-Sent Events),前端需采用轮询机制。
|
||||
* 当状态为 `GENERATING` 时,每隔 2-3 秒调用一次 API 检查该 `module_id` 的状态。
|
||||
* 当后端返回状态变更为 `COMPLETED` 时,停止轮询,刷新显示内容。
|
||||
|
||||
### 3.2 组件结构调整
|
||||
|
||||
修改 `AnalysisContent.tsx` 组件:
|
||||
|
||||
```typescript
|
||||
interface AnalysisState {
|
||||
status: 'idle' | 'loading' | 'success' | 'error';
|
||||
data: string | null; // Markdown content
|
||||
isStale: boolean; // 标记当前显示的是否为旧缓存
|
||||
}
|
||||
```
|
||||
|
||||
* **Idle**: 初始状态。
|
||||
* **Loading**: 点击生成后,显示加载动画。
|
||||
* **Success**: 获取到新数据。
|
||||
* **IsStale**: 点击重新生成瞬间,将 `isStale` 设为 true。UI 上可以给旧文本加灰色滤镜,直到新数据到来。
|
||||
|
||||
## 4. 实施计划 (Action Plan)
|
||||
|
||||
### Phase 1: 后端数据修正 (Backend Core)
|
||||
- [ ] 修改 `services/report-generator-service/src/worker.rs`。
|
||||
- [ ] 实现 `format_financial_data` 辅助函数。
|
||||
- [ ] 将格式化后的数据注入 Tera Context。
|
||||
- [ ] 验证大模型输出不再包含“幻觉”文本。
|
||||
|
||||
### Phase 2: 后端状态透出 (Backend API)
|
||||
- [ ] 确认 `NewAnalysisResult` 或相关 DTO 是否支持状态字段。
|
||||
- [ ] 在 Worker 开始处理模块时,写入 `PROCESSING` 状态到数据库。
|
||||
- [ ] 确保 API 查询接口能返回 `status` 字段。
|
||||
|
||||
### Phase 3: 前端体验升级 (Frontend UI)
|
||||
- [ ] 修改 `AnalysisContent.tsx`,增加对 `status` 字段的处理。
|
||||
- [ ] 实现“重新生成”时的 UI 遮罩或 Loading 状态,不再单纯依赖 `useQuery` 的缓存。
|
||||
- [ ] 优化 Markdown 渲染区的用户体验。
|
||||
|
||||
## 5. 验收标准 (Acceptance Criteria)
|
||||
|
||||
1. **内容质量**: 市场分析、基本面分析报告中包含具体的财务数字(如营收、利润),且引用正确,不再出现“请提供数据”的字样。
|
||||
2. **流程闭环**: 点击“重新生成”,UI 显示加载状态 -> 后端处理 -> UI 自动刷新为新内容。
|
||||
3. **无闪烁**: 页面不会因为轮询而频繁闪烁,状态切换平滑。
|
||||
|
||||
@ -0,0 +1,225 @@
|
||||
# 架构重构设计文档:引入 Workflow Orchestrator
|
||||
|
||||
## 1. 背景与目标
|
||||
当前系统存在 `api-gateway` 职责过载、业务逻辑分散、状态机隐式且脆弱、前后端状态不同步等核心问题。为了彻底解决这些架构痛点,本设计提出引入 **Workflow Orchestrator Service**,作为系统的“大脑”,负责集中管理业务流程、状态流转与事件协调。
|
||||
|
||||
### 核心目标
|
||||
1. **解耦 (Decoupling)**: 将业务协调逻辑从 `api-gateway` 剥离,Gateway 回归纯粹的流量入口和连接管理职责。
|
||||
2. **状态一致性 (Consistency)**: 建立单一事实来源 (Single Source of Truth),所有业务状态由 Orchestrator 统一维护并广播。
|
||||
3. **细粒度任务编排 (Fine-Grained Orchestration)**: 废除粗粒度的“阶段”概念,转向基于 DAG (有向无环图) 的任务编排。后端只负责执行任务和广播每个任务的状态,前端根据任务状态自由决定呈现逻辑。
|
||||
|
||||
## 2. 架构全景图 (Architecture Overview)
|
||||
|
||||
### 2.1 服务角色重定义
|
||||
|
||||
| 服务 | 现有职责 | **新职责** |
|
||||
| :--- | :--- | :--- |
|
||||
| **API Gateway** | 路由, 鉴权, 注册发现, 业务聚合, 流程触发 | 路由, 鉴权, 注册发现, **SSE/WS 代理 (Frontend Proxy)** |
|
||||
| **Workflow Orchestrator** | *(新服务)* | **DAG 调度**, **任务依赖管理**, **事件广播**, **状态快照** |
|
||||
| **Data Providers** | 数据抓取, 存库, 发 NATS 消息 | (保持不变) 接收指令 -> 干活 -> 发结果事件 |
|
||||
| **Report Generator** | 报告生成, 发 NATS 消息 | (保持不变) 接收指令 -> 干活 -> 发进度/结果事件 |
|
||||
| **Data Processors** | *(新服务类型)* | **数据清洗/转换** (接收上下文 -> 转换 -> 更新上下文) |
|
||||
|
||||
### 2.2 数据流向 (Data Flow)
|
||||
|
||||
1. **启动**: 前端 -> Gateway (`POST /start`) -> **Orchestrator** (NATS: `StartWorkflow`)
|
||||
2. **调度**: **Orchestrator** 解析模板构建 DAG -> NATS: 触发无依赖的 Tasks (如 Data Fetching)
|
||||
3. **反馈**: Executors (Providers/ReportGen/Processors) -> NATS: `TaskCompleted` -> **Orchestrator**
|
||||
4. **流转**: **Orchestrator** 检查依赖 -> NATS: 触发下一层 Tasks
|
||||
5. **广播**: **Orchestrator** -> NATS: `WorkflowEvent` (Task Status Updates) -> Gateway -> 前端 (SSE)
|
||||
|
||||
## 3. 接口与协议定义 (Contracts & Schemas)
|
||||
|
||||
需在 `services/common-contracts` 中进行以下调整:
|
||||
|
||||
### 3.1 新增 Commands (NATS Subject: `workflow.commands.*`)
|
||||
|
||||
```rust
|
||||
// Topic: workflow.commands.start
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct StartWorkflowCommand {
|
||||
pub request_id: Uuid,
|
||||
pub symbol: CanonicalSymbol,
|
||||
pub market: String,
|
||||
pub template_id: String,
|
||||
}
|
||||
|
||||
// 新增:用于手动请求状态对齐 (Reconnect Scenario)
|
||||
// Topic: workflow.commands.sync_state
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct SyncStateCommand {
|
||||
pub request_id: Uuid,
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 新增 Events (NATS Subject: `events.workflow.{request_id}`)
|
||||
|
||||
这是前端唯一需要订阅的流。
|
||||
|
||||
```rust
|
||||
// Topic: events.workflow.{request_id}
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "type", content = "payload")]
|
||||
pub enum WorkflowEvent {
|
||||
// 1. 流程初始化 (携带完整的任务依赖图)
|
||||
WorkflowStarted {
|
||||
timestamp: i64,
|
||||
// 定义所有任务及其依赖关系,前端可据此绘制流程图或进度条
|
||||
task_graph: WorkflowDag
|
||||
},
|
||||
|
||||
// 2. 任务状态变更 (核心事件)
|
||||
TaskStateChanged {
|
||||
task_id: String, // e.g., "fetch:tushare", "process:clean_financials", "module:swot_analysis"
|
||||
task_type: TaskType, // DataFetch | DataProcessing | Analysis
|
||||
status: TaskStatus, // Pending, Scheduled, Running, Completed, Failed, Skipped
|
||||
message: Option<String>,
|
||||
timestamp: i64
|
||||
},
|
||||
|
||||
// 3. 任务流式输出 (用于 LLM 打字机效果)
|
||||
TaskStreamUpdate {
|
||||
task_id: String,
|
||||
content_delta: String,
|
||||
index: u32
|
||||
},
|
||||
|
||||
// 4. 流程整体结束
|
||||
WorkflowCompleted {
|
||||
result_summary: serde_json::Value,
|
||||
end_timestamp: i64
|
||||
},
|
||||
|
||||
WorkflowFailed {
|
||||
reason: String,
|
||||
is_fatal: bool,
|
||||
end_timestamp: i64
|
||||
},
|
||||
|
||||
// 5. 状态快照 (用于重连/丢包恢复)
|
||||
// 当前端重连或显式发送 SyncStateCommand 时,Orchestrator 发送此事件
|
||||
WorkflowStateSnapshot {
|
||||
timestamp: i64,
|
||||
task_graph: WorkflowDag,
|
||||
tasks_status: HashMap<String, TaskStatus>, // 当前所有任务的最新状态
|
||||
tasks_output: HashMap<String, Option<String>> // (可选) 已完成任务的关键输出摘要
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct WorkflowDag {
|
||||
pub nodes: Vec<TaskNode>,
|
||||
pub edges: Vec<TaskDependency> // from -> to
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TaskNode {
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub r#type: TaskType, // `type` is a Rust keyword; the raw identifier keeps the serialized field name
|
||||
pub initial_status: TaskStatus
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq)]
|
||||
pub enum TaskType {
|
||||
DataFetch, // 创造原始上下文
|
||||
DataProcessing, // 消耗并转换上下文 (New)
|
||||
Analysis // 读取上下文生成新内容
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq)]
|
||||
pub enum TaskStatus {
|
||||
Pending, // 等待依赖
|
||||
Scheduled, // 依赖满足,已下发给 Worker
|
||||
Running, // Worker 正在执行
|
||||
Completed, // 执行成功
|
||||
Failed, // 执行失败
|
||||
Skipped // 因上游失败或策略原因被跳过
|
||||
}
|
||||
```
|
||||
|
||||
### 3.3 调整现有 Messages
|
||||
|
||||
* **`FetchCompanyDataCommand`**: Publisher 变更为 `Workflow Orchestrator`。
|
||||
* **`GenerateReportCommand`**: Publisher 变更为 `Workflow Orchestrator`。
|
||||
|
||||
## 4. Workflow Orchestrator 内部设计
|
||||
|
||||
### 4.1 DAG 调度器 (DAG Scheduler)
|
||||
每个 `request_id` 对应一个 DAG 实例。
|
||||
|
||||
1. **初始化**: 根据 `TemplateID` 读取配置。
|
||||
* 创建 Data Fetch Tasks (作为 DAG 的 Root Nodes)。
|
||||
* 创建 Analysis Module Tasks (根据 `dependencies` 配置连接边)。
|
||||
2. **依赖检查**:
|
||||
* 监听 Task 状态变更。
|
||||
* 当 Task A 变成 `Completed` -> 检查依赖 A 的 Task B。
|
||||
* 如果 Task B 的所有依赖都 `Completed` -> 触发 Task B。
|
||||
* 如果 Task A `Failed` -> 将依赖 A 的 Task B 标记为 `Skipped` (除非有容错策略)。
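A self-contained sketch of that dependency check is given below. The types are simplified local copies of the contracts above, and the "at least one provider" policy from §4.3 is deliberately not applied here.

```rust
use std::collections::{HashMap, HashSet};

#[derive(Clone, Copy, PartialEq, Eq)]
enum TaskStatus { Pending, Scheduled, Running, Completed, Failed, Skipped }

/// Minimal in-memory DAG state; `deps` maps a task id to its upstream task ids.
struct DagState {
    status: HashMap<String, TaskStatus>,
    deps: HashMap<String, HashSet<String>>,
}

impl DagState {
    /// Called when a TaskCompleted / TaskFailed event arrives.
    /// Returns the task ids that have just become ready to schedule.
    fn on_task_finished(&mut self, task_id: &str, success: bool) -> Vec<String> {
        self.status.insert(
            task_id.to_string(),
            if success { TaskStatus::Completed } else { TaskStatus::Failed },
        );

        let mut ready = Vec::new();
        for (candidate, deps) in &self.deps {
            if self.status.get(candidate) != Some(&TaskStatus::Pending) || !deps.contains(task_id) {
                continue;
            }
            if !success {
                // Upstream failed: mark the dependent as skipped (no fault-tolerance policy here).
                self.status.insert(candidate.clone(), TaskStatus::Skipped);
            } else if deps.iter().all(|d| self.status.get(d) == Some(&TaskStatus::Completed)) {
                ready.push(candidate.clone());
            }
        }
        for id in &ready {
            self.status.insert(id.clone(), TaskStatus::Scheduled);
        }
        ready
    }
}
```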
|
||||
|
||||
### 4.2 状态对齐机制 (State Alignment / Snapshot)
|
||||
为了解决前端刷新或网络丢包导致的状态不一致:
|
||||
|
||||
1. **主动推送快照 (On Connect)**:
|
||||
* Gateway 在前端建立 SSE 连接时,向 Orchestrator 发送 `SyncStateCommand`。
|
||||
* Orchestrator 收到命令后,将当前内存中的完整 DAG 状态打包成 `WorkflowStateSnapshot` 事件发送。
|
||||
2. **前端合并逻辑**:
|
||||
* 前端收到 Snapshot 后,全量替换本地的任务状态树。
|
||||
* 如果 Snapshot 显示某任务 `Running`,前端恢复 Loading 动画。
|
||||
* 如果 Snapshot 显示某任务 `Completed`,前端渲染结果。
|
||||
|
||||
### 4.3 容错策略 (Policy)
|
||||
Orchestrator 需要内置策略来处理非二元结果。
|
||||
* **Data Fetch Policy**: 并非所有 Data Fetch 必须成功。可以配置 "At least one data source" 策略。如果满足策略,Orchestrator 将下游的 Analysis Task 依赖视为满足。
|
||||
|
||||
## 5. 实施步骤 (Implementation Checklist)
|
||||
|
||||
### Phase 1: Contract & Interface
|
||||
- [x] **Update common-contracts**:
|
||||
- [x] Add `StartWorkflowCommand` and `SyncStateCommand`.
|
||||
- [x] Add `WorkflowEvent` enum (incl. Started, StateChanged, StreamUpdate, Completed, Failed, Snapshot).
|
||||
- [x] Add `WorkflowDag`, `TaskNode`, `TaskType`, `TaskStatus` structs.
|
||||
- [x] Update publishers for `FetchCompanyDataCommand` and `GenerateReportCommand`.
|
||||
- [x] Bump version and publish crate.
|
||||
|
||||
### Phase 2: Workflow Orchestrator Service (New)
|
||||
- [x] **Scaffold Service**:
|
||||
- [x] Create new Rust service `services/workflow-orchestrator-service`.
|
||||
- [x] Setup `Dockerfile`, `Cargo.toml`, and `main.rs`.
|
||||
- [x] Implement NATS connection and multi-topic subscription.
|
||||
- [x] **Core Logic - State Machine**:
|
||||
- [x] Implement `WorkflowState` struct (InMemory + Redis/DB persistence optional for MVP).
|
||||
- [x] Implement `DagScheduler`: Logic to parse template and build dependency graph.
|
||||
- [x] **Core Logic - Handlers**:
|
||||
- [x] Handle `StartWorkflowCommand`: Init DAG, fire initial tasks.
|
||||
- [x] Handle `TaskCompleted` events (from Providers/ReportGen): Update DAG, trigger next tasks.
|
||||
- [x] Handle `SyncStateCommand`: Serialize current state and emit `WorkflowStateSnapshot`.
|
||||
- [x] **Policy Engine**:
|
||||
- [x] Implement "At least one provider" policy for data fetching.
|
||||
|
||||
### Phase 3: API Gateway Refactoring
|
||||
- [x] **Remove Legacy Logic**:
|
||||
- [x] Delete `aggregator.rs` completely.
|
||||
- [x] Remove `trigger_data_fetch` aggregation logic.
|
||||
- [x] Remove `/api/tasks` polling endpoint.
|
||||
- [x] **Implement Proxy Logic**:
|
||||
- [x] Add `POST /api/v2/workflow/start` -> Publishes `StartWorkflowCommand`.
|
||||
- [x] Add `GET /api/v2/workflow/events/{id}` -> Subscribes to NATS, sends `SyncStateCommand` on open, proxies events to SSE.
|
||||
|
||||
### Phase 4: Integration & Frontend
|
||||
- [x] **Docker Compose**: Add `workflow-orchestrator-service` to stack.
|
||||
- [x] **Frontend Adapter**:
|
||||
- [x] **Type Definitions**: Define `WorkflowEvent`, `WorkflowDag`, `TaskStatus` in `src/types/workflow.ts`.
|
||||
- [x] **API Proxy**: Implement Next.js Route Handlers for `POST /workflow/start` and `GET /workflow/events/{id}` (SSE).
|
||||
- [x] **Core Logic (`useWorkflow`)**:
|
||||
- [x] Implement SSE connection management with auto-reconnect.
|
||||
- [x] Handle `WorkflowStarted`, `TaskStreamUpdate`, `WorkflowCompleted`.
|
||||
- [x] Implement state restoration via `WorkflowStateSnapshot`.
|
||||
- [x] **UI Components**:
|
||||
- [x] `WorkflowVisualizer`: Task list and status tracking.
|
||||
- [x] `TaskOutputViewer`: Markdown-rendered stream output.
|
||||
- [x] `WorkflowReportLayout`: Integrated analysis page layout.
|
||||
- [x] **Page Integration**: Refactor `app/report/[symbol]/page.tsx` to use the new workflow engine.
|
||||
|
||||
---
|
||||
*Updated: 2025-11-20 - Added Implementation Checklist*
|
||||
@ -0,0 +1,175 @@
|
||||
# 架构修订:基于会话的数据快照与分层存储 (Session-Based Data Snapshotting)
|
||||
|
||||
## 1. 核心理念修订 (Core Philosophy Refinement)
|
||||
|
||||
基于您的反馈,我们修正了架构的核心逻辑,将数据明确划分为两类,并采取不同的存储策略。
|
||||
|
||||
### 1.1 数据分类 (Data Classification)
|
||||
|
||||
1. **客观历史数据 (Objective History / Time-Series)**
|
||||
* **定义**: 股价、成交量、K线图等交易数据。
|
||||
* **特性**: "出现即历史",不可篡改,全球唯一。
|
||||
* **存储策略**: **全局共享存储**。不需要按 Session 隔离,不需要存多份。
|
||||
* **表**: 现有的 `daily_market_data` (TimescaleDB) 保持不变。
|
||||
|
||||
2. **观测型数据 (Observational Data / Fundamentals)**
|
||||
* **定义**: 财务报表、公司简介、以及 Provider 返回的原始非结构化或半结构化信息。
|
||||
* **特性**: 不同来源(Providers)说法不一;可能随时间修正(Restatement);分析依赖于“当时”获取的版本。
|
||||
* **存储策略**: **基于 Session 的快照存储**。每一次 Session 都必须保存一份当时获取的原始数据的完整副本。
|
||||
* **表**: 新增 `session_raw_data` 表。
|
||||
|
||||
### 1.2 解决的问题
|
||||
* **会话隔离**: 新的 Session 拥有自己独立的一套基础面数据,不受历史 Session 干扰,也不污染未来 Session。
|
||||
* **历史回溯**: 即使 Provider 变了,查看历史 Report 时,依然能看到当时是基于什么数据得出的结论。
|
||||
* **数据清洗解耦**: 我们现在只负责“收集并快照”,不负责“清洗和聚合”。复杂的清洗逻辑(WASM/AI)留待后续模块处理。
|
||||
|
||||
---
|
||||
|
||||
## 2. 数据库架构设计 (Schema Design)
|
||||
|
||||
### 2.1 新增:会话原始数据表 (`session_raw_data`)
|
||||
|
||||
这是本次架构调整的核心。我们不再试图把财务数据强行塞进一个全局唯一的标准表,而是忠实记录每个 Provider 在该 Session 中返回的内容。
|
||||
|
||||
```sql
|
||||
CREATE TABLE session_raw_data (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
request_id UUID NOT NULL, -- 关联的 Session ID
|
||||
symbol VARCHAR(32) NOT NULL,
|
||||
provider VARCHAR(64) NOT NULL, -- e.g., 'tushare', 'alphavantage'
|
||||
data_type VARCHAR(32) NOT NULL, -- e.g., 'financial_statements', 'company_profile'
|
||||
|
||||
-- 核心:直接存储 Provider 返回的(或稍微标准化的)完整 JSON
|
||||
data_payload JSONB NOT NULL,
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
-- 索引:为了快速查询某次 Session 的数据
|
||||
CONSTRAINT fk_request_id FOREIGN KEY (request_id) REFERENCES requests(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE INDEX idx_session_data_req ON session_raw_data(request_id);
|
||||
```
|
||||
|
||||
### 2.2 新增:供应商缓存表 (`provider_response_cache`)
|
||||
|
||||
为了优化性能和节省 API 调用次数,我们在全局层引入缓存。但请注意:**缓存仅作为读取源,不作为 Session 的存储地。**
|
||||
|
||||
```sql
|
||||
CREATE TABLE provider_response_cache (
|
||||
cache_key VARCHAR(255) PRIMARY KEY, -- e.g., "tushare:AAPL:financials"
|
||||
data_payload JSONB NOT NULL,
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
expires_at TIMESTAMPTZ NOT NULL
|
||||
);
|
||||
```
|
||||
|
||||
### 2.3 保持不变:市场数据表 (`daily_market_data`)
|
||||
* 继续使用 TimescaleDB 存储 `open`, `high`, `low`, `close`, `volume`。
|
||||
* 所有 Session 共享读取此表。
|
||||
|
||||
---
|
||||
|
||||
## 3. 数据流转逻辑 (Data Lifecycle)
|
||||
|
||||
### Phase 1: Session 启动与数据获取 (Acquisition)
|
||||
|
||||
1. **Start**: API Gateway 生成 `request_id`。
|
||||
2. **Fetch & Cache Logic (在 Provider Service 中执行)**:
|
||||
* Provider 收到任务 (Symbol: AAPL)。
|
||||
* **Check Cache**: 查询 `provider_response_cache`。
|
||||
* *Hit*: 拿出现成的 JSON。
|
||||
* *Miss*: 调用外部 API,获得 JSON,写入 Cache (设置过期时间如 24h)。
|
||||
3. **Snapshot (关键步骤)**:
|
||||
* Provider 将拿到的 JSON (无论来自 Cache 还是 API),作为一条**新记录**写入 `session_raw_data`。
|
||||
* 字段: `request_id=UUID`, `provider=tushare`, `data=JSON`。
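A provider-side sketch of this Phase 1 flow is shown below. The `get_cache` / `set_cache` / `save_session_data` method names come from the checklist later in this document; their signatures, the `PersistenceClient` shape, and the `call_tushare_api` helper are assumptions for illustration only.

```rust
use serde_json::Value;
use uuid::Uuid;

/// Illustrative provider-side flow: read-through cache, then snapshot
/// the payload into `session_raw_data` for this session.
async fn fetch_and_snapshot(
    client: &PersistenceClient, // shared client from common-contracts (assumed)
    request_id: Uuid,
    symbol: &str,
) -> anyhow::Result<()> {
    let cache_key = format!("tushare:{symbol}:financials");

    // 1. Check the global provider_response_cache.
    let payload: Value = match client.get_cache(&cache_key).await? {
        Some(hit) => hit,
        None => {
            // 2. Cache miss: call the external API, then populate the cache (24h TTL).
            let fresh = call_tushare_api(symbol).await?; // hypothetical fetch helper
            client.set_cache(&cache_key, &fresh, 24 * 3600).await?;
            fresh
        }
    };

    // 3. Snapshot: always write a per-session copy, whether it came from cache or the API.
    client
        .save_session_data(request_id, symbol, "tushare", "financial_statements", &payload)
        .await?;

    Ok(())
}
```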
|
||||
|
||||
### Phase 2: 展示与分析 (Consumption)
|
||||
|
||||
1. **Frontend Raw View (UI)**:
|
||||
* 前端调用 `GET /api/v1/session/{request_id}/raw-data`。
|
||||
* 后端 `SELECT * FROM session_raw_data WHERE request_id = ...`。
|
||||
* UI 依然可以使用之前的 Accordion 结构,展示 "Tushare: Financials", "AlphaVantage: Profile"。这就是用户看到的“本次调查的原始底稿”。
|
||||
|
||||
2. **Analysis (LLM)**:
|
||||
* Report Generator 获取 `request_id` 对应的所有 raw data。
|
||||
* 将这些 Raw Data 作为 Context 喂给 LLM。
|
||||
* (未来扩展): 在这一步之前,插入一个 "Data Cleaning Agent/Wasm",读取 raw data,输出 clean data,再喂给 LLM。
|
||||
|
||||
### Phase 3: 归档与清理 (Cleanup)
|
||||
|
||||
* **Session Deletion**: 当我们需要清理某个历史 Session 时,只需 `DELETE FROM session_raw_data WHERE request_id = ...`。
|
||||
* **副作用**: 零。因为 `daily_market_data` 是共享的(留着也没事),而 Session 独享的 `raw_data` 被彻底删除了。
|
||||
|
||||
---
|
||||
|
||||
## 4. 实施路线图 (Implementation Roadmap)
|
||||
|
||||
1. **Database Migration**:
|
||||
* 创建 `session_raw_data` 表。
|
||||
* 创建 `provider_response_cache` 表。
|
||||
* (清理旧表): 废弃 `time_series_financials` 表(原计划用于存标准化的财务指标,现在确认不需要。我们只存 `session_raw_data` 中的原始基本面数据,财务报表由原始数据动态推导)。
|
||||
* **保留** `daily_market_data` 表(存储股价、K线等客观时间序列数据,保持全局共享)。
|
||||
|
||||
2. **Provider Services**:
|
||||
* 引入 Cache 检查逻辑。
|
||||
* 修改输出逻辑:不再尝试 Upsert 全局表,而是 Insert `session_raw_data`。
|
||||
|
||||
3. **Frontend Refactor**:
|
||||
* 修改 `RawDataViewer` 的数据源,从读取“最后一次更新”改为读取“当前 Session 的 Raw Data”。
|
||||
* 这完美解决了“刷新页面看到旧数据”的问题——如果是一个新 Session ID,它的 `session_raw_data` 一开始是空的,UI 就会显示为空/Loading,直到新的 Snapshot 写入。
|
||||
|
||||
4. **Future Extensibility (Aggregation)**:
|
||||
* 当前架构下,Frontend 直接展示 Raw Data。
|
||||
* 未来:新增 `DataProcessorService`。它监听 "Data Fetched" 事件,读取 `session_raw_data`,执行聚合逻辑,将结果写入 `session_clean_data` (假想表),供 UI 显示“完美报表”。
|
||||
|
||||
---
|
||||
|
||||
## 5. Step-by-Step Task List
|
||||
|
||||
### Phase 1: Data Persistence Service & Database (Foundation)
|
||||
- [x] **Task 1.1**: Create new SQL migration file.
|
||||
- Define `session_raw_data` table (Columns: `id`, `request_id`, `symbol`, `provider`, `data_type`, `data_payload`, `created_at`).
|
||||
- Define `provider_response_cache` table (Columns: `cache_key`, `data_payload`, `updated_at`, `expires_at`).
|
||||
- (Optional) Rename `time_series_financials` to `_deprecated_time_series_financials` to prevent accidental usage.
|
||||
- [x] **Task 1.2**: Run SQL migration (`sqlx migrate run`).
|
||||
- [x] **Task 1.3**: Implement `db/session_data.rs` in Data Persistence Service.
|
||||
- Function: `insert_session_data(pool, request_id, provider, data_type, payload)`.
|
||||
- Function: `get_session_data(pool, request_id)`.
|
||||
- [x] **Task 1.4**: Implement `db/provider_cache.rs` in Data Persistence Service.
|
||||
- Function: `get_cache(pool, key) -> Option<Payload>`.
|
||||
- Function: `set_cache(pool, key, payload, ttl)`.
|
||||
- [x] **Task 1.5**: Expose new API endpoints in `api/`.
|
||||
- `POST /api/v1/session-data` (Internal use by Providers).
|
||||
- `GET /api/v1/session-data/:request_id` (Used by ReportGen & Frontend).
|
||||
- `GET/POST /api/v1/provider-cache` (Internal use by Providers).
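
Task 1.3 / 1.4 中两类函数的一个最小化草图,仅作示意:列名取自 Task 1.1 的表定义,但 SQL 细节与 sqlx 特性开启情况(postgres / json / uuid)均为假设,以实际迁移脚本与 Cargo 配置为准。

```rust
// 示意代码:db/session_data.rs 与 db/provider_cache.rs 的可能实现(列名与返回类型为假设)
use sqlx::PgPool;
use uuid::Uuid;

pub async fn insert_session_data(
    pool: &PgPool,
    request_id: Uuid,
    provider: &str,
    data_type: &str,
    payload: &serde_json::Value,
) -> anyhow::Result<()> {
    sqlx::query(
        "INSERT INTO session_raw_data (request_id, provider, data_type, data_payload) \
         VALUES ($1, $2, $3, $4)",
    )
    .bind(request_id)
    .bind(provider)
    .bind(data_type)
    .bind(payload)
    .execute(pool)
    .await?;
    Ok(())
}

pub async fn get_cache(pool: &PgPool, key: &str) -> anyhow::Result<Option<serde_json::Value>> {
    // 仅返回未过期的缓存条目
    let row: Option<(serde_json::Value,)> = sqlx::query_as(
        "SELECT data_payload FROM provider_response_cache \
         WHERE cache_key = $1 AND expires_at > NOW()",
    )
    .bind(key)
    .fetch_optional(pool)
    .await?;
    Ok(row.map(|(v,)| v))
}
```
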
|
||||
|
||||
### Phase 2: Common Contracts & SDK (Glue Code)
|
||||
- [x] **Task 2.1**: Update `common-contracts`.
|
||||
- Add DTOs for `SessionData` and `CacheEntry`.
|
||||
- Update `PersistenceClient` struct to include methods for calling new endpoints (`save_session_data`, `get_cache`, `set_cache`).
|
||||
|
||||
### Phase 3: Provider Services (Logic Update)
|
||||
- [x] **Task 3.1**: Refactor `tushare-provider-service`.
|
||||
- Update Worker to check Cache first.
|
||||
- On Cache Miss: Call Tushare API -> Save to Cache.
|
||||
- **Final Step**: Post data to `POST /api/v1/session-data` (instead of old batch insert).
|
||||
- Ensure `request_id` is propagated correctly.
|
||||
- [x] **Task 3.2**: Refactor `alphavantage-provider-service` (same logic).
|
||||
- [x] **Task 3.3**: Refactor `yfinance-provider-service` (same logic).
|
||||
- [x] **Task 3.4**: Verify `FinancialsPersistedEvent` is still emitted (or similar event) to trigger Gateway aggregation.
|
||||
|
||||
### Phase 4: API Gateway & Report Generator (Consumption)
|
||||
- [x] **Task 4.1**: Update `api-gateway` routing.
|
||||
- Proxy `GET /api/v1/session-data/:request_id` for Frontend.
|
||||
- [x] **Task 4.2**: Update `report-generator-service`.
|
||||
- In `worker.rs`, change data fetching logic.
|
||||
- Instead of `get_financials_by_symbol`, call `get_session_data(request_id)`.
|
||||
- Pass the raw JSON list to the LLM Context Builder.
|
||||
|
||||
### Phase 5: Frontend (UI Update)
|
||||
- [x] **Task 5.1**: Update `useReportEngine.ts`.
|
||||
- Change polling/fetching logic to request `GET /api/v1/session-data/${requestId}`.
|
||||
- [x] **Task 5.2**: Update `RawDataViewer.tsx`.
|
||||
- Adapt to new data structure (List of `{ provider, data_type, payload }`).
|
||||
- Ensure the UI correctly groups these raw snapshots by Provider.
|
||||
@ -0,0 +1,110 @@
|
||||
# 动态服务注册与发现机制设计方案 (Dynamic Service Registration & Discovery Proposal)
|
||||
|
||||
## 1. 问题陈述 (Problem Statement)
|
||||
目前的 **API Gateway** 依赖于静态配置(环境变量中的 `provider_services` 映射表)来获知可用的数据提供商服务 (Data Provider Services)。
|
||||
* **脆弱性 (Brittleness)**: 增加或迁移 Provider 需要修改 Gateway 配置并重启。
|
||||
* **缺乏健康感知 (Lack of Health Awareness)**: Gateway 会盲目地尝试连接配置的 URL。如果某个服务挂了(但配置还在),请求会遭遇超时或连接错误。
|
||||
* **运维复杂 (Operational Complexity)**: 手动管理 URL 既机械又容易出错。
|
||||
|
||||
## 2. 解决方案:动态注册系统 (Dynamic Registration System)
|
||||
我们将实施**服务注册 (Service Registry)** 模式,由 API Gateway 充当注册中心。
|
||||
|
||||
### 2.1. "注册" 生命周期
|
||||
1. **启动 (Startup)**: 当一个 Provider Service (例如 Tushare) 启动时,它向 API Gateway 发送 `POST /v1/registry/register` 请求。
|
||||
* 载荷包括:服务 ID、基础 URL、能力标识(如 "tushare")。
|
||||
2. **存活心跳 (Liveness/Heartbeat)**: Provider Service 运行一个后台任务,每隔 **N 秒** (建议 **10秒**) 发送一次 `POST /v1/registry/heartbeat`。
|
||||
* **注意**: 由于我们主要在本地容器网络运行,网络开销极低,我们可以使用较短的心跳周期(如 10秒)来实现快速的故障检测。
|
||||
3. **发现 (Discovery)**: API Gateway 在内存中维护活跃服务列表。
|
||||
* 如果超过 **2 * N 秒** (如 20秒) 未收到心跳,该服务将被标记为“不健康”或被移除。
|
||||
4. **关闭 (Shutdown)**: 在优雅退出 (Graceful Shutdown, SIGTERM/SIGINT) 时,Provider 发送 `POST /v1/registry/deregister`。
|
||||
|
||||
### 2.2. 架构变更
|
||||
|
||||
#### A. 共享契约 (`common-contracts`)
|
||||
定义注册所需的数据结构。
|
||||
|
||||
```rust
|
||||
// services/common-contracts/src/registry.rs
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct ServiceRegistration {
|
||||
pub service_id: String, // 唯一ID, 例如 "tushare-provider-1"
|
||||
pub service_name: String, // 类型, 例如 "tushare"
|
||||
pub base_url: String, // 例如 "http://10.0.1.5:8000"
|
||||
pub health_check_url: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct Heartbeat {
|
||||
pub service_id: String,
|
||||
pub status: ServiceStatus, // Active, Degraded
|
||||
}
|
||||
```
|
||||
|
||||
#### B. API Gateway (`api-gateway`)
|
||||
* **新组件**: `ServiceRegistry` (带 TTL 的线程安全 Map)。
|
||||
* **新接口**:
|
||||
* `POST /v1/registry/register`: 添加/更新条目。
|
||||
* `POST /v1/registry/heartbeat`: 刷新 TTL。
|
||||
* `POST /v1/registry/deregister`: 移除条目。
|
||||
* **逻辑变更**: `get_task_progress` 和 `trigger_data_fetch` 将不再读取静态配置,而是查询动态的 `ServiceRegistry`。
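
一个带 TTL 的 `ServiceRegistry` 最小化草图,仅作示意:字段、20 秒判活阈值与方法名均为假设,实际实现还需保存 `ProviderMetadata` 等信息并区分 "不健康" 与 "移除" 两种状态。

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};

// 示意:注册表条目,仅保留判活所需的最少字段
#[derive(Clone)]
struct RegistryEntry {
    base_url: String,
    last_heartbeat: Instant,
}

#[derive(Clone, Default)]
pub struct ServiceRegistry {
    // key: service_id
    entries: Arc<RwLock<HashMap<String, RegistryEntry>>>,
}

impl ServiceRegistry {
    /// register 与 heartbeat 都会刷新 last_heartbeat
    pub fn upsert(&self, service_id: &str, base_url: &str) {
        let mut map = self.entries.write().unwrap();
        map.insert(
            service_id.to_string(),
            RegistryEntry { base_url: base_url.to_string(), last_heartbeat: Instant::now() },
        );
    }

    pub fn deregister(&self, service_id: &str) {
        self.entries.write().unwrap().remove(service_id);
    }

    /// 超过 2 * N 秒(此处假设 20s)未收到心跳即视为不健康,不再返回
    pub fn healthy_base_urls(&self) -> Vec<String> {
        let ttl = Duration::from_secs(20);
        self.entries
            .read()
            .unwrap()
            .values()
            .filter(|e| e.last_heartbeat.elapsed() < ttl)
            .map(|e| e.base_url.clone())
            .collect()
    }
}
```
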
|
||||
|
||||
#### C. Provider Services (`*-provider-service`)
|
||||
我们需要一个统一的机制来处理这个生命周期。
|
||||
建议在 `common-contracts` 中引入一个标准的生命周期处理模块。
|
||||
|
||||
**建议的 Trait / 辅助结构体:**
|
||||
|
||||
```rust
|
||||
// services/common-contracts/src/lifecycle.rs (New)
|
||||
|
||||
pub struct ServiceRegistrar {
|
||||
gateway_url: String,
|
||||
registration: ServiceRegistration,
|
||||
// ...
|
||||
}
|
||||
|
||||
impl ServiceRegistrar {
|
||||
/// 注册服务 (重试直到成功)
|
||||
pub async fn register(&self) -> Result<()>;
|
||||
/// 启动后台心跳循环 (10s 间隔)
|
||||
pub async fn start_heartbeat_loop(&self);
|
||||
/// 注销服务
|
||||
pub async fn deregister(&self) -> Result<()>;
|
||||
}
|
||||
```
|
||||
|
||||
## 3. 实施计划 (TODO List)
|
||||
|
||||
### Phase 1: 基础建设 (Infrastructure)
|
||||
* [ ] **Task 1.1 (Contracts)**: 在 `services/common-contracts` 中创建 `registry.rs`,定义 `ServiceRegistration` 和 `Heartbeat` 结构体。
|
||||
* [ ] **Task 1.2 (Library)**: 在 `services/common-contracts` 中实现 `ServiceRegistrar` 逻辑。
|
||||
* 包含重试机制的 `register`。
|
||||
* 包含 `tokio::time::interval` (10s) 的 `start_heartbeat_loop`。
|
||||
* 确保能从环境变量 (如 `API_GATEWAY_URL`) 获取 Gateway 地址。
|
||||
* [ ] **Task 1.3 (Gateway Core)**: 在 `api-gateway` 中实现 `ServiceRegistry` 状态管理(使用 `Arc<RwLock<HashMap<...>>>`)。
|
||||
* [ ] **Task 1.4 (Gateway API)**: 在 `api-gateway` 中添加 `/v1/registry/*` 路由并挂载 Handler。
|
||||
|
||||
### Phase 2: Provider 改造 (Provider Migration)
|
||||
*由于所有 Provider 架构一致,以下步骤需在 `tushare`, `finnhub`, `alphavantage`, `yfinance` 四个服务中重复执行:*
|
||||
|
||||
* [ ] **Task 2.1 (Config)**: 更新 `AppConfig`,增加 `gateway_url` 配置项。
|
||||
* [ ] **Task 2.2 (Main Loop)**: 修改 `main.rs`。
|
||||
* 初始化 `ServiceRegistrar`。
|
||||
* 在 HTTP Server 启动前(或同时)调用 `registrar.register().await`。
|
||||
* 使用 `tokio::spawn` 启动 `registrar.start_heartbeat_loop()`。
|
||||
* [ ] **Task 2.3 (Shutdown)**: 添加 Graceful Shutdown 钩子,确保在收到 Ctrl+C 时调用 `registrar.deregister()`。
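
Task 2.2 / 2.3 的接线方式草图,仅作示意:`build_registrar_from_env` 与 `run_service` 为假设的占位函数,`ServiceRegistrar` 的方法沿用上文建议的定义。

```rust
// 示意代码:Provider main.rs 中的生命周期接线
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // 假设:从 API_GATEWAY_URL 等环境变量构建(函数名仅为示意)
    let registrar = std::sync::Arc::new(build_registrar_from_env()?);

    // 1. 注册(内部带重试,直到成功)
    registrar.register().await?;

    // 2. 后台心跳循环(10s 间隔)
    let hb = registrar.clone();
    tokio::spawn(async move { hb.start_heartbeat_loop().await });

    // 3. 服务主体与 Ctrl+C 并行等待,任一结束都会进入注销流程
    tokio::select! {
        res = run_service() => { res?; }   // 假设:启动 HTTP Server / NATS Worker
        _ = tokio::signal::ctrl_c() => {}
    }

    // 4. Graceful Shutdown:注销服务
    registrar.deregister().await?;
    Ok(())
}
```
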
|
||||
|
||||
### Phase 3: 消费端适配 (Gateway Consumption)
|
||||
* [ ] **Task 3.1**: 修改 `api-gateway` 的 `test_data_source_config`,不再查 Config,改为查 Registry。
|
||||
* [ ] **Task 3.2**: 修改 `api-gateway` 的 `trigger_data_fetch`,根据 `service_name` (如 "tushare") 从 Registry 查找可用的 `base_url`。
|
||||
* 如果找到多个同名服务,可以做简单的 Load Balance(轮询)。
|
||||
* [ ] **Task 3.3**: 修改 `api-gateway` 的 `get_task_progress`,遍历 Registry 中的所有服务来聚合状态。
|
||||
|
||||
### Phase 4: 清理 (Cleanup)
|
||||
* [ ] **Task 4.1**: 移除 `api-gateway` 中关于 `provider_services` 的静态配置代码和环境变量。
|
||||
|
||||
## 4. 预期收益
|
||||
* **即插即用 (Plug-and-Play)**: 启动一个新的 Provider 实例,它会自动出现在系统中。
|
||||
* **自愈 (Self-Healing)**: 如果 Provider 崩溃,它会从注册表中消失(TTL 过期),Gateway 不会再向其发送请求,避免了无意义的等待和超时。
|
||||
* **零配置 (Zero-Config)**: 扩容或迁移 Provider 时无需修改 Gateway 环境变量。
|
||||
@ -0,0 +1,45 @@
|
||||
# 前端架构重构计划:状态管理与工作流控制权移交
|
||||
|
||||
## 1. 背景与现状
|
||||
当前的 `fundamental-analysis` 前端项目源自一个 POC (Proof of Concept) 原型。在快速迭代过程中,遗留了大量“为了跑通流程而写”的临时逻辑。核心问题在于**前端承担了过多的业务控制逻辑**,导致前后端状态不一致、错误处理困难、用户体验割裂。
|
||||
|
||||
### 核心痛点
|
||||
1. **“自嗨式”状态流转**:前端自行判断何时从“数据获取”切换到“分析报告”阶段(基于轮询结果推断),而非响应后端的明确指令。
|
||||
2. **脆弱的 Polling + SSE 混合模式**:前端先轮询 HTTP 接口查询进度,随后再切换到 SSE 流。这两者之间存在状态断层,且严重依赖 HTTP 接口的实时性(而该接口又是后端实时聚合下游结果得来的,极易超时)。
|
||||
3. **缺乏统一的状态源 (Source of Truth)**:前端维护了一套复杂的 `ReportState`,后端也有一套状态,两者通过不稳定的网络请求同步,经常出现“前端显示完成,后端还在跑”或“后端报错,前端还在转圈”的情况。
|
||||
|
||||
## 2. 重构目标
|
||||
**原则:前端归前端(UI展示),后端归后端(业务逻辑与流转控制)。**
|
||||
|
||||
1. **控制权移交**:所有涉及业务流程流转(Phase Transition)的逻辑,必须由后端通过事件或状态字段明确驱动。前端只负责渲染当前状态。
|
||||
2. **单一数据流 (Single Stream)**:废除“HTTP Polling -> SSE”的混合模式,建立统一的 WebSocket 或 SSE 通道。从发请求那一刻起,所有状态变更(包括数据获取进度、分析进度、报错)全由服务端推送。
|
||||
3. **简化状态机**:前端 `useReportEngine` 钩子应简化为单纯的“状态订阅者”,不再包含复杂的判断逻辑(如 `if (tasks.every(t => t.success)) switchPhase()`)。
|
||||
|
||||
## 3. 实施方案 (Tasks)
|
||||
|
||||
### Phase 1: 后端基础设施准备 (Backend Readiness)
|
||||
- [ ] **统一事件流接口**:在 `api-gateway` 实现一个统一的 SSE/WebSocket 端点(如 `/v2/workflow/events`)。
|
||||
- 该端点应聚合:`DataFetchProgress` (NATS), `WorkflowStart` (NATS), `ModuleProgress` (ReportGenerator), `WorkflowComplete`。
|
||||
- [ ] **Gateway 状态缓存**:`api-gateway` 需要维护一个轻量级的 Request 状态缓存(Redis 或 内存),不再实时透传查询请求给下游 Provider,而是直接返回缓存的最新状态。
|
||||
- [ ] **定义统一状态协议**:制定前后端通用的状态枚举(`PENDING`, `DATA_FETCHING`, `ANALYZING`, `COMPLETED`, `FAILED`)。
|
||||
|
||||
### Phase 2: 前端逻辑剥离 (Frontend Refactoring)
|
||||
- [ ] **废除 useReportEngine 里的推断逻辑**:删除所有 `useEffect` 里关于状态切换的 `if/else` 判断代码。
|
||||
- [ ] **实现 Event-Driven Hook**:重写 `useReportEngine`,使其核心逻辑变为:连接流 -> 收到事件 -> 更新 State。
|
||||
- 收到 `STATUS_CHANGED: DATA_FETCHING` -> 显示数据加载 UI。
|
||||
- 收到 `STATUS_CHANGED: ANALYZING` -> 自动切换到分析 UI(无需前端判断数据是否齐备)。
|
||||
- 收到 `ERROR` -> 显示错误 UI。
|
||||
- [ ] **清理旧代码**:移除对 `/api/tasks` 轮询的依赖代码。
|
||||
|
||||
### Phase 3: 验证与兜底
|
||||
- [ ] **断线重连机制**:实现 SSE/WS 的自动重连,并能从后端获取“当前快照”来恢复状态,防止刷新页面丢失进度。
|
||||
- [ ] **超时兜底**:仅保留最基本的网络超时提示(如“服务器连接中断”),不再处理业务逻辑超时。
|
||||
|
||||
## 4. 复杂度评估与建议
|
||||
- **复杂度**:中等偏高 (Medium-High)。涉及前后端协议变更和核心 Hook 重写。
|
||||
- **风险**:高。这是系统的心脏部位,重构期间可能会导致整个分析流程暂时不可用。
|
||||
- **建议**:**单独开一个线程(Branch/Session)进行**。不要在当前修复 Bug 的线程中混合进行。这需要系统性的设计和逐步替换,无法通过简单的 Patch 完成。
|
||||
|
||||
---
|
||||
*Created: 2025-11-20*
|
||||
|
||||
@ -0,0 +1,59 @@
|
||||
# 系统日志分析与调试报告 (2025-11-20)
|
||||
|
||||
## 1. 系统现状快照
|
||||
|
||||
基于 `scripts/inspect_logs.sh` 的执行结果,当前系统各服务状态如下:
|
||||
|
||||
| 服务名称 | 状态 | 关键日志/行为 |
|
||||
| :--- | :--- | :--- |
|
||||
| **API Gateway** | 🟢 Running | 成功接收数据获取请求 (`FetchCompanyDataCommand`);成功注册服务;**未观测到**发送 `GenerateReportCommand`。 |
|
||||
| **Data Persistence** | 🟢 Running | 数据库连接正常;成功写入 `session_data` (Source: `yfinance`, `tushare`)。 |
|
||||
| **Report Generator** | 🟢 Running | 已启动并连接 NATS;**无**收到任务的日志;服务似乎在 13:43 重启过。 |
|
||||
| **Alphavantage** | 🟢 Running | 任务执行成功 (Task Completed)。 |
|
||||
| **YFinance** | 🟢 Running | 任务执行成功 (Cache HIT)。 |
|
||||
| **Tushare** | 🟢 Running | 配置轮询正常;有数据写入记录。 |
|
||||
| **Finnhub** | 🟡 Degraded | **配置错误**:`No enabled Finnhub configuration found`,导致服务降级,无法执行任务。 |
|
||||
| **NATS** | 🟢 Running | 正常运行。 |
|
||||
|
||||
## 2. 现象分析
|
||||
|
||||
### 2.1 核心问题:报告生成流程中断
|
||||
用户反馈 "点击后无反应/报错",日志显示:
|
||||
1. **数据获取阶段 (Data Fetch)**:
|
||||
* API Gateway 接收到了数据获取请求 (Req ID: `935e6999...`)。
|
||||
* Alphavantage, YFinance, Tushare 成功响应并写入数据。
|
||||
* **Finnhub 失败/超时**:由于配置缺失,Finnhub Provider 处于降级状态,无法处理请求。
|
||||
* API Gateway 的 Aggregator 显示 `Received 2/4 responses`。它可能在等待所有 Provider 返回,导致整体任务状态卡在 "InProgress"。
|
||||
|
||||
2. **报告生成阶段 (Report Generation)**:
|
||||
* **完全未触发**。`api-gateway` 日志中没有 `Publishing analysis generation command`。
|
||||
* `report-generator-service` 日志中没有 `Received NATS command`。
|
||||
|
||||
### 2.2 根因推断
|
||||
前端 (Frontend) 或 API Gateway 的聚合逻辑可能存在**"全有或全无" (All-or-Nothing)** 的依赖:
|
||||
* 前端通常轮询 `/tasks/{id}`。
|
||||
* 如果 Finnhub 任务从未完成(挂起或失败未上报),聚合状态可能永远不是 "Completed"。
|
||||
* 前端因此卡在进度条,从未发送 `POST /analysis-requests/{symbol}` 来触发下一步的报告生成。
|
||||
|
||||
## 3. 潜在风险与待办
|
||||
|
||||
1. **Finnhub 配置缺失**:导致服务不可用,拖累整体流程。
|
||||
2. **容错性不足**:单个 Provider (Finnhub) 的失败似乎阻塞了整个 Pipeline。我们需要确保 "部分成功" 也能继续后续流程。
|
||||
3. **Report Generator 重启**:日志显示该服务在 13:43 重启。如果此前有请求,可能因 Crash 丢失。需要关注其稳定性。
|
||||
|
||||
## 4. 下一步调试与修复计划
|
||||
|
||||
### Phase 1: 修复阻塞点
|
||||
- [ ] **修复 Finnhub 配置**:检查数据库中的 `data_sources_config`,确保 Finnhub 有效启用且 API Key 正确。
|
||||
- [ ] **验证容错逻辑**:检查 API Gateway 的 `Aggregator` 和 Frontend 的 `useReportEngine`,确保设置超时机制。如果 3/4 成功,1/4 超时,应允许用户继续生成报告。
|
||||
|
||||
### Phase 2: 验证报告生成器
|
||||
- [ ] **手动触发**:使用 Postman 或 `curl` 直接调用 `POST http://localhost:4000/v1/analysis-requests/{symbol}`,绕过前端等待逻辑,验证 Report Generator 是否能正常工作。
|
||||
- [ ] **观察日志**:确认 Report Generator 收到指令并开始流式输出。
|
||||
|
||||
### Phase 3: 增强可观测性
|
||||
- [ ] **完善日志**:Report Generator 的日志偏少,建议增加 "Start processing module X" 等详细步骤日志。
|
||||
|
||||
---
|
||||
*Report generated by AI Assistant.*
|
||||
|
||||
@ -0,0 +1,90 @@
|
||||
# UI Improvement: Parallel Data Provider Status & Error Reporting
|
||||
|
||||
## 1. Problem Statement
|
||||
Currently, the Fundamental Analysis page shows a generic "Fetching Data..." loading state. The detailed status and errors from individual data providers (Tushare, YFinance, AlphaVantage) are aggregated into a single status in the API Gateway.
|
||||
|
||||
This causes two issues:
|
||||
1. **Ambiguity**: Users cannot see which provider is working, finished, or failed.
|
||||
2. **Hidden Errors**: If one provider fails (e.g., database error) but the overall task is still "in progress" (or generic failed), the specific error details are lost or not displayed prominently.
|
||||
|
||||
## 2. Goal
|
||||
Update the API and UI to reflect the parallel nature of data fetching. The UI should display a "control panel" style view where each Data Provider has its own status card, showing:
|
||||
- Provider Name (e.g., "Tushare")
|
||||
- Current Status (Queued, In Progress, Completed, Failed)
|
||||
- Progress Details (e.g., "Fetching data...", "Persisting...", "Error: 500 Internal Server Error")
|
||||
|
||||
## 3. Proposed Changes
|
||||
|
||||
### 3.1 Backend (API Gateway)
|
||||
**Endpoint**: `GET /v1/tasks/{request_id}`
|
||||
|
||||
**Current Behavior**: Returns a single `TaskProgress` object (the first one found).
|
||||
|
||||
**New Behavior**: Returns a list of all tasks associated with the `request_id`.
|
||||
|
||||
**Response Schema Change**:
|
||||
```json
|
||||
// BEFORE
|
||||
{
|
||||
"request_id": "uuid",
|
||||
"task_name": "tushare:600519.SS",
|
||||
"status": "in_progress",
|
||||
...
|
||||
}
|
||||
|
||||
// AFTER
|
||||
[
|
||||
{
|
||||
"request_id": "uuid",
|
||||
"task_name": "tushare:600519.SS",
|
||||
"status": "failed",
|
||||
"details": "Error: 500 ...",
|
||||
...
|
||||
},
|
||||
{
|
||||
"request_id": "uuid",
|
||||
"task_name": "yfinance:600519.SS",
|
||||
"status": "completed",
|
||||
...
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### 3.2 Frontend
|
||||
|
||||
#### Types
|
||||
Update `TaskProgress` handling to support array responses.
|
||||
|
||||
#### Logic (`useReportEngine` & `useTaskProgress`)
|
||||
- **Aggregation Logic**:
|
||||
- The overall "Phase Status" (Fetching vs Complete) depends on *all* provider tasks.
|
||||
- **Fetching**: If *any* task is `queued` or `in_progress`.
|
||||
- **Complete**: When *all* tasks are `completed` or `failed`.
|
||||
- **Error Handling**: Do not fail the whole report if one provider fails. Allow partial success.
|
||||
|
||||
#### UI (`RawDataViewer` & `FinancialTable`)
|
||||
Replace the single loader with a grid layout:
|
||||
|
||||
```tsx
|
||||
// Conceptual Layout
|
||||
<div className="grid grid-cols-3 gap-4">
|
||||
<ProviderStatusCard name="Tushare" task={tushareTask} />
|
||||
<ProviderStatusCard name="YFinance" task={yfinanceTask} />
|
||||
<ProviderStatusCard name="AlphaVantage" task={avTask} />
|
||||
</div>
|
||||
```
|
||||
|
||||
**Card States**:
|
||||
- **Waiting**: Gray / Spinner
|
||||
- **Success**: Green Checkmark + "Data retrieved"
|
||||
- **Error**: Red X + Error Message (expanded or tooltip)
|
||||
|
||||
## 4. Implementation Steps
|
||||
1. **Backend**: Modify `services/api-gateway/src/api.rs` to return `Vec<TaskProgress>`.
|
||||
2. **Frontend**:
|
||||
- Update `TaskProgress` type definition.
|
||||
- Update `useTaskProgress` fetcher.
|
||||
- Update `useReportEngine` polling logic to handle array.
|
||||
- Create `ProviderStatusCard` component.
|
||||
- Update `RawDataViewer` to render the grid.
|
||||
|
||||
@ -0,0 +1,99 @@
|
||||
# 系统生命周期与异常处理分析 (System Lifecycle Analysis)
|
||||
|
||||
## 1. 核心问题 (Core Issue)
|
||||
目前系统的业务逻辑缺乏**确定性 (Determinism)** 和 **闭环 (Closed-loop Lifecycle)**。
|
||||
虽然各个微服务独立运行,但缺乏统一的状态协调机制。当“快乐路径” (Happy Path) 被打断(如DB报错)时,下游服务无法感知上游的失败,导致系统处于“僵尸状态” (Zombie State)。
|
||||
|
||||
> **用户反馈**:“有始必有终...你接了这个任务你就要负责把它结束掉...我们既然是微服务,那这个有始有终,可以说是跟生命性一样重要的一个基本原则。”
|
||||
|
||||
## 2. 现状分析 (Current State Analysis)
|
||||
|
||||
### 2.1 当前的数据流与控制流
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
User->>API Gateway: 1. POST /data-requests
|
||||
API Gateway->>NATS: 2. Pub "data_fetch_commands"
|
||||
|
||||
par Provider Execution
|
||||
NATS->>Provider: 3. Receive Command
|
||||
Provider->>Provider: 4. Fetch External Data
|
||||
Provider-->>DB: 5. Persist Data (Upsert)
|
||||
end
|
||||
|
||||
rect rgb(20, 0, 0)
|
||||
Note right of DB: [CRITICAL FAILURE POINT]
|
||||
DB-->>Provider: 500 Error (Panic)
|
||||
end
|
||||
|
||||
alt Happy Path
|
||||
Provider->>NATS: 6. Pub "events.financials_persisted"
|
||||
NATS->>Report Gen: 7. Trigger Analysis
|
||||
else Failure Path (Current)
|
||||
Provider->>Log: Log Error
|
||||
Provider->>TaskStore: Update Task = Failed
|
||||
Note right of Provider: 链条在此断裂 (Chain Breaks Here)
|
||||
end
|
||||
|
||||
User->>API Gateway: 8. Poll Task Status
|
||||
API Gateway-->>User: All Failed
|
||||
|
||||
User->>User: 9. Frontend Logic: "All Done" -> Switch to Analysis UI
|
||||
User->>API Gateway: 10. Connect SSE (Analysis Stream)
|
||||
Note right of User: Hangs forever (Waiting for Report Gen that never started)
|
||||
```
|
||||
|
||||
### 2.2 存在的具体缺陷
|
||||
|
||||
1. **隐式依赖链 (Implicit Dependency Chain)**:
|
||||
* Report Generator 被动等待 `FinancialsPersistedEvent`。如果 Provider 挂了,事件永远不会发出,Report Generator 就像一个不知道自己该上班的工人,一直在睡觉。
|
||||
|
||||
2. **缺乏全局协调者 (Lack of Orchestration)**:
|
||||
* API Gateway 把命令发出去就不管了(除了被动提供查询)。
|
||||
* 没有人负责说:“嘿,数据获取全部失败了,取消本次分析任务。”
|
||||
|
||||
3. **前端的状态误判**:
|
||||
* 前端认为 `Failed` 也是一种 `Completed`(终止态),这是对的。但前端错误地假设“只要终止了就可以进行下一步”。
|
||||
* **修正原则**:只有 `Success` 才能驱动下一步。`Failed` 应该导致整个工作流的**熔断 (Circuit Break)**。
|
||||
|
||||
## 3. 改进方案 (Improvement Plan)
|
||||
|
||||
我们需要引入**Rustic**的确定性原则:**如果不能保证成功,就明确地失败。**
|
||||
|
||||
### 3.1 方案一:引入显式的工作流状态 (Explicit Workflow State) - 推荐
|
||||
我们不需要引入沉重的 Workflow Engine (如 Temporal),但在逻辑上必须闭环。
|
||||
|
||||
**后端改进:**
|
||||
1. **修复数据库错误**:这是首要任务。`unexpected null` 必须被修复。
|
||||
2. **事件驱动的失败传播 (Failure Propagation)**:
|
||||
* 如果 Provider 失败,发送 `events.data_fetch_failed`。
|
||||
* Report Generator 或者 API Gateway 监听这个失败事件?
|
||||
* **更好方案**:Report Generator 不需要监听失败。API Gateway 需要聚合状态。
|
||||
|
||||
**前端/交互改进:**
|
||||
1. **熔断机制**:
|
||||
* 在 `useReportEngine` 中,如果所有 Task 都是 `Failed`,**绝对不要**进入 Analysis 阶段。
|
||||
* 直接在界面显示:“数据获取失败,无法生成最新报告。是否查看历史数据?”
|
||||
|
||||
### 3.2 具体的实施步骤 (Action Items)
|
||||
|
||||
#### Phase 1: 修复根本错误 (Fix the Root Cause)
|
||||
* **Task**: 调试并修复 `data-persistence-service` 中的 `500 Internal Server Error`。
|
||||
* 原因推测:数据库 schema 中某列允许 NULL,但 Rust 代码中定义为非 Option 类型;或者反之。
|
||||
* 错误日志:`unexpected null; try decoding as an Option`。
|
||||
|
||||
#### Phase 2: 完善生命周期逻辑 (Lifecycle Logic)
|
||||
* **Task (Frontend)**: 修改 `useReportEngine`。
|
||||
* 逻辑变更:`if (allTasksFailed) { stop(); show_error(); }`
|
||||
* 逻辑变更:`if (partialSuccess) { proceed_with_warning(); }`
|
||||
* **Task (Backend - ReportGen)**: 增加超时机制。
|
||||
* 如果用户连接了 SSE 但长时间没有数据(因为没收到事件),应该发送一个 Timeout 消息给前端,结束连接,而不是无限挂起。
|
||||
|
||||
## 4. 结论
|
||||
目前“卡在 Analyzing”的现象,是由**上游失败导致下游触发器丢失**,叠加**前端盲目推进流程**共同造成的。
|
||||
我们必须:
|
||||
1. 修好 DB 错误(让快乐路径通畅)。
|
||||
2. 在前端增加“失败熔断”,不要在没有新数据的情况下假装去分析。
|
||||
|
||||
---
|
||||
*Created: 2025-11-20*
|
||||
|
||||
@ -0,0 +1,110 @@
|
||||
# 系统日志分析与调试操作指南 (System Debugging Guide)
|
||||
|
||||
本文档旨在记录当前系统的运行状况、已知问题以及标准化的调试流程。它将指导开发人员如何利用现有工具(如 Docker、Tilt、自定义脚本)快速定位问题。
|
||||
|
||||
## 1. 系统现状 (System Status Snapshot)
|
||||
|
||||
截至 2025-11-20,Fundamental Analysis 系统由多个微服务组成,采用 Docker Compose 编排,并通过 Tilt 进行开发环境的热重载管理。
|
||||
|
||||
### 1.1 服务概览
|
||||
|
||||
| 服务名称 | 职责 | 当前状态 | 关键依赖 |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **API Gateway** | 流量入口,任务分发,服务发现 | 🟢 正常 | NATS, Providers |
|
||||
| **Report Generator** | 接收指令,调用 LLM 生成报告 | 🟢 正常 (但在等待任务) | NATS, Data Persistence, LLM API |
|
||||
| **Data Persistence** | 数据库读写,配置管理,Session 数据隔离 | 🟢 正常 (已恢复 Seeding) | Postgres |
|
||||
| **Alphavantage** | 美股数据 Provider | 🟢 正常 | NATS, External API |
|
||||
| **YFinance** | 雅虎财经 Provider | 🟢 正常 | NATS, External API |
|
||||
| **Tushare** | A股数据 Provider | 🟢 正常 | NATS, External API |
|
||||
| **Finnhub** | 市场数据 Provider | 🟡 **降级 (Degraded)** | 缺少 API Key 配置 |
|
||||
|
||||
### 1.2 核心问题:报告生成流程阻塞
|
||||
目前用户在前端点击 "生成报告" 后无反应。
|
||||
* **现象**:API Gateway 未收到生成报告的请求,Report Generator 未收到 NATS 消息。
|
||||
* **原因推断**:Finnhub Provider 因配置缺失处于 "Degraded" 状态,导致前端轮询的任务列表 (`GET /tasks/{id}`) 中始终包含未完成/失败的任务。前端逻辑可能因等待所有 Provider 完成而阻塞了后续 "Generate Report" 请求的发送。
|
||||
|
||||
---
|
||||
|
||||
## 2. 运维与开发流程 (DevOps & Workflow)
|
||||
|
||||
我们使用 **Tilt** 管理 Docker Compose 环境。这意味着你不需要手动 `docker-compose up/down` 来应用代码变更。
|
||||
|
||||
### 2.1 启动与更新
|
||||
1. **启动环境**:
|
||||
在项目根目录运行:
|
||||
```bash
|
||||
tilt up
|
||||
```
|
||||
这会启动所有服务,并打开 Tilt UI (通常在 `http://localhost:10350`)。
|
||||
|
||||
2. **代码更新**:
|
||||
* 直接在 IDE 中修改代码并保存。
|
||||
* **Tilt 会自动检测变更**:
|
||||
* 如果是前端代码,Tilt 会触发前端热更新。
|
||||
* 如果是 Rust 服务代码,Tilt 会在容器内或宿主机触发增量编译并重启服务。
|
||||
* **操作建议**:修改代码后,只需**等待一会儿**,观察 Tilt UI 变绿即可。无需手动重启容器。
|
||||
|
||||
3. **配置变更**:
|
||||
* 如果修改了 `docker-compose.yml` 或 `.env`,Tilt 通常也会检测到并重建相关资源。
|
||||
|
||||
### 2.2 快速重置数据库 (如有必要)
|
||||
如果遇到严重的数据不一致或认证问题,可使用以下命令重置数据库(**警告:数据将丢失,但会自动 Seed 默认模板**):
|
||||
```bash
|
||||
docker-compose down postgres-db
|
||||
docker volume rm fundamental_analysis_pgdata
|
||||
docker-compose up -d postgres-db
|
||||
# 等待几秒后
|
||||
# Tilt 会自动重启依赖 DB 的服务,触发 Seeding
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. 调试与分析工具 (Debugging Tools)
|
||||
|
||||
为了快速诊断跨服务的问题,我们提供了一个能够聚合查看所有容器最新日志的脚本。
|
||||
|
||||
### 3.1 `inspect_logs.sh` 使用指南
|
||||
|
||||
该脚本位于 `scripts/inspect_logs.sh`。它能一次性输出所有关键服务的最后 N 行日志,避免手动切换容器查看。
|
||||
|
||||
* **基本用法** (默认显示最后 10 行):
|
||||
```bash
|
||||
./scripts/inspect_logs.sh
|
||||
```
|
||||
|
||||
* **指定行数** (例如查看最后 50 行):
|
||||
```bash
|
||||
./scripts/inspect_logs.sh 50
|
||||
```
|
||||
|
||||
### 3.2 分析策略
|
||||
|
||||
当遇到 "点击无反应" 或 "流程卡住" 时,请按以下步骤操作:
|
||||
|
||||
1. **运行脚本**:`./scripts/inspect_logs.sh 20`
|
||||
2. **检查 API Gateway**:
|
||||
* 是否有 `Received data fetch request`? -> 如果无,说明前端没发请求。
|
||||
* 是否有 `Publishing analysis generation command`? -> 如果无,说明 Gateway 没收到生成指令,或者内部逻辑(如等待 Provider)卡住了。
|
||||
3. **检查 Provider**:
|
||||
* 是否有 `Degraded` 或 `Error` 日志?(如当前的 Finnhub 问题)
|
||||
4. **检查 Report Generator**:
|
||||
* 是否有 `Received NATS command`? -> 如果无,说明消息没发过来。
|
||||
|
||||
---
|
||||
|
||||
## 4. 当前待办与修复建议 (Action Items)
|
||||
|
||||
为了打通流程,我们需要解决 Finnhub 导致的阻塞问题。
|
||||
|
||||
1. **修复配置**:
|
||||
* 在 `config/data_sources.yaml` (或数据库 `configs` 表) 中配置有效的 Finnhub API Key。
|
||||
* 或者,暂时在配置中**禁用** Finnhub (`enabled: false`),让前端忽略该 Provider。
|
||||
|
||||
2. **前端容错**:
|
||||
* 检查前端 `useReportEngine.ts`。
|
||||
* 确保即使某个 Provider 失败/超时,用户依然可以强制触发 "Generate Report"。
|
||||
|
||||
3. **验证**:
|
||||
* 使用 `inspect_logs.sh` 确认 Finnhub 不再报错,或已被跳过。
|
||||
* 确认 API Gateway 日志中出现 `Publishing analysis generation command`。
|
||||
|
||||
@ -0,0 +1,144 @@
|
||||
# 测试策略设计文档:基于 Docker 环境的组件测试与 Orchestrator 逻辑验证
|
||||
|
||||
> **文档使用说明**:
|
||||
> 本文档不仅作为测试设计方案,也是测试实施过程中的**Living Document (活文档)**。
|
||||
> 请参阅第 4 节 "执行状态追踪 (Execution Status Tracking)" 了解当前进度、Milestones 和 Pending Tasks。
|
||||
> 在每次完成重要步骤后,请更新此文档的状态部分。
|
||||
|
||||
## 1. 策略概述 (Strategy Overview)
|
||||
|
||||
响应“无 Mock、全真实环境”的要求,结合“Rustic 强类型”设计原则,我们将采用 **混合测试策略 (Hybrid Strategy)**:
|
||||
|
||||
1. **I/O 密集型服务 (Providers & ReportGen)**: 采用 **基于 Docker Compose 的组件集成测试**。
|
||||
* 直接连接真实的 Postgres, NATS 和第三方 API (Alphavantage/LLM)。
|
||||
* 验证“端到端”的功能可用性(Key 是否有效、数据格式是否兼容)。
|
||||
2. **逻辑密集型服务 (Orchestrator)**: 采用 **基于 Trait 的内存测试 (In-Memory Testing)**。
|
||||
* 通过 Trait 抽象外部依赖,使用简单的内存实现 (Fake) 替代真实服务。
|
||||
* 实现毫秒级反馈,覆盖复杂的状态机跳转和边界条件。
|
||||
|
||||
---
|
||||
|
||||
## 2. 实施阶段 (Implementation Phases)
|
||||
|
||||
### Phase 1: 测试基础设施 (Infrastructure)
|
||||
|
||||
* **Docker Environment**: `docker-compose.test.yml`
|
||||
* `postgres-test`: 端口 `5433:5432`
|
||||
* `nats-test`: 端口 `4223:4222`
|
||||
* `persistence-test`: 端口 `3001:3000` (Data Persistence Service 本身也视作基础设施的一部分)
|
||||
* **Abstraction (Refactoring)**:
|
||||
* 在 `workflow-orchestrator-service` 中定义 `WorkflowRepository` 和 `CommandPublisher` traits,用于解耦逻辑测试。
|
||||
|
||||
### Phase 2: 微服务组件测试 (IO-Heavy Services)
|
||||
**执行方式**: 宿主机运行 `cargo test`,环境变量指向 Phase 1 启动的 Docker 端口。
|
||||
|
||||
#### 1. Data Providers (数据源)
|
||||
验证从 API 获取数据并存入系统的能力。
|
||||
* **Alphavantage Provider**: (Key: `alphaventage_key`)
|
||||
* Input: `FetchCompanyDataCommand`
|
||||
* Assert: DB 中存入 SessionData (Profile/Financials),NATS 发出 `FinancialsPersistedEvent`。
|
||||
* **Tushare Provider**: (Key: `tushare_key`)
|
||||
* Input: `FetchCompanyDataCommand` (CN Market)
|
||||
* Assert: 同上。
|
||||
* **Finnhub Provider**: (Key: `finnhub_key`)
|
||||
* Input: `FetchCompanyDataCommand`
|
||||
* Assert: 同上。
|
||||
* **YFinance Provider**: (No Key)
|
||||
* Input: `FetchCompanyDataCommand`
|
||||
* Assert: 同上。
|
||||
|
||||
#### 2. Report Generator (报告生成器)
|
||||
验证从 Persistence 读取数据并调用 LLM 生成报告的能力。
|
||||
* **Key**: `openrouter_key` (Model: `google/gemini-flash-1.5` 或其他低成本模型)
|
||||
* **Pre-condition**: 需要先往 Persistence (localhost:3001) 插入一些伪造的 SessionData (Financials/Price),否则 LLM 上下文为空。
|
||||
* **Input**: `GenerateReportCommand`
|
||||
* **Logic**:
|
||||
1. Service 从 Persistence 读取数据。
|
||||
2. Service 组装 Prompt 调用 OpenRouter API。
|
||||
3. Service 将生成的 Markdown 存回 Persistence。
|
||||
* **Assert**:
|
||||
* NATS 收到 `ReportGeneratedEvent`。
|
||||
* Persistence 中能查到 `analysis_report` 类型的 SessionData,且内容非空。
|
||||
|
||||
### Phase 3: Orchestrator 逻辑测试 (Logic-Heavy)
|
||||
**执行方式**: 纯内存单元测试,无需 Docker。
|
||||
|
||||
* **Refactoring**: 将 Orchestrator 的核心逻辑 `WorkflowEngine` 修改为接受 `Box<dyn WorkflowRepository>` 和 `Box<dyn CommandPublisher>`。
|
||||
* **Test Suite**:
|
||||
* **DAG Construction**: 给定不同 Template ID,验证生成的 DAG 结构(依赖关系)是否正确。
|
||||
* **State Transition**:
|
||||
* Scenario 1: Happy Path (所有 Task 成功 -> Workflow 完成)。
|
||||
* Scenario 2: Dependency Failure (上游失败 -> 下游 Skipped)。
|
||||
* Scenario 3: Resume (模拟服务重启,从 Repository 加载状态并继续)。
|
||||
* **Policy Check**: 验证 "At least one provider" 策略是否生效。
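
上述解耦方式的一个最小化草图,仅作示意:trait 名取自 Phase 1,但方法签名为简化后的假设版本;`InMemoryPublisher` 仅用于内存测试。

```rust
use std::sync::Mutex;

// 示意:Orchestrator 依赖的发布抽象(签名为假设的简化版,WorkflowRepository 同理)
#[async_trait::async_trait]
pub trait CommandPublisher: Send + Sync {
    async fn publish(&self, subject: &str, payload: Vec<u8>) -> anyhow::Result<()>;
}

// 测试用内存实现:只记录发布过的消息,供断言使用
#[derive(Default)]
pub struct InMemoryPublisher {
    pub published: Mutex<Vec<(String, Vec<u8>)>>,
}

#[async_trait::async_trait]
impl CommandPublisher for InMemoryPublisher {
    async fn publish(&self, subject: &str, payload: Vec<u8>) -> anyhow::Result<()> {
        self.published.lock().unwrap().push((subject.to_string(), payload));
        Ok(())
    }
}

// WorkflowEngine 只面向 trait 编程,测试时注入 InMemoryPublisher 即可毫秒级运行
pub struct WorkflowEngine {
    publisher: Box<dyn CommandPublisher>,
}

impl WorkflowEngine {
    pub fn new(publisher: Box<dyn CommandPublisher>) -> Self {
        Self { publisher }
    }
}
```
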
|
||||
|
||||
### Phase 4: 全链路验收测试 (E2E)
|
||||
**执行方式**: `scripts/run_e2e.sh` (Docker + Rust Test Runner)
|
||||
|
||||
* **配置策略**:
|
||||
* 动态注入测试配置 (`setup_test_environment`):
|
||||
* 注册 `simple_test_analysis` 模板。
|
||||
* 配置 LLM Provider (`openrouter`/`new_api`) 使用 `google/gemini-2.5-flash-lite`。
|
||||
* **超时控制**:
|
||||
* SSE 连接监听设置 60秒硬性超时,防止长连接假死。
|
||||
* **Scenarios**:
|
||||
* **Scenario A (Happy Path)**: 使用 `simple_test_analysis` 模板完整运行。
|
||||
* **Scenario B (Recovery)**: 模拟 Orchestrator 重启,验证状态恢复。 (SKIPPED: Requires DB Persistence)
|
||||
* **Scenario C (Partial Failure)**: 模拟非关键 Provider (Tushare) 故障,验证工作流不受影响。
|
||||
* **Scenario D (Invalid Input)**: 使用无效 Symbol,验证错误传播和快速失败。
|
||||
* **Scenario E (Module Failure)**: 模拟 Analysis 模块内部错误(如配置错误),验证工作流终止。
|
||||
* **Status**: ✅ Completed (2025-11-21)
|
||||
|
||||
---
|
||||
|
||||
## 3. 执行计划 (Action Plan)
|
||||
|
||||
1. **Environment**: 创建 `docker-compose.test.yml` 和控制脚本。 ✅
|
||||
2. **Providers Test**: 编写 4 个 Data Provider 的集成测试。 ✅
|
||||
3. **ReportGen Test**: 编写 Report Generator 的集成测试(含数据预埋逻辑)。 ✅
|
||||
4. **Orchestrator Refactor**: 引入 Traits 并编写内存测试。 ✅
|
||||
5. **Final Verification**: 运行全套测试。 ✅
|
||||
|
||||
---
|
||||
|
||||
## 4. 执行状态追踪 (Execution Status Tracking)
|
||||
|
||||
### 当前状态 (Current Status)
|
||||
* **日期**: 2025-11-21
|
||||
* **阶段**: Phase 4 - E2E Testing Completed
|
||||
* **最近活动**:
|
||||
* 修复了测试模板配置错误导致 Scenario A 超时的问题。
|
||||
* 修复了 Orchestrator 错误广播 Analysis 失败导致 Scenario C 误判的问题。
|
||||
* 完整验证了 Scenario A, C, D, E。
|
||||
* 暂时跳过 Scenario B (待持久化层就绪后启用)。
|
||||
|
||||
### 历史记录 (Milestones)
|
||||
| 日期 | 阶段 | 事件/变更 | 状态 |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| 2025-11-20 | Planning | 完成测试策略文档编写,确定混合测试方案。 | ✅ Completed |
|
||||
| 2025-11-20 | Phase 1 | 创建 `docker-compose.test.yml` 和基础设施。 | ✅ Completed |
|
||||
| 2025-11-20 | Phase 2 | 完成 Data Providers 集成测试代码。 | ✅ Completed |
|
||||
| 2025-11-20 | Phase 2 | 完成 Report Generator 集成测试代码。 | ✅ Completed |
|
||||
| 2025-11-20 | Phase 3 | 完成 Orchestrator 重构与内存测试。 | ✅ Completed |
|
||||
| 2025-11-21 | Phase 4 | 修复 SSE 超时问题,增加动态配置注入。 | ✅ Completed |
|
||||
| 2025-11-21 | Phase 4 | 实现并验证异常场景 (Partial Failure, Invalid Input, Module Error)。 | ✅ Completed |
|
||||
|
||||
### 待处理项 (Next Steps)
|
||||
- [ ] **Persistence**: 为 Orchestrator 引入 Postgres 存储,启用 Scenario B。
|
||||
- [ ] **CI Integration**: 将 `run_e2e.sh` 集成到 CI 流水线。
|
||||
|
||||
## 5. 未来展望 (Future Outlook)
|
||||
|
||||
随着系统演进,建议增加以下测试场景:
|
||||
|
||||
1. **Network Resilience (网络分区)**:
|
||||
* 使用 `toxiproxy` 或 Docker Network 操作模拟网络中断。
|
||||
* 验证服务的重试机制 (Retry Policy) 和幂等性。
|
||||
2. **Concurrency & Load (并发与负载)**:
|
||||
* 同时启动 10+ 个工作流,验证 Orchestrator 调度和 Provider 吞吐量。
|
||||
* 验证 Rate Limiting 是否生效(避免被上游 API 封禁)。
|
||||
3. **Long-Running Workflows (长流程)**:
|
||||
* 测试包含数十个步骤、运行时间超过 5 分钟的复杂模板。
|
||||
* 验证 SSE 连接保活和超时处理。
|
||||
4. **Data Integrity (数据一致性)**:
|
||||
* 验证 Fetch -> Persistence -> Report Gen 链路中的数据精度(小数位、时区)。
|
||||
@ -0,0 +1,68 @@
|
||||
# 后端 API 就绪性与接口验证报告
|
||||
|
||||
**日期**: 2025-11-21
|
||||
**状态**: ✅ Backend Ready for Frontend Integration (全链路通过)
|
||||
**作者**: AI Assistant
|
||||
|
||||
## 1. 概述
|
||||
|
||||
本报告总结了对 Fundamental Analysis System 后端进行的全面 API 级端到端测试结果。
|
||||
我们通过 CURL 脚本完全模拟了前端的用户行为(配置加载、工作流触发、SSE 事件监听、数据回读),验证了后端的契约实现和稳定性。
|
||||
|
||||
测试表明,后端核心功能已经就绪,前端可以开始进行对接和调试。所有关键数据源接口(包括此前不稳定的 Profile 获取)均已修复并验证通过。
|
||||
|
||||
## 2. 测试结果摘要
|
||||
|
||||
| 测试项 | 描述 | 结果 | 备注 |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| **System Health** | API Gateway 健康检查 | ✅ PASS | HTTP 200 |
|
||||
| **Configuration** | LLM Providers & Templates 配置读取 | ✅ PASS | 成功加载配置 |
|
||||
| **Workflow Core** | 启动工作流 -> 任务调度 -> 完成 | ✅ PASS | 无超时,无卡死 |
|
||||
| **SSE Streaming** | 实时事件推送 (Started, TaskUpdate, Completed) | ✅ PASS | 前端进度条可正常驱动 |
|
||||
| **LLM Integration** | 提示词组装 -> 调用 OpenRouter -> 生成报告 | ✅ PASS | **已修复 64K Context 限制问题** |
|
||||
| **Data Persistence** | 分析报告 (AnalysisResult) 入库 | ✅ PASS | 最终结果可查 |
|
||||
| **Data Fetching** | 财务数据 (Financials) 入库 | ✅ PASS | 成功拉取并解析数据 |
|
||||
| **Company Profile** | 公司基本信息入库 | ✅ PASS | **已修复并发限流问题** |
|
||||
|
||||
## 3. 关键修复与改进
|
||||
|
||||
在验证过程中,我们发现并修复了以下阻碍性问题:
|
||||
|
||||
1. **Docker 网络与端口暴露**:
|
||||
* 修改 `docker-compose.yml`,暴露 `api-gateway` 的 `4000` 端口。
|
||||
* 修改 `frontend/next.config.mjs`,支持动态配置后端地址 (`API_GATEWAY_URL`)。
|
||||
|
||||
2. **LLM Context 溢出保护**:
|
||||
* 发现 `report-generator-service` 在处理大量财务数据时可能生成超过 LLM 上下文限制的 Prompt。
|
||||
* **修复**: 实施了 **64K 字符硬截断** 策略。如果 Prompt 过长,会自动截断并附加系统警告,确保 LLM 请求永远不会因为 Payload 过大而超时或被拒。(截断逻辑的示意草图见本节末尾。)
|
||||
|
||||
3. **AlphaVantage 数据源稳定性 (Profile 404 修复)**:
|
||||
* **现象**: 免费版 Key 存在 5次/分钟 的 API 速率限制,并发请求导致 Profile 接口频繁失败。
|
||||
* **修复**: 重构了 `alphavantage-provider-service` 的 Worker 逻辑,将并发请求改为 **串行执行**,并在每个请求间增加了 **2秒强制延迟**。同时引入了显式的错误检查机制(“Early Fail”),确保不会静默吞掉 API 错误。验证证实,现在可以稳定获取 `CompanyProfile`。
|
||||
|
||||
4. **测试脚本竞态条件**:
|
||||
* 优化了 E2E 测试脚本,解决了 SSE 连接建立与工作流启动之间的微小时序问题,确保能稳定捕获所有事件。
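
第 2 点提到的 64K 字符硬截断,逻辑大致如下(仅示意:截断阈值、警告文案与函数名均为假设,并非 report-generator-service 的原样代码):

```rust
/// 示意:对组装完成的 Prompt 做硬截断,防止超出 LLM 上下文限制
fn truncate_prompt(prompt: &str, max_chars: usize) -> String {
    if prompt.chars().count() <= max_chars {
        return prompt.to_string();
    }
    // 按字符截断(避免在多字节字符中间切断),并附加系统警告
    let truncated: String = prompt.chars().take(max_chars).collect();
    format!(
        "{truncated}\n\n[SYSTEM WARNING] Context truncated to {max_chars} characters; \
         some raw data was omitted."
    )
}
```
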
|
||||
|
||||
## 4. 工具与资源
|
||||
|
||||
### 4.1 调试工具 (Baseline Script)
|
||||
我们交付了一个强大的 API 测试脚本,可用作未来的回归测试基准:
|
||||
* 路径: `tests/api-e2e/run_api_test.sh`
|
||||
* 用法: `./tests/api-e2e/run_api_test.sh http://localhost:4000/v1`
|
||||
|
||||
## 5. 下一步 (前端对接指南)
|
||||
|
||||
前端开发环境已准备就绪。您可以直接启动前端进行联调:
|
||||
|
||||
1. **确保后端运行**: `tilt up` 或 `docker-compose up -d`。
|
||||
2. **启动前端**:
|
||||
```bash
|
||||
cd frontend
|
||||
# 指向本地暴露的 4000 端口
|
||||
export API_GATEWAY_URL=http://localhost:4000
|
||||
npm run dev
|
||||
```
|
||||
3. **验证**: 打开浏览器访问 `http://localhost:3000`,尝试输入 "AAPL" 或 "IBM" 进行分析。
|
||||
|
||||
---
|
||||
**结论**: 后端 API 契约稳定,逻辑闭环,数据源集成问题已解决,已完全具备与前端集成的条件。
|
||||
@ -0,0 +1,96 @@
|
||||
# Phase 4: End-to-End (E2E) 测试计划与执行方案
|
||||
|
||||
## 1. 测试目标
|
||||
本次 E2E 测试旨在验证系统在“全链路真实环境”下的行为,涵盖**正常流程**、**异常恢复**及**组件动态插拔**场景。不涉及前端 UI,而是通过模拟 HTTP/SSE 客户端直接与后端交互。
|
||||
|
||||
核心验证点:
|
||||
1. **业务闭环**: 从 `POST /start` 到 SSE 接收 `WorkflowCompleted` 再到最终报告生成。
|
||||
2. **状态一致性**: Orchestrator 重启后,能否通过 `SyncStateCommand` 恢复上下文并继续执行。
|
||||
3. **容错机制**: 当部分 Data Provider 下线时,策略引擎是否按预期工作(如 "At least one provider")。
|
||||
4. **并发稳定性**: 多个 Workflow 同时运行时互不干扰。
|
||||
|
||||
## 2. 测试环境架构
|
||||
测试运行器 (`end-to-end` Rust Crate) 将作为外部观察者和控制器。
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
TestRunner[Rust E2E Runner] -->|HTTP/SSE| Gateway[API Gateway]
|
||||
TestRunner -->|Docker API| Docker[Docker Engine]
|
||||
|
||||
subgraph "Docker Compose Stack"
|
||||
Gateway --> Orchestrator
|
||||
Orchestrator --> NATS
|
||||
NATS --> Providers
|
||||
NATS --> ReportGen
|
||||
Providers --> Postgres
|
||||
end
|
||||
|
||||
Docker -.->|Stop/Start| Orchestrator
|
||||
Docker -.->|Stop/Start| Providers
|
||||
```
|
||||
|
||||
## 3. 详细测试场景 (Scenarios)
|
||||
|
||||
### Scenario A: The Happy Path (基准测试)
|
||||
* **目标**: 验证标准流程无误。
|
||||
* **步骤**:
|
||||
1. 发送 `POST /api/v2/workflow/start` (Symbol: AAPL/000001.SZ)。
|
||||
2. 建立 SSE 连接监听 `events.workflow.{id}`。
|
||||
3. 验证接收到的事件序列:
|
||||
* `WorkflowStarted` (含完整 DAG)
|
||||
* `TaskStateChanged` (Pending -> Running -> Completed)
|
||||
* `TaskStreamUpdate` (Report 内容流式传输)
|
||||
* `WorkflowCompleted`
|
||||
4. **断言**: 最终报告内容非空,数据库中存在 Analysis 记录。
|
||||
|
||||
### Scenario B: Brain Transplant (Orchestrator 宕机恢复)
|
||||
* **目标**: 验证 Orchestrator 的状态持久化与快照恢复能力。
|
||||
* **步骤**:
|
||||
1. 启动 Workflow。
|
||||
2. 等待至少一个 Data Fetch Task 完成 (Receiving `TaskCompleted`)。
|
||||
3. **Action**: `docker stop workflow-orchestrator-service`。
|
||||
4. 等待 5 秒,**Action**: `docker start workflow-orchestrator-service`。
|
||||
5. Test Runner 重新建立 SSE 连接 (自动触发 `SyncStateCommand`)。
|
||||
6. **断言**:
|
||||
* 收到 `WorkflowStateSnapshot` 事件。
|
||||
* 快照中已完成的任务状态保持 `Completed`。
|
||||
* 流程继续向下执行,直到最终完成。
|
||||
|
||||
### Scenario C: Partial Failure (组件拔插)
|
||||
* **目标**: 验证 "At least one provider" 容错策略。
|
||||
* **步骤**:
|
||||
1. **Action**: `docker stop tushare-provider-service` (模拟 Tushare 挂掉)。
|
||||
2. 启动 Workflow (Symbol: 000001.SZ,需涉及 Tushare)。
|
||||
3. **断言**:
|
||||
* Tushare 对应的 Task 状态变为 `Failed` 或 `Skipped`。
|
||||
* 由于还有其他 Provider (或模拟数据),Orchestrator 判定满足 "At least one" 策略。
|
||||
* 下游 Analysis Task **正常启动** (而不是被 Block)。
|
||||
* 流程最终显示 `WorkflowCompleted` (可能带有 Warning)。
|
||||
4. **Cleanup**: `docker start tushare-provider-service`。
|
||||
|
||||
### Scenario D: Network Jitter (网络中断模拟)
|
||||
* **目标**: 验证 Gateway 到 Orchestrator 通讯中断后的恢复。
|
||||
* **步骤**:
|
||||
1. 启动 Workflow。
|
||||
2. Test Runner 主动断开 SSE 连接。
|
||||
3. 等待 10 秒。
|
||||
4. Test Runner 重连 SSE。
|
||||
5. **断言**: 立即收到 `WorkflowStateSnapshot`,且补齐了断连期间产生的状态变更。
|
||||
|
||||
## 4. 工程实现 (Rustic Implementation)
|
||||
新建独立 Rust Crate `tests/end-to-end`,不依赖现有 workspace 的构建配置,独立编译运行。
|
||||
|
||||
**依赖栈**:
|
||||
* `reqwest`: HTTP Client
|
||||
* `eventsource-stream` + `futures`: SSE Handling
|
||||
* `bollard`: Docker Control API
|
||||
* `tokio`: Async Runtime
|
||||
* `anyhow`: Error Handling
|
||||
* `serde`: JSON Parsing
|
||||
|
||||
**执行方式**:
|
||||
```bash
|
||||
# 在 tests/end-to-end 目录下
|
||||
cargo run -- --target-env test
|
||||
```
|
||||
|
||||
@ -0,0 +1,130 @@
|
||||
# 重构任务:动态数据提供商配置架构 (Dynamic Data Provider Configuration)
|
||||
|
||||
## 1. 背景与目标 (Background & Objective)
|
||||
|
||||
目前系统的前端页面 (`DataSourceTab`) 硬编码了支持的数据源列表(Tushare, Finnhub 等)及其配置表单。这导致每增加一个新的数据源,都需要修改前端代码,违反了“单一来源”和“开闭原则”。
|
||||
|
||||
本次重构的目标是实现 **“前端无知 (Frontend Agnostic)”** 的架构:
|
||||
1. **后端驱动**:各 Provider 服务在启动注册时,声明自己的元数据(名称、描述)和配置规范(需要哪些字段)。
|
||||
2. **动态渲染**:前端通过 Gateway 获取所有已注册服务的元数据,动态生成配置表单。
|
||||
3. **通用交互**:测试连接、保存配置等操作通过统一的接口进行,不再针对特定 Provider 编写逻辑。
|
||||
|
||||
## 2. 核心数据结构设计 (Core Data Structures)
|
||||
|
||||
我们在 `common-contracts` 中扩展了服务注册的定义。
|
||||
|
||||
### 2.1 配置字段定义 (Config Field Definition)
|
||||
|
||||
不使用复杂的通用 JSON Schema,而是定义一套符合我们 UI 需求的强类型 Schema。
|
||||
|
||||
```rust
|
||||
/// 字段类型枚举
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub enum FieldType {
|
||||
Text, // 普通文本
|
||||
Password, // 密码/Token (前端掩码显示)
|
||||
Url, // URL 地址
|
||||
Boolean, // 开关
|
||||
Select, // 下拉选框 (需要 options)
|
||||
}
|
||||
|
||||
/// 单个配置字段的定义
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ConfigFieldSchema {
|
||||
pub key: String, // 字段键名 (如 "api_key")
|
||||
pub label: String, // 显示名称 (如 "API Token")
|
||||
pub field_type: FieldType,// 字段类型
|
||||
pub required: bool, // 是否必填
|
||||
pub placeholder: Option<String>, // 占位符
|
||||
pub default_value: Option<String>, // 默认值
|
||||
pub description: Option<String>, // 字段说明
|
||||
}
|
||||
```
|
||||
|
||||
### 2.2 服务元数据扩展 (Service Metadata Extension)
|
||||
|
||||
修改 `ServiceRegistration` 并增加了 `ProviderMetadata` 结构。
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct ProviderMetadata {
|
||||
pub id: String, // 服务ID (如 "tushare")
|
||||
pub name_en: String, // 英文名 (如 "Tushare Pro")
|
||||
pub name_cn: String, // 中文名 (如 "Tushare Pro (中国股市)")
|
||||
pub description: String, // 描述
|
||||
pub icon_url: Option<String>, // 图标 (可选)
|
||||
|
||||
/// 该服务需要的配置字段列表
|
||||
pub config_schema: Vec<ConfigFieldSchema>,
|
||||
|
||||
/// 是否支持“测试连接”功能
|
||||
pub supports_test_connection: bool,
|
||||
}
|
||||
```
|
||||
|
||||
## 3. 架构交互流程 (Architecture Flow)
|
||||
|
||||
### 3.1 服务注册 (Registration Phase)
|
||||
1. **Provider 启动** (如 `tushare-provider-service`):
|
||||
* 构建 `ProviderMetadata`,声明自己需要 `api_key` (必填, Password) 和 `api_url` (选填, Url, 默认值...)。
|
||||
* 调用 API Gateway 的 `/register` 接口,将 Metadata 发送给 Gateway。
|
||||
|
||||
2. **API Gateway**:
|
||||
* 在内存 Registry 中存储这些 Metadata。
|
||||
|
||||
### 3.2 前端渲染 (Rendering Phase)
|
||||
1. **前端请求**: `GET /v1/registry/providers` (网关聚合接口)。
|
||||
2. **网关响应**: 返回所有活跃 Provider 的 `ProviderMetadata` 列表。
|
||||
3. **前端请求**: `GET /v1/configs/data_sources` (获取用户已保存的配置值)。
|
||||
4. **UI 生成**:
|
||||
* 前端遍历 Metadata 列表。
|
||||
* 对每个 Provider,根据 `config_schema` 动态生成 Input/Switch 组件。
|
||||
* 将已保存的配置值填充到表单中。
|
||||
|
||||
### 3.3 动作执行 (Action Phase)
|
||||
1. **保存 (Save)**:
|
||||
* 前端收集表单数据,发送标准 JSON 到 `POST /v1/configs/data_sources`。
|
||||
* 后端持久化服务保存数据。
|
||||
|
||||
2. **测试连接 (Test Connection)**:
|
||||
* 前端发送 `POST /v1/configs/test` (注意:Gateway已有通用接口)。
|
||||
* Payload: `{ type: "tushare", data: { "api_key": "..." } }`。
|
||||
* 网关将请求路由到对应的 Provider 微服务进行自我检查。
|
||||
|
||||
## 4. 任务拆解 (Task Breakdown)
|
||||
|
||||
### 4.1 后端开发 (Backend Development) - [已完成/Completed]
|
||||
1. **Common Contracts**: 更新 `registry.rs`,添加 `ProviderMetadata` 和 `ConfigFieldSchema` 定义。 (Done)
|
||||
2. **API Gateway**:
|
||||
* 更新注册逻辑,接收并存储 Metadata。 (Done)
|
||||
* 新增接口 `GET /v1/registry/providers` 供前端获取元数据。 (Done)
|
||||
* **移除旧版接口**: 已删除 Legacy 的静态 schema 接口,强制转向动态机制。 (Done)
|
||||
* **单元测试**: 实现了 `ServiceRegistry` 的单元测试 (`registry_unit_test.rs`),覆盖注册、心跳、发现、注销等核心逻辑。 (Done)
|
||||
3. **Provider Services** (Tushare, Finnhub, etc.):
|
||||
* 实现 `ConfigSchema` 的构建逻辑。 (Done)
|
||||
* 更新注册调用,发送 Schema。 (Done for Tushare, Finnhub, AlphaVantage, YFinance)
|
||||
|
||||
### 4.2 后端验证测试 (Backend Verification) - [已完成/Completed]
|
||||
在开始前端重构前,已进行一次集成测试,确保后端服务启动后能正确交互。
|
||||
|
||||
1. **启动服务**: 使用 `tests/api-e2e/run_registry_test.sh` 启动最小化服务集。
|
||||
2. **API 验证**:
|
||||
* 调用 `GET /v1/registry/providers`,验证返回的 JSON 是否包含所有 Provider 的 Metadata 和 Schema。
|
||||
* 使用 `tests/api-e2e/registry_verifier.py` 脚本验证了 Tushare 的 Schema 字段正确性。
|
||||
3. **结果确认**:
|
||||
* ✅ Tushare 服务成功注册。
|
||||
* ✅ Schema 正确包含 `api_token` (Password, Required)。
|
||||
* ✅ E2E 测试集 (`tests/end-to-end`) 也已更新并验证通过。
|
||||
|
||||
### 4.3 前端重构 (Frontend Refactor) - [待办/Pending]
|
||||
1. **API Client**: 更新 Client SDK 以支持新的元数据接口。
|
||||
* **自动化更新脚本**: `scripts/update_api_spec.sh`
|
||||
* **执行逻辑**:
|
||||
1. 调用 `cargo test ... generate_openapi_json` 生成 `openapi.json`。
|
||||
2. 调用 `npm run gen:api` 重新生成前端 TypeScript 类型定义。
|
||||
* **输出位置**: `frontend/src/api/schema.gen.ts`
|
||||
2. **State Management**: 修改 `useConfig` hook,增加获取 Provider Metadata 的逻辑。
|
||||
3. **UI Refactor**:
|
||||
* 废弃 `supportedProviders` 硬编码。
|
||||
* 创建 `DynamicConfigForm` 组件,根据 `ConfigFieldSchema` 渲染界面。
|
||||
* 对接新的测试连接和保存逻辑。
|
||||
@ -0,0 +1,89 @@
|
||||
# 重构任务:统一 Data Provider 工作流抽象
|
||||
|
||||
## 1. 背景 (Background)
|
||||
|
||||
目前的 Data Provider 服务(Tushare, YFinance, AlphaVantage 等)在架构上存在严重的重复代码和实现不一致问题。每个服务都独立实现了完整的工作流,包括:
|
||||
- NATS 消息接收与反序列化
|
||||
- 缓存检查与写入 (Cache-Aside Pattern)
|
||||
- 任务状态管理 (Observability/Task Progress)
|
||||
- Session Data 持久化
|
||||
- NATS 事件发布 (Success/Failure events)
|
||||
|
||||
这种"散弹式"架构导致了以下问题:
|
||||
1. **Bug 易发且难以统一修复**:例如 Tushare 服务因未执行 NATS Flush 导致事件丢失,而 YFinance 却因为实现方式不同而没有此问题。修复一个 Bug 需要在每个服务中重复操作。
|
||||
2. **逻辑不一致**:不同 Provider 对缓存策略、错误处理、重试机制的实现可能存在细微差异,违背了系统的统一性。
|
||||
3. **维护成本高**:新增一个 Provider 需要复制粘贴大量基础设施代码(Boilerplate),容易出错。
|
||||
|
||||
## 2. 目标 (Objectives)
|
||||
|
||||
贯彻 "Rustic" 的设计理念(强类型、单一来源、早失败),通过控制反转(IoC)和模板方法模式,将**业务逻辑**与**基础设施逻辑**彻底分离。
|
||||
|
||||
- **单一来源 (Single Source of Truth)**:工作流的核心逻辑(缓存、持久化、通知)只在一个地方定义和维护。
|
||||
- **降低耦合**:具体 Provider 只需关注 "如何从 API 获取数据",而无需关心 "如何与系统交互"。
|
||||
- **提升稳定性**:统一修复基础设施层面的问题(如 NATS Flush),所有 Provider 自动受益。
|
||||
|
||||
## 3. 技术方案 (Technical Design)
|
||||
|
||||
### 3.1 核心抽象 (The Trait)
|
||||
|
||||
在 `common-contracts` 中定义纯粹的业务逻辑接口:
|
||||
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait DataProviderLogic: Send + Sync {
|
||||
/// Provider 的唯一标识符 (e.g., "tushare", "yfinance")
|
||||
fn provider_id(&self) -> &str;
|
||||
|
||||
/// 检查是否支持该市场 (前置检查)
|
||||
fn supports_market(&self, market: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// 核心业务:从外部源获取原始数据并转换为标准 DTO
|
||||
/// 不涉及任何 DB 或 NATS 操作
|
||||
async fn fetch_data(&self, symbol: &str) -> Result<(CompanyProfileDto, Vec<TimeSeriesFinancialDto>), anyhow::Error>;
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 通用工作流引擎 (The Engine)
|
||||
|
||||
实现一个泛型结构体或函数 `StandardFetchWorkflow`,封装所有基础设施逻辑:
|
||||
|
||||
1. **接收指令**:解析 `FetchCompanyDataCommand`。
|
||||
2. **前置检查**:调用 `supports_market`。
|
||||
3. **状态更新**:向 `AppState` 写入 "InProgress"。
|
||||
4. **缓存层**:
|
||||
* 检查 `persistence_client` 缓存。
|
||||
* HIT -> 直接返回。
|
||||
* MISS -> 调用 `fetch_data`,然后写入缓存。
|
||||
5. **持久化层**:将结果写入 `SessionData`。
|
||||
6. **事件通知**:
|
||||
* 构建 `FinancialsPersistedEvent`。
|
||||
* 发布 NATS 消息。
|
||||
* **关键:执行 `flush().await`。**
|
||||
7. **错误处理**:统一捕获错误,发布 `DataFetchFailedEvent`,更新 Task 状态为 Failed。
|
||||
|
||||
## 4. 执行步骤 (Execution Plan)
|
||||
|
||||
1. **基础设施准备**:
|
||||
* 在 `services/common-contracts` 中添加 `DataProviderLogic` trait。
|
||||
* 在 `services/common-contracts` (或新建 `service-kit` 模块) 中实现 `StandardFetchWorkflow`。
|
||||
|
||||
2. **重构 Tushare Service**:
|
||||
* 创建 `TushareFetcher` 实现 `DataProviderLogic`。
|
||||
* 删除 `worker.rs` 中的冗余代码,替换为对 `StandardFetchWorkflow` 的调用。
|
||||
* 验证 NATS Flush 问题是否自然解决。
|
||||
|
||||
3. **重构 YFinance Service**:
|
||||
* 同样方式重构,验证通用性。
|
||||
|
||||
4. **验证**:
|
||||
* 运行 E2E 测试,确保数据获取流程依然通畅。
|
||||
|
||||
## 5. 验收标准 (Acceptance Criteria)
|
||||
|
||||
- `common-contracts` 中包含清晰的 Trait 定义。
|
||||
- Tushare 和 YFinance 的 `worker.rs` 代码量显著减少(预计减少 60%+)。
|
||||
- 所有 Provider 的行为(日志格式、状态更新频率、缓存行为)完全一致。
|
||||
- 即使不手动写 `flush`,重构后的 Provider 也能可靠发送 NATS 消息。
|
||||
|
||||
@ -0,0 +1,97 @@
|
||||
# 设计方案 0: 系统总览与开发指南 (Overview & Setup)
|
||||
|
||||
## 1. 项目背景 (Context)
|
||||
|
||||
本项目是一个 **金融基本面分析系统 (Fundamental Analysis System)**。
|
||||
目标是通过自动化的工作流,从多个数据源(Tushare, YFinance)抓取数据,经过处理,最终生成一份包含估值、风险分析的综合报告。
|
||||
|
||||
本次重构旨在解决原系统调度逻辑僵化、上下文管理混乱、大文件支持差的问题。我们将构建一个基于 **Git 内核** 的分布式上下文管理系统。
|
||||
|
||||
## 2. 系统架构 (Architecture)
|
||||
|
||||
系统由四个核心模块组成:
|
||||
|
||||
1. **VGCS (Virtual Git Context System)**: 底层存储。基于 Git + Blob Store 的版本化文件系统。
|
||||
2. **DocOS (Document Object System)**: 逻辑层。提供“文档树”的裂变、聚合操作,屏蔽底层文件细节。
|
||||
3. **Worker Runtime**: 适配层。提供 SDK 给业务 Worker(Python/Rust),使其能读写上下文。
|
||||
4. **Orchestrator**: 调度层。负责任务分发、依赖管理和并行结果合并。
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Storage Layer (Shared Volume)"
|
||||
Repo[Git Bare Repo]
|
||||
Blob[Blob Store]
|
||||
end
|
||||
|
||||
subgraph "Library Layer (Rust Crate)"
|
||||
VGCS[VGCS Lib] --> Repo & Blob
|
||||
DocOS[DocOS Lib] --> VGCS
|
||||
end
|
||||
|
||||
subgraph "Execution Layer"
|
||||
Orch[Orchestrator Service] --> DocOS
|
||||
Worker[Python/Rust Worker] --> Runtime[Worker Runtime SDK]
|
||||
Runtime --> DocOS
|
||||
end
|
||||
|
||||
Orch -- NATS (RPC) --> Worker
|
||||
```
|
||||
|
||||
## 3. 技术栈规范 (Tech Stack)
|
||||
|
||||
### 3.1 Rust (VGCS, DocOS, Orchestrator)
|
||||
* **Version**: Stable (1.75+)
|
||||
* **Key Crates**:
|
||||
* `git2` (0.18): libgit2 bindings. **注意**: 默认开启 `vendored-openssl` feature 以确保静态链接。
|
||||
* `serde`, `serde_json`: 序列化。
|
||||
* `thiserror`, `anyhow`: 错误处理。
|
||||
* `async-nats` (0.33): 消息队列。
|
||||
* `tokio` (1.0+): 异步运行时。
|
||||
* `sha2`, `hex`: 哈希计算。
|
||||
|
||||
### 3.2 Python (Worker Runtime)
|
||||
* **Version**: 3.10+
|
||||
* **Package Manager**: Poetry
|
||||
* **Key Libs**:
|
||||
* `nats-py`: NATS Client.
|
||||
* `pydantic`: 数据验证。
|
||||
* (可选) `libgit2-python`: 如果需要高性能 Git 操作,否则通过 FFI 调用 Rust Lib 或 Shell Out。
|
||||
|
||||
## 4. 开发环境搭建 (Dev Setup)
|
||||
|
||||
### 4.1 目录准备
|
||||
在本地开发机上,我们需要模拟共享存储。
|
||||
```bash
|
||||
mkdir -p /tmp/workflow_dev/repos
|
||||
mkdir -p /tmp/workflow_dev/blobs
|
||||
export WORKFLOW_DATA_PATH=/tmp/workflow_dev
|
||||
```
|
||||
|
||||
### 4.2 Docker 环境
|
||||
为了确保一致性,所有服务应在 Docker Compose 中运行,挂载同一个 Volume。
|
||||
```yaml
|
||||
volumes:
|
||||
workflow_data:
|
||||
|
||||
services:
|
||||
orchestrator:
|
||||
image: rust-builder
|
||||
volumes:
|
||||
- workflow_data:/mnt/workflow_data
|
||||
environment:
|
||||
- WORKFLOW_DATA_PATH=/mnt/workflow_data
|
||||
```
|
||||
|
||||
## 5. 模块集成方式
|
||||
* **VGCS & DocOS**: 将实现为一个独立的 Rust Workspace Member (`crates/workflow-context`)。
|
||||
* **Orchestrator**: 引用该 Crate。
|
||||
* **Worker (Rust)**: 引用该 Crate。
|
||||
* **Worker (Python)**: 通过 `PyO3` 绑定该 Crate,或者(MVP阶段)重写部分逻辑/Shell调用。**建议 MVP 阶段 Python SDK 仅封装 git binary 调用以简化构建**。
|
||||
|
||||
## 6. 新人上手路径
|
||||
1. 阅读 `design_1_vgcs.md`,理解如何在不 clone 的情况下读写 git object。
|
||||
2. 阅读 `design_2_doc_os.md`,理解“文件变目录”的逻辑。
|
||||
3. 实现 Rust Crate `workflow-context` (包含 VGCS + DocOS)。
|
||||
4. 编写单元测试,验证 File -> Blob 的分流逻辑。
|
||||
5. 集成到 Orchestrator。
|
||||
|
||||
@ -0,0 +1,150 @@
|
||||
# 设计方案 1: VGCS (Virtual Git Context System) [详细设计版]
|
||||
|
||||
## 1. 定位与目标
|
||||
|
||||
VGCS 是底层存储引擎,提供版本化、高性能、支持大文件的分布式文件系统接口。
|
||||
它基于 **git2-rs (libgit2)** 实现,直接操作 Git Object DB。
|
||||
|
||||
## 2. 物理存储规范 (Specification)
|
||||
|
||||
所有服务共享挂载卷 `/mnt/workflow_data`。
|
||||
|
||||
### 2.1 目录结构
|
||||
```text
|
||||
/mnt/workflow_data/
|
||||
├── repos/
|
||||
│ └── {request_id}.git/ <-- Bare Git Repo
|
||||
│ ├── HEAD
|
||||
│ ├── config
|
||||
│ ├── objects/ <-- Standard Git Objects
|
||||
│ └── refs/
|
||||
└── blobs/
|
||||
└── {request_id}/ <-- Blob Store Root
|
||||
├── ab/
|
||||
│ └── ab1234... <-- Raw File (SHA-256 of content)
|
||||
└── cd/
|
||||
└── cd5678...
|
||||
```
|
||||
|
||||
### 2.2 Blob 引用文件格式 (.ref)
|
||||
当文件 > 1MB 时,Git Blob 存储如下 JSON 内容:
|
||||
```json
|
||||
{
|
||||
"$vgcs_ref": "v1",
|
||||
"sha256": "ab1234567890...",
|
||||
"size": 10485760,
|
||||
"mime_type": "application/json",
|
||||
"original_name": "data.json"
|
||||
}
|
||||
```
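
与上述 JSON 对应的 Rust 结构体草图(serde 重命名方式与路径推导函数为示意,非最终实现):

```rust
use serde::{Deserialize, Serialize};

/// 示意:大文件引用。read_file 会先尝试把 Git Blob 解析为该结构:
/// 解析成功说明是大文件引用,需转到 Blob Store 读取;失败则按普通小文件处理。
#[derive(Debug, Serialize, Deserialize)]
pub struct BlobRef {
    #[serde(rename = "$vgcs_ref")]
    pub vgcs_ref: String, // 版本号,如 "v1"
    pub sha256: String,
    pub size: u64,
    pub mime_type: String,
    pub original_name: String,
}

/// 示意:由 sha256 推导 Blob Store 中的物理路径
pub fn blob_store_path(root: &str, req_id: &str, sha256: &str) -> std::path::PathBuf {
    std::path::Path::new(root)
        .join("blobs")
        .join(req_id)
        .join(&sha256[0..2])
        .join(sha256)
}
```
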
|
||||
|
||||
## 3. 核心接口定义 (Rust Trait)
|
||||
|
||||
### 3.1 ContextStore Trait
|
||||
|
||||
```rust
|
||||
use anyhow::Result;
|
||||
use std::io::Read;
|
||||
|
||||
pub trait ContextStore {
|
||||
/// 初始化仓库
|
||||
fn init_repo(&self, req_id: &str) -> Result<()>;
|
||||
|
||||
/// 读取文件内容
|
||||
/// 自动逻辑:读取 Git Blob -> 解析 JSON -> 如果是 Ref,读 Blob Store;否则直接返回
|
||||
fn read_file(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Box<dyn Read>>;
|
||||
|
||||
/// 读取目录
|
||||
fn list_dir(&self, req_id: &str, commit_hash: &str, path: &str) -> Result<Vec<DirEntry>>;
|
||||
|
||||
/// 获取变更
|
||||
fn diff(&self, req_id: &str, from_commit: &str, to_commit: &str) -> Result<Vec<FileChange>>;
|
||||
|
||||
/// 三路合并 (In-Memory)
|
||||
/// 返回新生成的 Tree OID,不生成 Commit
|
||||
fn merge_trees(&self, req_id: &str, base: &str, ours: &str, theirs: &str) -> Result<String>;
|
||||
|
||||
/// 创建写事务
|
||||
fn begin_transaction(&self, req_id: &str, base_commit: &str) -> Result<Box<dyn Transaction>>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DirEntry {
|
||||
pub name: String,
|
||||
pub kind: EntryKind, // File | Dir
|
||||
pub object_id: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FileChange {
|
||||
Added(String),
|
||||
Modified(String),
|
||||
Deleted(String),
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 Transaction Trait (写操作)
|
||||
|
||||
```rust
|
||||
pub trait Transaction {
|
||||
/// 写入文件
|
||||
/// 内部逻辑:
|
||||
/// 1. 计算 content SHA-256
|
||||
/// 2. if size > 1MB: 写 Blob Store,构造 Ref JSON
|
||||
/// 3. 写 Git Blob (Raw 或 Ref)
|
||||
/// 4. 更新内存中的 Index/TreeBuilder
|
||||
fn write(&mut self, path: &str, content: &[u8]) -> Result<()>;
|
||||
|
||||
/// 删除文件
|
||||
fn remove(&mut self, path: &str) -> Result<()>;
|
||||
|
||||
/// 提交更改
|
||||
/// 1. Write Tree Object
|
||||
/// 2. Create Commit Object (Parent = base_commit)
|
||||
/// 3. Return new Commit Hash
|
||||
fn commit(self, message: &str, author: &str) -> Result<String>;
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 实现细节规范
|
||||
|
||||
### 4.1 读操作流程 (read_file)
|
||||
1. Open Repo: `/mnt/workflow_data/repos/{req_id}.git`
|
||||
2. Locate Tree: Parse `commit_hash` -> `tree_id`.
|
||||
3. Find Entry: 在 Tree 中查找 `path`.
|
||||
4. Read Blob: 读取 Git Blob 内容。
|
||||
5. **Check Ref**: 尝试解析为 `BlobRef` 结构。
|
||||
* **Success**: 构建 Blob Store 路径 `/mnt/workflow_data/blobs/{req_id}/{sha256[0..2]}/{sha256}`,打开文件流。
|
||||
* **Fail** (说明是普通小文件): 返回 `Cursor::new(blob_content)`.
|
||||
|
||||
### 4.2 写操作流程 (write)
|
||||
1. Check Size: `content.len()`.
|
||||
2. **Large File Path**:
|
||||
* Calc SHA-256.
|
||||
* Write to Blob Store (Ensure parent dir exists).
|
||||
* Content = JSON String of `BlobRef`.
|
||||
3. **Git Write**:
|
||||
* `odb.write(Blob, content)`.
|
||||
* Update in-memory `git2::TreeBuilder`.
|
||||
|
||||
### 4.3 合并流程 (merge_trees)
|
||||
1. Load Trees: `base_tree`, `our_tree`, `their_tree`.
|
||||
2. `repo.merge_trees(base, our, their, opts)`.
|
||||
3. Check Conflicts: `index.has_conflicts()`.
|
||||
* If conflict: Return Error (Complex resolution left to Orchestrator Agent).
|
||||
4. Write Result: `index.write_tree_to(repo)`.
|
||||
5. Return Tree Hash.
|
||||
|
||||
## 5. Web UI & Fuse
|
||||
* **Web**: 基于 Axum,路由 `GET /api/v1/repo/:req_id/tree/:commit/*path`。复用 `ContextStore` 接口。
|
||||
* **Fuse**: 基于 `fuser`。
|
||||
* Mount Point: `/mnt/vgcs_view/{req_id}/{commit}`.
|
||||
* `read()` -> `store.read_file()`.
|
||||
* `readdir()` -> `store.list_dir()`.
|
||||
* 只读挂载,用于 Debug。
|
||||
|
||||
## 6. 依赖库
|
||||
* `git2` (0.18): 核心操作。
|
||||
* `sha2` (0.10): 哈希计算。
|
||||
* `serde_json`: Ref 序列化。
|
||||
* `anyhow`: 错误处理。
|
||||
@ -0,0 +1,100 @@
|
||||
# 设计方案 2: DocOS (Document Object System) [详细设计版]
|
||||
|
||||
## 1. 定位与目标
|
||||
|
||||
DocOS 是构建在 VGCS 之上的逻辑层,负责处理文档结构的演进(裂变、聚合)。
|
||||
它不操作 Git Hash,而是操作逻辑路径。它通过 `ContextStore` 接口与底层交互。
|
||||
|
||||
## 2. 数据结构定义
|
||||
|
||||
### 2.1 逻辑节点 (DocNode)
|
||||
|
||||
这屏蔽了底层是 `File` 还是 `Dir` 的差异。
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum DocNodeKind {
|
||||
Leaf, // 纯内容节点 (对应文件)
|
||||
Composite, // 复合节点 (对应目录,含 index.md)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct DocNode {
|
||||
pub name: String,
|
||||
pub path: String, // 逻辑路径 e.g., "Analysis/Revenue"
|
||||
pub kind: DocNodeKind,
|
||||
pub children: Vec<DocNode>, // 仅 Composite 有值
|
||||
}
|
||||
```
|
||||
|
||||
## 3. 核心接口定义 (DocManager Trait)
|
||||
|
||||
```rust
|
||||
pub trait DocManager {
|
||||
/// 基于最新的 Commit 重新加载状态
|
||||
fn reload(&mut self, commit_hash: &str) -> Result<()>;
|
||||
|
||||
/// 获取当前文档树大纲
|
||||
fn get_outline(&self) -> Result<DocNode>;
|
||||
|
||||
/// 读取节点内容
|
||||
/// 逻辑:
|
||||
/// - Leaf: 读 `path`
|
||||
/// - Composite: 读 `path/index.md`
|
||||
fn read_content(&self, path: &str) -> Result<String>;
|
||||
|
||||
// --- 写入操作 (Buffer Action) ---
|
||||
|
||||
/// 写入内容 (Upsert)
|
||||
/// 逻辑:
|
||||
/// - 路径存在且是 Leaf -> 覆盖
|
||||
/// - 路径存在且是 Composite -> 覆盖 index.md
|
||||
/// - 路径不存在 -> 创建 Leaf
|
||||
fn write_content(&mut self, path: &str, content: &str);
|
||||
|
||||
/// 插入子章节 (Implies Promotion)
|
||||
/// 逻辑:
|
||||
/// - 如果 parent 是 Leaf -> 执行 Promote (Rename Leaf->Dir/index.md) -> 创建 Child
|
||||
/// - 如果 parent 是 Composite -> 直接创建 Child
|
||||
fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str);
|
||||
|
||||
/// 提交变更
|
||||
fn save(&mut self, message: &str) -> Result<String>;
|
||||
}
|
||||
```
|
||||
## 4. 关键演进逻辑 (Implementation Specs)
|
||||
|
||||
### 4.1 裂变 (Promote Leaf to Composite)
|
||||
假设 `path = "A"` 是 Leaf (对应文件 `A`).
|
||||
Action `insert_subsection("A", "B")`:
|
||||
1. Read content of `A`.
|
||||
2. Delete file `A` (in transaction).
|
||||
3. Write content to `A/index.md`.
|
||||
4. Write new content to `A/B`.
|
||||
*注意*: Git 会将其视为 Delete + Add,或 Rename。VGCS 底层会处理。
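
裂变步骤对应的草图(仅示意:在一个写事务内完成,父节点原内容的读取与错误处理从略):

```rust
// 示意:把 Leaf 节点 "A" 裂变为 Composite,并插入子章节 "B"
fn promote_and_insert(
    tx: &mut dyn Transaction,
    parent_path: &str,        // e.g. "A"
    parent_content: &str,     // 事先读出的 A 的原内容
    child_name: &str,         // e.g. "B"
    child_content: &str,
) -> anyhow::Result<()> {
    // 1-2. 删除原 Leaf 文件 A
    tx.remove(parent_path)?;
    // 3. 原内容下沉为 A/index.md
    tx.write(&format!("{parent_path}/index.md"), parent_content.as_bytes())?;
    // 4. 写入新的子节点 A/B
    tx.write(&format!("{parent_path}/{child_name}"), child_content.as_bytes())?;
    Ok(())
}
```
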
|
||||
|
||||
### 4.2 聚合 (Demote Composite to Leaf) - *Optional*
|
||||
假设 `path = "A"` 是 Composite (目录 `A/`).
|
||||
Action `demote("A")`:
|
||||
1. Read `A/index.md`.
|
||||
2. Concatenate children content (Optional policy).
|
||||
3. Delete dir `A/`.
|
||||
4. Write content to file `A`.
|
||||
|
||||
### 4.3 路径规范
|
||||
* **Root**: `/` (对应 Repo Root).
|
||||
* **Meta**: `_meta.json` (用于存储手动排序信息,如果需要).
|
||||
* **Content File**:
|
||||
* Leaf: `Name` (No extension assumption, or `.md` default).
|
||||
* Composite: `Name/index.md`.
|
||||
|
||||
## 5. 实现依赖
|
||||
DocOS 需要持有一个 `Box<dyn Transaction>` 实例。
|
||||
所有的 `write_*` 操作都只是在调用 Transaction 的 `write`。
|
||||
只有调用 `save()` 时,Transaction 才会 `commit`。
|
||||
|
||||
## 6. 错误处理
|
||||
* `PathNotFound`: 读不存在的路径。
|
||||
* `PathCollision`: 尝试创建已存在的文件。
|
||||
* `InvalidOperation`: 尝试在 Leaf 节点下创建子节点(需显式调用 Promote 或 insert_subsection)。
|
||||
|
||||
@ -0,0 +1,181 @@
|
||||
# 设计方案 3: Worker Runtime (Context Shell & Utilities)
|
||||
|
||||
## 1. 定位与目标
|
||||
|
||||
Worker Runtime 是连接底层文档系统 (DocOS) 与上层业务逻辑 (LLM Worker) 的桥梁。
|
||||
如果说 VGCS 是硬盘,DocOS 是文件系统,那么 Worker Runtime 就是 **Shell (命令行工具集)**。
|
||||
|
||||
它的核心任务是:**高效地检索、过滤、组装上下文,为大模型准备输入 (Prompt Context)。**
|
||||
|
||||
## 2. 核心设计:Context Shell
|
||||
|
||||
我们借鉴 Linux Coreutils 及现代 Rust CLI 工具(如 `fd`, `ripgrep`, `exa`)的理念,提供一组高效、强类型、支持结构化输出的原语。
|
||||
|
||||
### 2.1 接口定义 (Rust Trait)
|
||||
|
||||
```rust
|
||||
pub enum OutputFormat {
|
||||
Text, // 人类/LLM 可读的文本 (如 ASCII Tree)
|
||||
Json, // 程序可读的结构化数据 (类似 jq 输入)
|
||||
}
|
||||
|
||||
pub trait ContextShell {
|
||||
/// [tree]: 生成目录树视图
|
||||
/// depth: 递归深度
|
||||
/// format: Text (ASCII Tree) | Json (Nested Objects)
|
||||
fn tree(&self, path: &str, depth: Option<usize>, format: OutputFormat) -> Result<String>;
|
||||
|
||||
/// [find]: 基于元数据的快速查找 (不读取内容)
|
||||
/// name_pattern: Glob (如 "*.rs")
|
||||
/// min_size/max_size: 大小过滤
|
||||
fn find(&self, name_pattern: &str, options: FindOptions) -> Result<Vec<NodeMetadata>>;
|
||||
|
||||
/// [grep]: 全文检索 (读取内容)
|
||||
/// pattern: Regex
|
||||
/// paths: 限制搜索的文件列表 (通常由 find 的结果输入)
|
||||
fn grep(&self, pattern: &str, paths: Option<Vec<String>>) -> Result<Vec<GrepMatch>>;
|
||||
|
||||
/// [cat]: 读取并组装内容
|
||||
/// 自动处理 Blob 下载,拼接多个文件,添加 header
|
||||
fn cat(&self, paths: &[&str]) -> Result<String>;
|
||||
|
||||
/// [wc]: 统计元数据 (行数, 字节数)
|
||||
fn wc(&self, paths: &[&str]) -> Result<Vec<FileStats>>;
|
||||
|
||||
/// [patch]: 局部修补 (新增)
|
||||
/// 基于精确文本匹配的替换,避免全量读写。
|
||||
/// original: 必须在文件中唯一存在的原文片段
|
||||
/// replacement: 替换后的新文本
|
||||
fn patch(&self, path: &str, original: &str, replacement: &str) -> Result<()>;
|
||||
}
|
||||
```
|
||||
|
||||
### 2.2 数据结构 (Serializable)
|
||||
|
||||
```rust
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct NodeMetadata {
|
||||
pub path: String,
|
||||
pub kind: NodeKind, // File | Dir
|
||||
pub size: u64,
|
||||
// pub modified: bool, // TODO: Implement diff check against base
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct FindOptions {
|
||||
pub recursive: bool,
|
||||
pub max_depth: Option<usize>,
|
||||
pub type_filter: Option<String>, // "File" or "Dir"
|
||||
pub min_size: Option<u64>,
|
||||
pub max_size: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GrepMatch {
|
||||
pub path: String,
|
||||
pub line_number: usize,
|
||||
pub content: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct FileStats {
|
||||
pub path: String,
|
||||
pub lines: usize,
|
||||
pub bytes: usize,
|
||||
}
|
||||
```
|
||||
|
||||
## 3. 详细功能设计
|
||||
|
||||
### 3.1 tree (Structure Awareness)
|
||||
* **场景**: LLM 需要先看一眼 "Map" 才能知道去哪找宝藏。
|
||||
* **Text Mode**: 生成经典的 ASCII Tree,直接作为 Prompt 的一部分。
|
||||
* **Json Mode**: 嵌套的 JSON 对象,供代码逻辑分析结构。
|
||||
|
||||
### 3.2 find (Fast Filter)
|
||||
* **场景**: 效率优化。不要一开始就 `grep` 全文。
|
||||
* **原理**: 只遍历 Git Tree 对象(元数据),**不解压 Blob**。速度极快。
|
||||
|
||||
### 3.3 grep (Content Search)
|
||||
* **场景**: RAG (Retrieval) 环节。
|
||||
* **优化**: 接受 `find` 的输出作为 `paths` 参数,避免全盘扫描。
|
||||
* **并行**: 利用 Rayon 并行读取 Blob 并匹配。
|
||||
|
||||
### 3.4 cat (Assemble)
|
||||
* **场景**: 组装 Prompt Context。
|
||||
* **格式**: 使用 XML tags 包裹,明确文件边界。
|
||||
|
||||
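拼接格式的一个最小示意(`<file>` 标签名只是约定示例,并非固定协议):

```rust
/// 将 (路径, 内容) 列表拼接为带文件边界标记的 Prompt Context
fn assemble_context(files: &[(String, String)]) -> String {
    files
        .iter()
        .map(|(path, content)| format!("<file path=\"{path}\">\n{content}\n</file>"))
        .collect::<Vec<_>>()
        .join("\n\n")
}
```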
### 3.5 patch (Atomic Update)
|
||||
* **场景**: 修正笔误或更新局部数据。
|
||||
* **逻辑**:
|
||||
1. 读取文件内容。
|
||||
2. 查找 `original` 字符串。
|
||||
3. **Fail Fast**: 如果找不到,或者找到多处(歧义),直接报错,防止改错位置。
|
||||
4. 执行替换并在内存中生成新 Blob,更新 Index。
|
||||
* **优势**: 相比 `write_file`,它不需要 Worker 回传整个文件内容,节省网络传输和 Token。
|
||||
|
||||
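Fail Fast 逻辑可以先用一个纯函数表达(真实实现还需要在内存中生成新 Blob 并更新 Index,这里只演示唯一性检查):

```rust
use anyhow::{bail, Result};

/// 对文件内容做精确替换:original 必须在 content 中出现且仅出现一次
fn apply_patch(content: &str, original: &str, replacement: &str) -> Result<String> {
    let occurrences = content.matches(original).count();
    match occurrences {
        0 => bail!("patch failed: original text not found"),
        1 => Ok(content.replacen(original, replacement, 1)),
        n => bail!("patch failed: original text is ambiguous ({n} occurrences)"),
    }
}
```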
## 4. Tool Definition Schema (AI Configuration)
|
||||
|
||||
为了让 LLM 能够直接使用这些工具,Runtime 将提供一个方法导出标准的 JSON Schema (OpenAI Compatible)。
|
||||
|
||||
```rust
|
||||
impl WorkerContext {
|
||||
pub fn get_tool_definitions() -> serde_json::Value {
|
||||
serde_json::json!([
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "tree",
|
||||
"description": "List directory structure. Use this first to understand the file layout.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": { "type": "string", "description": "Root path to list (default: root)" },
|
||||
"depth": { "type": "integer", "description": "Recursion depth" }
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "patch",
|
||||
"description": "Replace a specific text block in a file. Use this for small corrections.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"required": ["path", "original", "replacement"],
|
||||
"properties": {
|
||||
"path": { "type": "string" },
|
||||
"original": { "type": "string", "description": "Exact text to look for. Must be unique in file." },
|
||||
"replacement": { "type": "string", "description": "New text to insert." }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// ... defined for grep, cat, find, etc.
|
||||
])
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 5. 高效工作流示例 (The Funnel Pattern)
|
||||
|
||||
采用类似 "漏斗" 的筛选机制,层层递进:先看结构 (tree),再按元数据过滤 (find),最后才做昂贵的全文读取 (grep/cat),把读取成本收敛到最小范围。
|
||||
|
||||
```rust
|
||||
// 1. 全局感知 (Tree)
|
||||
let structure = ctx.tree("/", Some(2), OutputFormat::Text)?;
|
||||
|
||||
// 2. 快速定位 (Find)
|
||||
let json_files = ctx.find("**/*.json", FindOptions::default())?;
|
||||
|
||||
// 3. 精确检索 (Grep)
|
||||
let matches = ctx.grep("NetProfit", Some(json_files.iter().map(|f| f.path.clone()).collect()))?;
|
||||
|
||||
// 4. 局部修正 (Patch)
|
||||
// 假设我们在 matches 中发现了一个拼写错误 "NetProft"
|
||||
ctx.patch("data/financials.json", "\"NetProft\":", "\"NetProfit\":")?;
|
||||
|
||||
// 5. 组装阅读 (Cat)
|
||||
// 从 matches 中整理出去重后的目标文件列表
let mut target_files: Vec<&str> = matches.iter().map(|m| m.path.as_str()).collect();
target_files.sort();
target_files.dedup();
let context = ctx.cat(&target_files)?;
|
||||
```
|
||||
@ -0,0 +1,117 @@
|
||||
# 设计方案 4: Workflow Orchestrator (调度器) [详细设计版]
|
||||
|
||||
## 1. 定位与目标
|
||||
|
||||
Orchestrator 负责 DAG 调度、RPC 分发和冲突合并。它使用 Rust 实现。
|
||||
|
||||
**核心原则:**
|
||||
* **Git as Source of Truth**: 所有的任务产出(数据、报告)均存储在 VGCS (Git) 中。
|
||||
* **数据库轻量化**: 数据库仅用于存储系统配置、TimeSeries 缓存以及 Request ID 与 Git Commit Hash 的映射。Workflow 执行过程中不依赖数据库进行状态流转。
|
||||
* **Context 隔离**: 每次 Workflow 启动均为全新的 Context(或基于特定 Snapshot),无全局共享 Context。
|
||||
|
||||
## 2. 调度逻辑规范
|
||||
|
||||
### 2.1 RPC Subject 命名
|
||||
基于 NATS。
|
||||
* **Command Topic**: `workflow.cmd.{routing_key}`
|
||||
* e.g., `workflow.cmd.provider.tushare`
|
||||
* e.g., `workflow.cmd.analysis.report`
|
||||
* **Event Topic**: `workflow.evt.task_completed` (统一监听)
|
||||
|
||||
### 2.2 Command Payload Schema
|
||||
|
||||
```json
|
||||
{
|
||||
"request_id": "uuid-v4",
|
||||
"task_id": "fetch_tushare",
|
||||
"routing_key": "provider.tushare",
|
||||
"config": {
|
||||
"market": "CN",
|
||||
"years": 5
|
||||
// Provider Specific Config
|
||||
},
|
||||
"context": {
|
||||
"base_commit": "a1b2c3d4...", // Empty for initial task
|
||||
"mount_path": "/Raw Data/Tushare"
|
||||
// 指示 Worker 建议把结果挂在哪里
|
||||
},
|
||||
"storage": {
|
||||
"root_path": "/mnt/workflow_data"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2.3 Event Payload Schema
|
||||
|
||||
```json
|
||||
{
|
||||
"request_id": "uuid-v4",
|
||||
"task_id": "fetch_tushare",
|
||||
"status": "Completed",
|
||||
"result": {
|
||||
"new_commit": "e5f6g7h8...",
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
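与上述 Command / Event JSON 对应的 Rust Contract 可以按下面的草图定义(字段按上文 Schema 整理,结构体名称与 `Uuid`/serde 的使用均为示意,最终以 `common-contracts` 中的定义为准):

```rust
use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;

/// Command Payload(workflow.cmd.{routing_key})
#[derive(Debug, Serialize, Deserialize)]
pub struct TaskCommand {
    pub request_id: Uuid,
    pub task_id: String,
    pub routing_key: String,
    pub config: Value, // Provider Specific Config
    pub context: TaskContext,
    pub storage: StorageConfig,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct TaskContext {
    pub base_commit: String, // 初始任务为空字符串
    pub mount_path: String,  // 建议的结果挂载位置
}

#[derive(Debug, Serialize, Deserialize)]
pub struct StorageConfig {
    pub root_path: String,
}

/// Event Payload(workflow.evt.task_completed)
#[derive(Debug, Serialize, Deserialize)]
pub struct TaskCompletedEvent {
    pub request_id: Uuid,
    pub task_id: String,
    pub status: String, // e.g. "Completed"
    pub result: TaskResult,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct TaskResult {
    pub new_commit: Option<String>,
    pub error: Option<String>,
}
```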
## 3. 合并策略 (Merge Strategy) 实现
|
||||
|
||||
### 3.1 串行合并 (Fast-Forward)
|
||||
DAG: A -> B
|
||||
1. A returns `C1`.
|
||||
2. Orchestrator dispatch B with `base_commit = C1`.
|
||||
|
||||
### 3.2 并行合并 (Three-Way Merge)
|
||||
DAG: A -> B, A -> C. (B, C parallel)
|
||||
1. Dispatch B with `base_commit = C1`.
|
||||
2. Dispatch C with `base_commit = C1`.
|
||||
3. B returns `C2`. C returns `C3`.
|
||||
4. **Merge Action**:
|
||||
* Wait for BOTH B and C to complete.
|
||||
* Call `VGCS.merge_trees(base=C1, ours=C2, theirs=C3)`.
|
||||
* Result: `TreeHash T4`.
|
||||
* Create Commit `C4` (Parents: C2, C3).
|
||||
5. Dispatch D (dependent on B, C) with `base_commit = C4`.
|
||||
|
||||
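Merge Action 在调度侧大致是下面这个形状。`merge_trees` / `create_commit` 是假设的 VGCS 接口名,实际以 VGCS 设计文档为准:

```rust
use anyhow::Result;

/// 示意用的 VGCS 接口
trait VgcsLike {
    fn merge_trees(&self, base: &str, ours: &str, theirs: &str) -> Result<String>; // 返回 TreeHash
    fn create_commit(&self, tree: &str, parents: &[&str], msg: &str) -> Result<String>; // 返回 CommitHash
}

/// B、C 并行完成后,基于公共祖先 C1 做三方合并,产出带双亲 (C2, C3) 的 C4
fn merge_parallel_branches(
    vgcs: &dyn VgcsLike,
    base_c1: &str,
    commit_b: &str,
    commit_c: &str,
) -> Result<String> {
    let tree_t4 = vgcs.merge_trees(base_c1, commit_b, commit_c)?;
    let commit_c4 = vgcs.create_commit(&tree_t4, &[commit_b, commit_c], "merge parallel tasks")?;
    Ok(commit_c4) // 后续任务 D 以 C4 作为 base_commit 派发
}
```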
### 3.3 冲突处理
|
||||
如果 `VGCS.merge_trees` 返回 Error (Conflict):
|
||||
1. Orchestrator 捕获错误。
|
||||
2. 标记 Workflow 状态为 `Conflict`.
|
||||
3. (Future) 触发 `ConflictResolver` Agent,传入 C1, C2, C3。Agent 生成 C4。
|
||||
|
||||
## 4. 状态机重构
|
||||
废弃旧的 `WorkflowStateMachine` 中关于 TaskType 的判断。
|
||||
引入 `CommitTracker`:
|
||||
```rust
|
||||
struct CommitTracker {
|
||||
// 记录每个任务产出的 Commit
|
||||
task_commits: HashMap<TaskId, String>,
|
||||
// 记录当前主分支的 Commit (Latest Merged)
|
||||
head_commit: String,
|
||||
}
|
||||
```
|
||||
|
||||
## 5. 执行计划
|
||||
1. **Contract**: 定义 Rust Structs for RPC Payloads.
|
||||
2. **NATS**: 实现 Publisher/Subscriber。
|
||||
3. **Engine**: 实现 Merge Loop。
|
||||
|
||||
## 6. 数据持久化与缓存策略 (Persistence & Caching)
|
||||
|
||||
### 6.1 数据库角色 (Database Role)
|
||||
数据库不再作为业务数据的“主存储”,其角色转变为:
|
||||
1. **Configuration**: 存储系统运行所需的配置信息。
|
||||
2. **Cache (Hot Data)**: 缓存 Data Provider 抓取的原始数据 (Time-series),避免重复调用外部 API。
|
||||
3. **Index**: 存储 `request_id` -> `final_commit_hash` 的映射,作为系统快照的索引。
|
||||
|
||||
### 6.2 Provider 行为模式
|
||||
Provider 在接收到 Workflow Command 时:
|
||||
1. **Check Cache**: 检查本地 DB/Cache 是否有有效数据。
|
||||
2. **Fetch (If miss)**: 如果缓存未命中,调用外部 API 获取数据并更新缓存。
|
||||
3. **Inject to Context**: 将数据写入当前的 VGCS Context (via `WorkerRuntime`),生成新的 Commit。
|
||||
* *注意*: Provider 不直接将此次 Workflow 的结果“存”回数据库的业务表,数据库仅作 Cache 用。
|
||||
|
||||
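Provider 处理 Command 的流程可以用下面的骨架示意。`CacheLike` / `RemoteApiLike` / `ContextLike` 以及 `financials.json` 文件名都是假设,仅用于说明"查缓存 -> 抓取 -> 注入 Context"的顺序:

```rust
use anyhow::Result;

/// 以下三个 trait 仅为示意,代表缓存、外部 API 与 WorkerRuntime 的最小能力
trait CacheLike {
    async fn get(&self, key: &str) -> Result<Option<Vec<u8>>>;
    async fn put(&self, key: &str, value: &[u8]) -> Result<()>;
}
trait RemoteApiLike {
    async fn fetch(&self, symbol: &str) -> Result<Vec<u8>>;
}
trait ContextLike {
    fn write_file(&mut self, path: &str, content: &[u8]) -> Result<()>;
    fn commit(&mut self, message: &str) -> Result<String>;
}

/// Provider 收到 Workflow Command 后的标准行为
async fn handle_provider_command(
    cache: &impl CacheLike,
    api: &impl RemoteApiLike,
    ctx: &mut impl ContextLike,
    symbol: &str,
    mount_path: &str,
) -> Result<String> {
    // 1. Check Cache
    let data = match cache.get(symbol).await? {
        Some(hit) => hit,
        None => {
            // 2. Fetch (If miss):调外部 API 并回填缓存
            let fresh = api.fetch(symbol).await?;
            cache.put(symbol, &fresh).await?;
            fresh
        }
    };
    // 3. Inject to Context:写入 VGCS 并提交,返回新的 Commit Hash(不写业务表)
    ctx.write_file(&format!("{mount_path}/financials.json"), &data)?;
    ctx.commit("inject provider data")
}
```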
### 6.3 Orchestrator 行为
|
||||
Orchestrator 仅负责追踪 Commit Hash 的演变。
|
||||
Workflow 结束时,Orchestrator 将最终的 `Head Commit Hash` 关联到 `Request ID` 并持久化(即“Snapshot 落盘”)。
|
||||
@ -0,0 +1,171 @@
|
||||
# 设计方案: Deep Research Service (Reactive & Git-Native)
|
||||
|
||||
## 1. 定位与核心理念 (Overview)
|
||||
|
||||
**Deep Research** 是一个独立的、正交的数据降维与提炼服务。它的核心任务是解决 "Context Overflow" 问题——当业务数据量超过大模型上下文窗口,或数据复杂度超过单次 Prompt 处理能力时,通过**自主智能体 (Autonomous Agent)** 进行迭代式的分析、摘要和结构化提取。
|
||||
|
||||
### 1.1 核心原则
|
||||
1. **独立性 (Independence)**: 作为一个独立的微服务 (`deep-research-service`) 运行,拥有独立的资源配额和生命周期。
|
||||
2. **Git 原生 (Git-Native)**: 利用底层的 VGCS (Virtual Git Context System) 的分支 (Branching) 和合并 (Merging) 机制,实现原始数据与研究成果的**并存**。
|
||||
3. **响应式闭环 (Reactive Loop)**: 摒弃线性的 Map-Reduce,采用 "Plan -> Act -> Critic" 的动态循环,根据当前发现的信息实时调整研究目标。
|
||||
4. **安全护栏 (Guardrails)**: 引入熵减检查、语义收敛检测和硬性预算约束,防止智能体陷入死循环或发散。
|
||||
|
||||
---
|
||||
|
||||
## 2. 系统架构 (Architecture)
|
||||
|
||||
### 2.1 交互流程图
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "Orchestration Layer"
|
||||
Orch[Orchestrator Service]
|
||||
DAG[DAG Scheduler]
|
||||
end
|
||||
|
||||
subgraph "Storage Layer (VGCS)"
|
||||
Repo[Git Repo]
|
||||
MainBr[Branch: main]
|
||||
DRBr[Branch: feat/deep_research]
|
||||
end
|
||||
|
||||
subgraph "Deep Research Layer"
|
||||
Monitor[Resource Monitor]
|
||||
Reactor[Reactive Engine]
|
||||
Guard[Safety Guardrails]
|
||||
end
|
||||
|
||||
%% Flow
|
||||
Orch -->|1. Task Pending (Check Size)| Monitor
|
||||
Monitor -->|2. Threshold Exceeded| Orch
|
||||
Orch -->|3. Suspend Task & Create Branch| Repo
|
||||
Repo -.-> MainBr
|
||||
MainBr -.-> DRBr
|
||||
|
||||
Orch -->|4. Dispatch Job| Reactor
|
||||
Reactor <-->|5. Read/Write| DRBr
|
||||
Reactor <-->|6. Validate Step| Guard
|
||||
|
||||
Reactor -->|7. Completion| Orch
|
||||
Orch -->|8. Merge/Rebase| Repo
|
||||
DRBr -->|Merge| MainBr
|
||||
Orch -->|9. Resume Task| DAG
|
||||
```
|
||||
|
||||
### 2.2 分支策略 (Branching Strategy)
|
||||
|
||||
我们采用 **"并存 (Co-exist)"** 策略,而非简单的替换。
|
||||
|
||||
* **Main Branch**: 包含原始海量数据 (e.g., 100份 PDF, 10万行 CSV)。
|
||||
* **Research Branch**: 从 Main 分出。Deep Research Agent 在此分支上工作,生成新的提炼文件 (e.g., `research_summary.md`, `knowledge_graph.json`)。
|
||||
* **Merge**: 任务完成后,Research Branch 被 merge 回 Main。
|
||||
* **下游任务视角**: 下游任务可以看到原始数据(如果在 `input` 中显式请求),但默认通过 Context Selector 优先获取 `research_summary.md`。
|
||||
* **User Option**: 用户可以选择 "Re-run from Deep Research",即基于 merge 后的 commit 继续执行;也可以选择 "Raw Run",强制使用原始数据(虽然可能失败)。
|
||||
|
||||
---
|
||||
|
||||
## 3. 核心逻辑: The Reactive Engine
|
||||
|
||||
Deep Research 不仅仅是摘要,而是一个**有状态的探索过程**。
|
||||
|
||||
### 3.1 状态循环 (The Loop)
|
||||
|
||||
Agent 维护一个动态的 `State`:
|
||||
* `Goal Stack`: 当前的研究目标栈(初始由 Prompt 决定,后续动态分裂)。
|
||||
* `Knowledge Base`: 已获取的确信事实。
|
||||
* `Trace Log`: 思考路径。
|
||||
|
||||
**执行步骤**:
|
||||
1. **Observe**: 读取当前分支的数据(或上一轮的输出)。
|
||||
2. **Plan**: 基于当前 Goal 和 Observe 的结果,制定下一步行动(e.g., "搜索关键词 X", "读取文件 Y 的第 10-20 页", "对文件 Z 进行摘要")。
|
||||
3. **Act**: 执行行动,写入中间结果到 Git。
|
||||
4. **Reflect (Critic)**: 检查结果是否满足 Goal?是否发现了新线索需要更新 Goal?
|
||||
|
||||
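循环的控制流大致如下。`ResearchState` 与四个占位函数仅为示意,真实实现会调用 LLM / Context Shell,Guardrails 检查见 3.2:

```rust
/// 示意类型:与 3.1 中的 State 对应
#[derive(Default)]
struct ResearchState {
    goal_stack: Vec<String>,     // Goal Stack
    knowledge_base: Vec<String>, // 已确认的事实
    trace_log: Vec<String>,      // 思考路径
}

// 以下四个函数为占位示意
fn observe(_state: &ResearchState) -> String { String::from("observation") }
fn plan(goal: &str, _obs: &str) -> String { format!("action for {goal}") }
fn act(action: &str) -> String { format!("note from {action}") }
fn reflect(_goal: &str, _note: &str) -> Vec<String> { Vec::new() }

/// Plan -> Act -> Reflect 循环骨架:目标栈驱动,直到目标清空或达到 Step Cap
fn reactive_loop(state: &mut ResearchState, step_cap: usize) {
    for step in 0..step_cap {
        let Some(goal) = state.goal_stack.pop() else { break };

        let observation = observe(state);        // 1. Observe
        let action = plan(&goal, &observation);  // 2. Plan
        let note = act(&action);                 // 3. Act(中间结果写入 Git)
        state.trace_log.push(format!("step {step}: {action}"));
        state.knowledge_base.push(note.clone());

        // 4. Reflect (Critic):发现新线索则分裂新目标,压回目标栈
        for new_goal in reflect(&goal, &note) {
            state.goal_stack.push(new_goal);
        }
    }
}
```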
### 3.2 保驾护航机制 (Guardrails)
|
||||
|
||||
为了防止 Agent "幻觉"、"死循环" 或 "跑题",必须实施严格的监控。
|
||||
|
||||
#### A. 熵减检查 (Entropy/Novelty Check)
|
||||
* **目的**: 防止 Agent 反复咀嚼相同的信息。
|
||||
* **机制**: 计算新生成的 Research Note 与已有 Knowledge Base 的 **Embedding 相似度**。
|
||||
* **策略**: 如果 `Similarity > 0.95`,判定为无效迭代,强制终止当前子路径或回退。
|
||||
|
||||
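相似度检查本身是一个很小的计算。下面的示意假设 Research Note 已由某个 embedding 模型编码为等长向量,阈值 0.95 与上文一致:

```rust
/// 计算两个 embedding 向量的余弦相似度
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "embedding dimensions must match");
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }
    dot / (norm_a * norm_b)
}

/// 熵减检查:与知识库中任一已有条目相似度超过阈值,即判定为无效迭代
fn is_redundant(new_note: &[f32], knowledge_base: &[Vec<f32>], threshold: f32) -> bool {
    knowledge_base
        .iter()
        .any(|existing| cosine_similarity(new_note, existing) > threshold)
}
```

调用侧形如 `if is_redundant(&note_vec, &kb, 0.95) { /* 终止当前子路径或回退 */ }`。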
#### B. 语义距离收敛 (Semantic Convergence)
|
||||
* **目的**: 防止 Agent 跑题(Divergence)。
|
||||
* **机制**: 实时计算 `Current Research Content` 向量与 `Initial User Query` 向量的距离。
|
||||
* **策略**: 距离应随迭代呈**震荡下降**趋势。如果距离显著增大(e.g., 从研究 "Apple 财务" 跑题到 "越南美食"),Supervisor 介入干预。
|
||||
|
||||
#### C. 预算硬约束 (Hard Budgeting)
|
||||
* **Token Cap**: 单次 Research 任务上限(e.g., 200k tokens)。
|
||||
* **Step Cap**: 最大迭代轮数(e.g., 10 loops)。
|
||||
* **Time Cap**: 超时强制终止。
|
||||
|
||||
---
|
||||
|
||||
## 4. 触发与配置 (Triggering & Config)
|
||||
|
||||
### 4.1 自动触发 (Auto-Trigger)
|
||||
* **Pre-flight Check**: Orchestrator 在调度 Analysis Task 前,先调用 `ContextResolver` 获取输入文件的 Metadata。
|
||||
* **Threshold Logic**:
|
||||
```rust
|
||||
let total_tokens = estimate_tokens(file_list);
|
||||
let model_limit = llm_config.context_window; // e.g. 128k
|
||||
|
||||
if (total_tokens as f64) > (model_limit as f64 * 0.8) {
|
||||
trigger_deep_research();
|
||||
}
|
||||
```
|
||||
|
||||
### 4.2 配置驱动 (Config-Driven)
|
||||
允许在 `AnalysisTemplate` 中显式控制:
|
||||
|
||||
```json
|
||||
{
|
||||
"modules": {
|
||||
"financial_analysis": {
|
||||
"deep_research": {
|
||||
"strategy": "auto", // auto | always | never
|
||||
"threshold_tokens": 50000,
|
||||
"focus_areas": ["risk", "cash_flow"] // Initial Goals
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4.3 基础设施配置更新
|
||||
|
||||
需要在 `LlmProvidersConfig` 中补充模型能力参数,以便系统知道 "上限" 是多少。
|
||||
|
||||
```rust
|
||||
pub struct ModelConfig {
|
||||
pub model_id: String,
|
||||
pub context_window: u32, // e.g., 128000
|
||||
pub pricing_tier: String, // e.g., "high"
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. 实施路线图 (Roadmap)
|
||||
|
||||
1. **Phase 1: Infrastructure**
|
||||
* 在 `common-contracts` 定义 Deep Research 的 Task 结构和配置。
|
||||
* 更新 `LlmProvidersConfig` 支持 context window 参数。
|
||||
* 在 `Orchestrator` 实现 "Suspend & Branch" 逻辑。
|
||||
|
||||
2. **Phase 2: The Service (MVP)**
|
||||
* 构建 `deep-research-service` (Rust)。
|
||||
* 实现基础的 **Map-Reduce** 逻辑作为 v0.1(先跑通流程)。
|
||||
* 集成 VGCS 分支操作。
|
||||
|
||||
3. **Phase 3: The Reactive Agent**
|
||||
* 引入向量数据库(或内存向量索引)用于 Guardrails。
|
||||
* 实现 Plan-Act-Critic 循环。
|
||||
* 实现熵减和收敛检查。
|
||||
|
||||
4. **Phase 4: Integration**
|
||||
* 全自动触发测试。
|
||||
* 前端可视化:展示 Research 过程的 Trace 和中间产物。
|
||||
|
||||
@ -0,0 +1,70 @@
|
||||
# 历史报告功能实施前 Dry Run 报告
|
||||
|
||||
## 概览
|
||||
本报告对“历史报告”功能的实施方案进行了全面的“Dry Run”检查。我们检查了后端服务 (`data-persistence-service`, `api-gateway`, `report-generator-service`) 和前端代码 (`schema.gen.ts`, UI 组件),确认了所有必要的变更点和潜在的遗漏。
|
||||
|
||||
## 检查清单与发现 (Dry Run Findings)
|
||||
|
||||
### 1. 后端: Report Generator Service (Fix Missing Persistence)
|
||||
* **现状**: `run_vgcs_based_generation` 函数在生成报告后,仅提交到了 VGCS (Git),没有调用 `persistence_client` 写入数据库。
|
||||
* **影响**: 数据库中没有记录,导致历史查询接口返回空列表。
|
||||
* **修正动作**:
|
||||
* 在 `worker.rs` 的 `run_vgcs_based_generation` 函数末尾(生成 commit 后),构建 `NewAnalysisResult` 结构体。
|
||||
* 调用 `persistence_client.create_analysis_result`。
|
||||
* 注意:`NewAnalysisResult` 需要 `module_id` 和 `template_id`,目前这些信息在 `command` 中是可选的 (`Option`)。在 `message_consumer.rs` 中解析 `WorkflowTaskCommand` 时,我们已经提取了这些信息并放入了 `GenerateReportCommand`。需要确保传递链路完整。
|
||||
|
||||
### 2. 后端: Data Persistence Service (Query Update)
|
||||
* **现状**:
|
||||
* `AnalysisQuery` 结构体强制要求 `symbol: String`。
|
||||
* DB 查询 `get_analysis_results` 强制 `WHERE symbol = $1`。
|
||||
* **修正动作**:
|
||||
* 修改 `AnalysisQuery`,将 `symbol` 改为 `Option<String>`。
|
||||
* 修改 `get_analysis_results` SQL,使其根据 symbol 是否存在动态构建 `WHERE` 子句(或者使用 `COALESCE` 技巧,但动态构建更清晰)。
|
||||
* 在 SQL 中强制加上 `LIMIT 10`(也可通过参数控制;为简化起见,本次只取最近 10 条,建议硬编码默认值或追加可选的 `limit` 参数)。
|
||||
|
||||
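动态构建 `WHERE` 子句可以用 sqlx 的 `QueryBuilder` 实现,下面是一个示意(表名 `analysis_results`、列名 `symbol` / `created_at` 以及 `AnalysisResultRow` 均为假设,以实际 schema 为准):

```rust
use sqlx::{Postgres, QueryBuilder};

/// 根据 symbol 是否存在动态拼接 WHERE,并固定取最近 10 条
fn build_recent_results_query(symbol: Option<String>) -> QueryBuilder<'static, Postgres> {
    let mut qb = QueryBuilder::new("SELECT * FROM analysis_results");

    if let Some(sym) = symbol {
        // 只有显式传入 symbol 时才追加过滤条件
        qb.push(" WHERE symbol = ").push_bind(sym);
    }

    qb.push(" ORDER BY created_at DESC LIMIT 10");
    qb
    // 调用侧示意: qb.build_query_as::<AnalysisResultRow>().fetch_all(&pool).await
}
```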
### 3. 后端: API Gateway (Endpoint Update)
|
||||
* **现状**:
|
||||
* `AnalysisResultQuery` 结构体定义了 `symbol: String`。
|
||||
* `get_analysis_results_by_symbol` 处理器绑定了该 Query。
|
||||
* 没有 `GET /api/v1/analysis-results/:id` 接口。
|
||||
* **修正动作**:
|
||||
* 修改 `AnalysisResultQuery` 为 `symbol: Option<String>`。
|
||||
* 更新 `get_analysis_results` 处理器以适应可选参数。
|
||||
* 新增 `get_analysis_result_by_id` 处理器,代理请求到 persistence service。
|
||||
* **关键遗漏检查**: 确保 `utoipa` 的宏定义 (`#[utoipa::path(...)]`) 也同步更新,否则生成的 OpenAPI 文档不对,前端 schema 就不会更新。
|
||||
|
||||
### 4. 前端: Schema与API客户端
|
||||
* **现状**: `schema.gen.ts` 是自动生成的。
|
||||
* **动作**:
|
||||
* 后端修改完并启动后,运行脚本 `scripts/update_api_spec.sh` (或类似机制) 重新生成 `openapi.json`。
|
||||
* 前端运行 `npm run openapi-ts` 更新 `schema.gen.ts`。
|
||||
* **确认**: `get_analysis_results` 的参数应变为可选,且新增 `get_analysis_result_by_id` 方法。
|
||||
|
||||
### 5. 前端: UI 组件
|
||||
* **Dashboard / Header**:
|
||||
* 需要新增 `RecentReportsDropdown` 组件。
|
||||
* 逻辑:Mount 时调用 `api.get_analysis_results()` (无参),获取列表。
|
||||
* 渲染:下拉列表,点击项使用 `Link` 跳转。
|
||||
* **HistoricalReportPage**:
|
||||
* 新增路由 `/history/:id` 在 `App.tsx`。
|
||||
* 组件逻辑:获取 `id` 参数 -> 调用 `api.get_analysis_result_by_id({ id })` -> 渲染 Markdown。
|
||||
* **复用**: 可以复用 `TaskDetailView` 中的 Markdown 渲染逻辑(样式一致性)。
|
||||
|
||||
## 风险评估与应对
|
||||
* **数据不一致**: 如果 Worker 写入 Git 成功但写入 DB 失败怎么办?
|
||||
* *应对*: 记录 Error 日志。本次暂不引入复杂的分布式事务。DB 缺失仅导致历史列表少一条,不影响核心业务(报告已生成且在 Git 中)。
|
||||
* **Schema 类型不匹配**: 手动修改后端 struct 后,前端生成的 TS 类型可能报错。
|
||||
* *应对*: 严格按照 `common-contracts` 定义 DTO,确保 `utoipa` 宏准确描述 `Option` 类型。
|
||||
|
||||
## 执行结论
|
||||
计划可行。已识别关键遗漏(Worker 持久化缺失)。
|
||||
|
||||
**执行顺序**:
|
||||
1. **Fix Worker Persistence** (最关键,确保新数据能进去)
|
||||
2. **Update Persistence Service** (支持无 symbol 查询)
|
||||
3. **Update API Gateway** (暴露接口)
|
||||
4. **Update OpenAPI & Frontend Client**
|
||||
5. **Implement Frontend UI**
|
||||
|
||||
准备开始执行。
|
||||
|
||||
@ -0,0 +1,137 @@
|
||||
# 全栈一致性与历史回放增强设计 (Unified Architecture Design)
|
||||
|
||||
## 1. 背景与目标
|
||||
|
||||
当前系统存在三个主要割裂点:
|
||||
1. **节点行为不一致**: Analysis 节点输出报告,而部分 Data Provider (YFinance/Mock) 仅抓取数据而无可视化输出(虽已部分修复,但缺乏强制规范)。
|
||||
2. **实时与历史割裂**: 实时页面依赖 SSE 推送,历史页面缺乏状态恢复机制,导致无法查看 DAG 结构和执行日志。
|
||||
3. **日志展示分散**: 实时日志是全局流,难以对应到具体任务;历史日志未与 UI 深度集成。
|
||||
|
||||
**本设计旨在将上述问题合并为一个系统性工程,实现以下目标:**
|
||||
* **后端标准化**: 所有节点必须通过统一 Trait 实现,强制产出 Markdown 报告和流式更新。
|
||||
* **前端统一化**: 使用同一套 `ReportPage` 逻辑处理“实时监控”和“历史回放”。
|
||||
* **上下文完整性**: 无论是实时还是历史,都能查看 DAG 状态、节点报告、以及执行日志 (`_execution.md`)。
|
||||
|
||||
---
|
||||
|
||||
## 2. 后端架构升级 (Backend Architecture)
|
||||
|
||||
### 2.1. 统一节点运行时 (`WorkflowNode` Trait)
|
||||
|
||||
引入强制性接口,确保所有 Worker 行为一致。
|
||||
|
||||
* **Trait 定义**:
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait WorkflowNode {
|
||||
async fn execute(&self, ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult>;
|
||||
fn render_report(&self, result: &NodeExecutionResult) -> Result<String>;
|
||||
}
|
||||
```
|
||||
* **`WorkflowNodeRunner` (Harness)**:
|
||||
* **职责**: 负责 NATS 订阅、VGCS 读写、Git Commit、错误处理。
|
||||
* **增强**:
|
||||
* 自动将 `execute` 产生的日志写入 `_execution.md`。
|
||||
* 自动推送 `TaskStreamUpdate` (包含 Report Markdown)。
|
||||
* 自动推送 `TaskLog` (带 `task_id` 的结构化日志,用于前端分流)。
|
||||
|
||||
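Harness 包裹节点执行的骨架大致如下。`NodeContext` / `NodeExecutionResult` 对应上文 Trait 中的类型(此处内联最小占位定义),VGCS 写入与 NATS 推送仅以占位函数表示,并非最终实现:

```rust
use anyhow::Result;
use async_trait::async_trait;
use serde_json::Value;

// 与 2.1 中的 Trait 对应(占位类型,仅为示意)
pub struct NodeContext;
pub struct NodeExecutionResult {
    pub logs: Vec<String>,
}

#[async_trait]
pub trait WorkflowNode {
    async fn execute(&self, ctx: &NodeContext, config: &Value) -> Result<NodeExecutionResult>;
    fn render_report(&self, result: &NodeExecutionResult) -> Result<String>;
}

/// Runner 统一负责落盘与推送,节点实现只关心 execute / render_report
pub struct WorkflowNodeRunner;

impl WorkflowNodeRunner {
    pub async fn run(&self, node: &dyn WorkflowNode, ctx: &NodeContext, config: &Value) -> Result<()> {
        let result = node.execute(ctx, config).await?;

        // 自动写入执行日志 (_execution.md) 与报告 (report.md),VGCS 细节省略
        let execution_log = result.logs.join("\n");
        let report_md = node.render_report(&result)?;
        self.write_to_vgcs("_execution.md", &execution_log)?;
        self.write_to_vgcs("report.md", &report_md)?;

        // 自动推送 TaskStreamUpdate / TaskLog,NATS 发布细节省略
        self.publish_stream_update(&report_md)?;
        Ok(())
    }

    fn write_to_vgcs(&self, _path: &str, _content: &str) -> Result<()> { Ok(()) } // 占位
    fn publish_stream_update(&self, _report: &str) -> Result<()> { Ok(()) }       // 占位
}
```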
### 2.2. 工作流快照持久化 (Snapshot Persistence)
|
||||
|
||||
为了支持历史回放,Orchestrator 必须在工作流结束时保存“案发现场”。
|
||||
|
||||
* **触发时机**: `handle_task_completed` 检测到 Workflow 结束 (Completed/Failed)。
|
||||
* **保存内容**: `WorkflowStateSnapshot` (DAG 结构、每个 Task 的最终 Status、Output Commit Hash)。
|
||||
* **存储位置**: `data-persistence-service` -> `session_data` 表。
|
||||
* `data_type`: "workflow_snapshot"
|
||||
* `request_id`: workflow_id
|
||||
* **API**: `GET /api/v1/workflow/snapshot/{request_id}` (API Gateway 转发)。
|
||||
|
||||
---
|
||||
|
||||
## 3. 前端架构升级 (Frontend Architecture)
|
||||
|
||||
### 3.1. 统一状态管理 (`useWorkflowStore`)
|
||||
|
||||
改造 Store 以支持“双模式”加载。
|
||||
|
||||
* **State**: 增加 `mode: 'realtime' | 'historical'`。
|
||||
* **Action `initialize(id)`**:
|
||||
1. 重置 Store。
|
||||
2. **尝试 SSE 连接** (`/api/v1/workflow/events/{id}`)。
|
||||
* 如果连接成功且收到 `WorkflowStarted` / `TaskStateChanged`,进入 **Realtime Mode**。
|
||||
3. **并行/Fallback 调用 Snapshot API** (`/api/v1/workflow/snapshot/{id}`)。
|
||||
* 如果 SSE 连接失败(404/Closed,说明任务已结束),或者 Snapshot 返回了数据:
|
||||
* 调用 `loadFromSnapshot(snapshot)` 填充 DAG 和 Task 状态。
|
||||
* 进入 **Historical Mode**。
|
||||
|
||||
### 3.2. 统一页面逻辑 (`ReportPage.tsx`)
|
||||
|
||||
不再区分 `HistoricalReportPage`,统一使用 `ReportPage`。
|
||||
|
||||
* **DAG 可视化**: 复用 `WorkflowVisualizer`,数据源由 Store 提供(无论是 SSE 来的还是 Snapshot 来的)。
|
||||
* **状态指示**:
|
||||
* 实时模式:显示 Spinner 和实时进度。
|
||||
* 历史模式:显示最终结果 Badge,并提示“Viewing History”。
|
||||
|
||||
### 3.3. 沉浸式报告与调试面板 (Immersive Report & Debugging Panel)
|
||||
|
||||
为了提升用户体验,我们将摒弃原本“三等分标签页”的设计,采用 **"主视图 + 侧边栏"** 的布局策略,实现“隐形调试”。
|
||||
|
||||
* **主视图 (Main View - The Report)**:
|
||||
* **定位**: 面向最终用户,强调阅读体验。
|
||||
* **布局**: 占据屏幕中央核心区域,无干扰展示 Markdown 渲染结果。
|
||||
* **状态栏**: 顶部仅保留最关键信息(任务状态 Badge、耗时)。
|
||||
* **调试面板 (Debug Panel - The Inspector)**:
|
||||
* **定位**: 面向开发者和排查问题,默认隐藏。
|
||||
* **入口**: 顶部导航栏右侧的 "Terminal/Code" 图标按钮。
|
||||
* **交互**: 点击从屏幕右侧滑出 (Sheet/Drawer),支持拖拽调整宽度。
|
||||
* **内容结构**: 面板内采用 Tabs 组织调试信息:
|
||||
1. **Logs**: 聚合实时流日志与 `_execution.md` 回放。
|
||||
2. **Context**: 文件系统快照 (Context Explorer),展示 Input/Output 文件及 Diff。
|
||||
3. **Raw**: 任务原始配置 (Config) 与元数据 (Metadata)。
|
||||
|
||||
这样的设计实现了“一步到位”:普通用户完全感知不到调试页面的存在,而开发者只需一次点击即可获得所有深层上下文。
|
||||
|
||||
### 3.4. 历史记录入口 (`RecentReports`)
|
||||
|
||||
* **组件**: `RecentReportsDropdown` (Header 区域)。
|
||||
* **逻辑**: 调用 `GET /api/v1/analysis-results?limit=10`。
|
||||
* **跳转**: 点击跳转到 `/report/{id}` (复用统一页面)。
|
||||
|
||||
---
|
||||
|
||||
## 4. 实施计划 (Implementation Plan)
|
||||
|
||||
### Phase 1: 后端 - 统一运行时 (Backend Standardization)
|
||||
1. 在 `common-contracts` 实现 `WorkflowNode` Trait 和 `WorkflowNodeRunner`。
|
||||
2. 重构 `yfinance`, `tushare`, `mock` Provider 使用新架构。
|
||||
* 确保它们都生成 `report.md` 和 `_execution.md`。
|
||||
3. 验证实时流推送是否包含正确的 `task_id` 日志。
|
||||
|
||||
### Phase 2: 后端 - 快照持久化 (Snapshot Persistence)
|
||||
1. `workflow-orchestrator`: 结束时保存 `WorkflowStateSnapshot` 到 Session Data。
|
||||
2. `api-gateway`: 暴露 Snapshot 查询接口。
|
||||
3. `data-persistence`: 优化 `get_analysis_results` 支持全局最近 10 条查询。
|
||||
|
||||
### Phase 3: 前端 - 统一页面与日志 (Frontend Unification)
|
||||
1. 改造 `useWorkflowStore` 支持 Snapshot Hydration。
|
||||
2. 改造 `ReportPage` 实现 SSE + Snapshot 双重加载策略。
|
||||
3. 改造 `TaskDetailView` 为“沉浸式”布局:
|
||||
* 默认仅展示 Markdown Viewer。
|
||||
* 添加 Right Sheet 组件承载 Logs/Context。
|
||||
4. 实现 `RecentReportsDropdown`。
|
||||
|
||||
### Phase 4: 清理与验证
|
||||
1. 删除旧的 `HistoricalReportPage` 路由和组件。
|
||||
2. 验证全流程:
|
||||
* **实时**: 启动新任务 -> 看到 DAG 生长 -> 看到节点实时 Log -> 看到 Report 生成。
|
||||
* **历史**: 点击下拉框 -> 加载旧任务 -> 看到完整 DAG -> 点击节点看到 Report -> 打开 Inspector 看到 Logs。
|
||||
|
||||
---
|
||||
|
||||
## 5. 废弃文档
|
||||
|
||||
本设计取代以下文档:
|
||||
- `docs/3_project_management/tasks/pending/20251128_backend_unified_worker_trait.md`
|
||||
- `tasks/pending/20251128_historical_playback_design.md`
|
||||
- `docs/3_project_management/tasks/pending/20251128_historical_reports_design.md`
|
||||
62
docs/backend_requirements_for_frontend_refactor.md
Normal file
@ -0,0 +1,62 @@
|
||||
# Backend Requirements for Frontend Refactor
|
||||
|
||||
由于前端正在进行“破坏式”重构,删除了所有包含业务逻辑控制、状态推断、流程编排的代码(如 `useReportEngine`, `ExecutionStepManager`),后端必须接管以下职责,以支持纯被动式(Puppet Mode)的前端。
|
||||
|
||||
## 1. 核心原则
|
||||
前端不再拥有“大脑”,只拥有“眼睛”和“耳朵”。所有状态变更、流程流转、错误判断全由后端指令驱动。
|
||||
|
||||
## 2. 接口需求
|
||||
|
||||
### 2.1 统一事件流 (Unified Event Stream)
|
||||
前端将只连接**一个**长连接通道(SSE 或 WebSocket),用于接收整个分析周期的所有信息。
|
||||
|
||||
* **Endpoint**: `/api/v2/workflow/events?request_id={id}` (建议)
|
||||
* **职责**: 聚合 NATS (Data Fetching), Internal State (Report Generator), Database (Persistence) 的所有事件。
|
||||
|
||||
### 2.2 事件类型定义 (Protocol)
|
||||
后端需要推送以下类型的事件,且 Payload 必须包含前端渲染所需的所有上下文,前端不再发起二次请求查询详情。
|
||||
|
||||
1. **`WORKFLOW_START`**
|
||||
* 标志流程开始。
|
||||
* Payload: `{ symbol, market, template_id, timestamp }`
|
||||
|
||||
2. **`PHASE_CHANGED`**
|
||||
* **关键**: 前端不再判断何时切换界面,完全依赖此事件。
|
||||
* Payload: `{ phase: 'DATA_FETCHING' | 'ANALYZING' | 'COMPLETED' | 'FAILED', previous_phase: '...' }`
|
||||
|
||||
3. **`TASK_PROGRESS` (Data Fetching Phase)**
|
||||
* 替代前端轮询 `/api/tasks`。
|
||||
* Payload: `{ task_id, provider, status, progress, message }`
|
||||
* **注意**: 后端需负责聚合多个 Provider 的进度,前端只管展示列表。
|
||||
|
||||
4. **`MODULE_PROGRESS` (Analysis Phase)**
|
||||
* 替代旧的 SSE 流。
|
||||
* Payload: `{ module_id, content_delta, status }`
|
||||
|
||||
5. **`WORKFLOW_ERROR`**
|
||||
* **关键**: 包含错误级别(Fatal/Warning)。前端只展示,不判断是否重试。
|
||||
* Payload: `{ code, message, is_fatal, suggestion }`
|
||||
|
||||
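上述事件协议在后端可以收敛为一个带 tag 的枚举,下面是按本文 Payload 描述整理的契约草图(命名与 serde 标注均为示意,最终以 `common-contracts` 为准):

```rust
use serde::{Deserialize, Serialize};

/// 统一事件流中的事件类型(以 type/payload 两段式序列化,便于前端按 type 分发)
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", content = "payload")]
pub enum UnifiedWorkflowEvent {
    WorkflowStart {
        symbol: String,
        market: String,
        template_id: String,
        timestamp: String,
    },
    PhaseChanged {
        phase: WorkflowPhase,
        previous_phase: Option<WorkflowPhase>,
    },
    TaskProgress {
        task_id: String,
        provider: String,
        status: String,
        progress: Option<u8>,
        message: Option<String>,
    },
    ModuleProgress {
        module_id: String,
        content_delta: String,
        status: String,
    },
    WorkflowError {
        code: String,
        message: String,
        is_fatal: bool,
        suggestion: Option<String>,
    },
}

#[derive(Debug, Serialize, Deserialize)]
pub enum WorkflowPhase {
    DataFetching,
    Analyzing,
    Completed,
    Failed,
}
```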
## 3. 逻辑接管需求
|
||||
|
||||
### 3.1 状态机迁移 (State Transitions)
|
||||
* **旧逻辑 (已删)**: 前端轮询任务 -> `if (all_tasks_done) start_analysis()`.
|
||||
* **新逻辑**: 后端 `Workflow Orchestrator` 监听任务完成事件 -> 自动触发分析 -> 推送 `PHASE_CHANGED: ANALYZING` 给前端。
|
||||
|
||||
### 3.2 容错与部分成功 (Partial Success)
|
||||
* **旧逻辑 (已删)**: 前端判断 `if (failed_tasks < total) continue`.
|
||||
* **新逻辑**: 后端根据数据缺失的程度决定是否允许继续分析。如果允许,直接进入分析阶段;如果不允许,推送 `WORKFLOW_ERROR`。
|
||||
|
||||
### 3.3 超时控制 (Timeout)
|
||||
* **旧逻辑 (已删)**: 前端 `setTimeout(10min)`.
|
||||
* **新逻辑**: 后端设置执行超时。如果超时,主动推送 Error 事件并关闭连接。前端仅处理网络层面的断开重连。
|
||||
|
||||
### 3.4 断点恢复 (Resume)
|
||||
* **需求**: 当用户刷新页面重连 SSE 时,后端必须立即推送一条 `SNAPSHOT` 事件,包含当前所有已完成的任务、已生成的报告片段、当前所处的阶段。
|
||||
* **目的**: 防止前端因为丢失历史事件而无法渲染完整界面。
|
||||
|
||||
## 4. 废弃接口
|
||||
以下接口的前端调用代码已被删除,后端可酌情保留用于调试,但业务不再依赖:
|
||||
* `GET /api/tasks/{id}` (轮询接口)
|
||||
* `GET /api/analysis-results/stream` (旧的纯分析流,需升级为统一流)
|
||||
|
||||
131
docs/experiences/rust_microservice_workspace_dev_pattern.md
Normal file
@ -0,0 +1,131 @@
|
||||
# Rust 微服务开发最佳实践:Workspace 与 Docker 高效协同
|
||||
|
||||
**日期**: 2025-11-29
|
||||
**标签**: #Rust #Microservices #Docker #DevEx #Tilt #Workspace
|
||||
|
||||
---
|
||||
|
||||
## 1. 背景与痛点
|
||||
|
||||
在采用 Rust 开发微服务架构时,我们面临着一个经典的**两难选择**:
|
||||
|
||||
### 方案 A:单一仓库 (Monorepo) + Workspace
|
||||
* **优点**:所有服务共享依赖库版本(`Cargo.lock`),代码复用极其方便,一次编译所有公共库(`target` 共享)。
|
||||
* **缺点**:在 Docker 容器化部署时,每次修改哪怕一行代码,都会导致 Docker Cache 失效,触发整个 Workspace 的重新编译。对于拥有数十个服务的系统,这简直是灾难。
|
||||
|
||||
### 方案 B:多仓库 (Polyrepo) 或 独立构建
|
||||
* **优点**:服务间彻底隔离,互不影响。
|
||||
* **缺点**:每个服务都要重新下载和编译一遍 `tokio`, `axum` 等几百个依赖。磁盘占用爆炸(每个服务 2GB+ target),编译时间爆炸(CPU 重复劳动)。
|
||||
|
||||
### 我们的目标
|
||||
我们需要一种**两全其美**的方案:
|
||||
1. **开发时 (Dev)**:享受 Workspace 的增量编译速度,改一行代码只需 2 秒重启。
|
||||
2. **部署时 (Prod)**:享受容器的隔离性,且构建尽可能快。
|
||||
3. **体验 (DevEx)**:自动化热重载 (Hot Reload),无需手动重启容器。
|
||||
|
||||
---
|
||||
|
||||
## 2. 解决方案:共享缓存挂载 + 容器内增量编译
|
||||
|
||||
核心思想是**放弃在 Docker 构建阶段进行编译**(针对开发环境),改为**在容器运行时利用挂载的宿主机缓存进行增量编译**。
|
||||
|
||||
### 2.1 关键技术点
|
||||
|
||||
1. **极简开发镜像 (`Dockerfile.dev`)**:
|
||||
* 不再 `COPY` 源代码。
|
||||
* 不再运行 `cargo build`。
|
||||
* 只安装必要工具(如 `cargo-watch`)。
|
||||
* 所有源码和依赖通过 Volume 挂载。
|
||||
|
||||
2. **共享编译缓存 (`cargo-target` Volume)**:
|
||||
* 创建一个 Docker Volume(或挂载宿主机目录)专门存放 `/app/target`。
|
||||
* 所有微服务容器**共享**这个 Volume。
|
||||
* **效果**:服务 A 编译过的 `tokio`,服务 B 启动时直接复用,无需再次编译。
|
||||
|
||||
3. **Cargo Registry 缓存 (`cargo-cache` Volume)**:
|
||||
* 挂载 `/usr/local/cargo`。
|
||||
* **效果**:避免每次启动容器都要重新下载 crates.io 的索引和源码。
|
||||
|
||||
4. **Cargo Watch 热重载**:
|
||||
* 容器启动命令为 `cargo watch -x "run --bin my-service"`。
|
||||
* 配合 Docker Compose 的源码挂载,一旦宿主机修改代码,容器内立即触发增量编译并重启进程。
|
||||
|
||||
### 2.2 实施细节
|
||||
|
||||
#### Dockerfile.dev (通用开发镜像)
|
||||
```dockerfile
|
||||
FROM rust:1.90
|
||||
# 安装热重载工具
|
||||
RUN cargo install cargo-watch
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 预创建挂载点,避免权限问题
|
||||
RUN mkdir -p /app/target && mkdir -p /usr/local/cargo
|
||||
|
||||
# 默认命令:监听并运行
|
||||
CMD ["cargo", "watch", "-x", "run"]
|
||||
```
|
||||
|
||||
#### docker-compose.yml (编排配置)
|
||||
```yaml
|
||||
services:
|
||||
api-gateway:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/Dockerfile.dev
|
||||
# 覆盖启动命令,指定运行的 binary
|
||||
command: ["cargo", "watch", "-x", "run --bin api-gateway-server"]
|
||||
volumes:
|
||||
# 1. 挂载源码 (实时同步)
|
||||
- .:/app
|
||||
# 2. 挂载共享编译产物 (核心加速点!)
|
||||
- cargo-target:/app/target
|
||||
# 3. 挂载依赖库缓存
|
||||
- cargo-cache:/usr/local/cargo
|
||||
|
||||
iam-service:
|
||||
# ... 同样的配置,复用相同的 cargo-target
|
||||
volumes:
|
||||
- .:/app
|
||||
- cargo-target:/app/target
|
||||
- cargo-cache:/usr/local/cargo
|
||||
|
||||
volumes:
|
||||
cargo-target: # 这里的魔法在于所有容器共享同一个 target 目录
|
||||
driver: local
|
||||
cargo-cache:
|
||||
driver: local
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. 优势总结
|
||||
|
||||
| 指标 | 传统 Docker 构建 | 本方案 (共享挂载) |
|
||||
| :--- | :--- | :--- |
|
||||
| **首次启动时间** | 慢 (需编译所有) | 慢 (需编译所有,但只需一次) |
|
||||
| **二次启动时间** | **极慢** (代码变动导致层失效,全量重编) | **极快** (复用 target,增量编译 < 5s) |
|
||||
| **磁盘占用** | 高 (每个镜像都有 target) | **低** (所有服务共享一份 target) |
|
||||
| **依赖冲突** | 严格 (Docker 构建会报错) | 宽松 (只要本地能跑,容器就能跑) |
|
||||
| **开发体验** | 修改代码 -> 等待构建 -> 重启容器 | 修改代码 -> 自动热更新 (Hot Reload) |
|
||||
|
||||
## 4. 注意事项与坑
|
||||
|
||||
1. **文件锁 (File Locking)**:
|
||||
* Rust 的 Cargo 能够很好地处理并发编译锁。多个服务同时启动时,它们会排队等待编译公共依赖(如 `tokio`),而不会发生冲突损坏文件。
|
||||
* **注意**: 如果宿主机也是 Linux 且挂载了宿主机的 `target` 目录,可能会因为 glibc 版本不同导致宿主机和容器内的 `cargo` 互相“打架”(指纹不一致导致频繁重编)。**建议使用独立的 Docker Volume (`cargo-target`) 而不是挂载宿主机 `target` 目录**,以此隔离宿主机环境和容器环境。
|
||||
|
||||
2. **权限问题**:
|
||||
* Docker 容器内默认是 `root`,写入 Volume 的文件也是 `root` 权限。如果挂载的是宿主机目录,可能会导致宿主机用户无法清理 `target`。使用 Docker Volume 可以规避这个问题。
|
||||
|
||||
3. **生产环境构建**:
|
||||
* 本方案**仅限于开发环境**。
|
||||
* 生产环境 (`Dockerfile.prod`) 依然需要使用标准的 `COPY . .` + `cargo build --release` 流程,或者使用 `cargo-chef` 进行多阶段构建以减小镜像体积。
|
||||
|
||||
---
|
||||
|
||||
## 5. 结论
|
||||
|
||||
通过 **Docker Compose Volume 挂载** + **Cargo Watch** + **共享 Target 目录**,我们成功地在微服务架构下保留了 Monorepo 的开发效率。这是一套经过验证的、适合中大型 Rust 项目的高效开发模式。
|
||||
|
||||
400
docs/frontend/20251122_frontend_rebuild_design.md
Normal file
@ -0,0 +1,400 @@
|
||||
# 前端重构设计: Vite + React SPA
|
||||
日期: 2025-11-22
|
||||
状态: 已实现 (Vite + React 重构完成,Mock 验证通过)
|
||||
|
||||
## 1. 概述与核心理念
|
||||
|
||||
本文档描述了将 `Fundamental_Analysis` 前端从 Next.js 重构为 Vite + React 的方案。
|
||||
新的架构将专注于简单性、类型安全和“后端驱动”的状态模型。
|
||||
|
||||
### 核心理念
|
||||
1. **木偶架构 (Puppet Architecture)**: 前端只包含最小的业务逻辑,它忠实反映后端的状态。
|
||||
* 如果后端说“加载中”,我们就显示加载中。
|
||||
* 如果后端发送了状态快照,我们就完全按照快照渲染。
|
||||
* 不做任何猜测性的 UI 更新 (Optimistic UI)。
|
||||
2. **单一数据源 (Single Source of Truth)**: 所有类型 (Structs, Enums) 由 Rust 定义并生成 TypeScript (Zod)。
|
||||
* **前端禁止内联定义类型**。
|
||||
* 使用 `zod` 对所有输入数据进行运行时校验 (Fail Early)。
|
||||
3. **Rustic 风格**:
|
||||
* 开启 Strict Mode。
|
||||
* 禁止 `any` 类型。
|
||||
* 对 Enum 进行穷尽匹配 (Exhaustive matching)。
|
||||
4. **Vite + React**: SPA 架构,消除 SSR 代理黑盒,使 SSE 处理更加健壮和透明。
|
||||
|
||||
## 2. 技术栈
|
||||
|
||||
| 层级 | 技术 | 选择理由 |
|
||||
| :--- | :--- | :--- |
|
||||
| **构建工具** | **Vite** | 极速、简单、无隐藏代理黑盒。 |
|
||||
| **框架** | **React 19** | 保留现有的 `shadcn/ui` 组件资产。 |
|
||||
| **路由** | **React Router v7** | 标准的客户端路由。 |
|
||||
| **状态管理** | **TanStack Query** | 服务端状态缓存 (用于配置数据)。 |
|
||||
| **全局状态** | **Zustand** | 客户端 UI 状态 (Sidebar, Workflow) 管理。 |
|
||||
| **UI 库** | **shadcn/ui** | 延续现有的设计语言 (Tailwind CSS)。 |
|
||||
| **可视化** | **ReactFlow** | 用于渲染工作流 DAG 图。 |
|
||||
| **数据请求** | **Axios + Zod** | 带运行时校验的类型化 HTTP 客户端。 |
|
||||
| **API 生成** | **OpenAPI -> Zod** | 从后端自动生成前端类型。 |
|
||||
|
||||
## 3. 架构设计
|
||||
|
||||
### 3.1 类型生成流水线
|
||||
后端 (Rust/Axum) 需暴露 `openapi.json` (通过 `utoipa` 等库)。
|
||||
前端运行生成脚本:
|
||||
`Rust Structs` -> `openapi.json` -> `frontend/src/api/schema.ts` (Zod schemas + TS Types)。
|
||||
|
||||
### 3.2 状态管理策略
|
||||
|
||||
#### A. 配置数据 (Pull 模型)
|
||||
低频变更的数据 (API Keys, Templates) 由 **TanStack Query** 管理。
|
||||
* `useQuery(['llm-providers'])`
|
||||
* `useMutation(['update-provider'])`
|
||||
|
||||
#### B. 工作流数据 (Push/Stream 模型)
|
||||
实时流动的数据 (分析进度) 由自定义的 **Workflow Store (Zustand)** 管理,直接连接 SSE。
|
||||
* **连接层**: 使用原生 `EventSource` 直接连接 `http://api-gateway:4000/v1/workflow/events/{id}`。
|
||||
* **状态机**:
|
||||
```typescript
|
||||
type TaskInfo = {
|
||||
id: string;
|
||||
status: 'Pending' | 'Running' | 'Completed' | 'Failed';
|
||||
progress?: number; // 0-100
|
||||
error?: string;
|
||||
logs: string[]; // 实时日志
|
||||
};
|
||||
|
||||
type WorkflowState =
|
||||
| { status: 'IDLE' }
|
||||
| { status: 'CONNECTING', requestId: string }
|
||||
| { status: 'RUNNING', requestId: string, dag: Dag, tasks: Record<string, TaskInfo> }
|
||||
| { status: 'COMPLETED', result: any }
|
||||
| { status: 'ERROR', error: string };
|
||||
```
|
||||
|
||||
## 4. 页面设计与布局 (ASCII Art)
|
||||
|
||||
**设计原则**: 严格保留现有 Next.js 版本的 UI 布局、风格和功能流程。
|
||||
|
||||
### 4.1 整体布局 (Shell)
|
||||
保留现有的顶部导航栏设计 (`RootLayout`)。历史报告改为下拉菜单。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| FA Platform |
|
||||
| [首页] [历史报告 v] [文档] [配置] |
|
||||
+-----------+---------------------------------------------------+
|
||||
| | (Dropdown Menu) |
|
||||
| | 2025-11-22 10:00: 600519.SS (Success) |
|
||||
| | 2025-11-21 14:30: AAPL (Failed) |
|
||||
| | ... |
|
||||
| +---------------------------------------------------+
|
||||
| |
|
||||
| <Outlet /> (主内容区域) |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
### 4.2 仪表盘 (Dashboard / Home)
|
||||
入口页面,用于发起新的分析。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| 基本面分析报告 |
|
||||
| 输入股票代码和市场,生成综合分析报告。 |
|
||||
+---------------------------------------------------------------+
|
||||
| |
|
||||
| [ Card: Start Analysis ] |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | 股票代码: [ 600519 ] | |
|
||||
| | 交易市场: [ 中国 v ] | |
|
||||
| | | |
|
||||
| | [ BUTTON: 生成报告 ] | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
### 4.3 报告详情页 (Report View)
|
||||
核心页面,负责展示实时工作流状态和分析结果。
|
||||
**关键改进**: Tabs 上需有直观的状态提示 (Spinner/Check/X)。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [Card: Header] |
|
||||
| 600519.SS Market: CN [Badge: Ready/Analyzing] |
|
||||
| [Template Select] [Start Btn] |
|
||||
+---------------------------------------------------------------+
|
||||
| |
|
||||
| [Tabs List] |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | [DAG View] (Always First) | |
|
||||
| | [Stock Chart] | |
|
||||
| | [Fundamental Data] [Spinner] (Fetching...) | |
|
||||
| | [Analysis: Basic] [Check] | |
|
||||
| | [Analysis: Value] [Spinner] | |
|
||||
| | [Analysis: Risk ] [Pending] | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
| (Tab Content Area) |
|
||||
| |
|
||||
| ----------------------------------------------------------- |
|
||||
| Request ID: ... | Time: 12.5s | Tokens: 1500 |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
#### 4.3.1 DAG 视图 (Workflow Graph)
|
||||
使用 ReactFlow 渲染。
|
||||
* **节点**: 代表任务 (Fetch Data, Analysis Modules)。
|
||||
* **连线**: 代表依赖关系。
|
||||
* **状态**: 节点颜色随状态变化 (灰=Pending, 蓝=Running, 绿=Completed, 红=Failed)。
|
||||
* **交互**: 点击节点自动切换 Tabs 到对应的详情页。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ DAG View ] |
|
||||
| |
|
||||
| [Fetch: Tushare] ---> [Fundamental Data] |
|
||||
| (Running) (Pending) |
|
||||
| | |
|
||||
| [Fetch: Finnhub] --------------+ |
|
||||
| (Done) | |
|
||||
| v |
|
||||
| [Analysis: Basic] |
|
||||
| (Pending) |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
#### 4.3.2 Fundamental Data Tab
|
||||
展示多个数据源的并行获取状态。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ Fundamental Data ] |
|
||||
| |
|
||||
| [ Card: Tushare Provider ] |
|
||||
| Status: [Spinner] Running (3.2s) |
|
||||
| Logs: |
|
||||
| > Connecting to api.tushare.pro... |
|
||||
| > Fetching income statement... |
|
||||
| |
|
||||
| [ Card: Finnhub Provider ] |
|
||||
| Status: [Check] Completed |
|
||||
| Result: Fetched 3 years of data. |
|
||||
| |
|
||||
| [ Card: AlphaVantage Provider ] |
|
||||
| Status: [X] Failed |
|
||||
| Error: Rate limit exceeded. |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
#### 4.3.3 Analysis Module Tab
|
||||
展示 LLM 分析的流式输出。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ Analysis: Basic Analysis ] |
|
||||
| |
|
||||
| Status: [Spinner] Generating Report... |
|
||||
| Model: gpt-4o |
|
||||
| |
|
||||
| (Markdown Content Streaming Area) |
|
||||
| # Basic Financial Analysis |
|
||||
| Based on the income statement... [Cursor] |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
### 4.4 配置页面 (Config)
|
||||
|
||||
#### 4.4.1 AI Providers 配置 (Tab 1)
|
||||
|
||||
**设计逻辑**:
|
||||
1. **Provider 维度**: 允许配置多个 Provider (如 OpenAI, Anthropic, LocalLLM)。
|
||||
2. **Model 维度**: 每个 Provider 下包含多个 Model。
|
||||
3. **交互流程**: 添加 Provider -> 输入 Key/BaseURL -> 点击 "刷新模型列表" (Fetch List) -> 从下拉框选择或手动添加模型 -> 点击 "测试" (Test) -> 保存。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ Tabs: AI Provider | 数据源配置 | 分析模板 | 系统 ] |
|
||||
+---------------------------------------------------------------+
|
||||
| |
|
||||
| [ Button: + 添加 AI Provider ] |
|
||||
| |
|
||||
| [ Card: OpenAI (Official) ] [Delete] |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | Base URL: [ https://api.openai.com/v1 ] | |
|
||||
| | API Key: [ ************************ ] [Eye Icon] | |
|
||||
| | | |
|
||||
| | [ Button: 刷新模型列表 (Refresh List) ] | |
|
||||
| | | |
|
||||
| | **模型列表 (Models)**: | |
|
||||
| | +-----------------------------------------------------+ | |
|
||||
| | | gpt-4o [Status: Active] [Test] [Remove] | | |
|
||||
| | | gpt-3.5-turbo [Status: Active] [Test] [Remove] | | |
|
||||
| | +-----------------------------------------------------+ | |
|
||||
| | | |
|
||||
| | [ Input: 添加模型 (支持搜索/补全) ] [Button: Add] | |
|
||||
| | (输入 "claude" 自动提示 "claude-3-sonnet", etc.) | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
| [ Card: Local LLM (Ollama) ] |
|
||||
| ... |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
#### 4.4.2 数据源配置 (Data Sources) (Tab 2)
|
||||
|
||||
**设计逻辑**:
|
||||
1. **后端驱动 (Backend Driven)**: 页面不硬编码有哪些数据源。
|
||||
2. **查询流程**: 前端请求 `GET /configs/data_sources/schema` (或类似接口),后端返回可用的 Provider 列表及其所需配置项 (Schema)。
|
||||
3. **动态渲染**: 根据后端返回的 Schema 渲染表单 (如 API Key 输入框, URL 输入框)。
|
||||
4. **测试**: 如果后端支持 `Test` 接口,显示测试按钮。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ Tabs: AI Provider | 数据源配置 | 分析模板 | 系统 ] |
|
||||
+---------------------------------------------------------------+
|
||||
| |
|
||||
| (Dynamically Rendered from Backend Config) |
|
||||
| |
|
||||
| [ Card: Tushare Pro ] (Enabled [x]) |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | API Token: [ ******************** ] | |
|
||||
| | Endpoint: [ http://api.tushare.pro ] | |
|
||||
| | [Button: Test Connection] -> (Result: Success/Fail) | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
| [ Card: AlphaVantage ] (Enabled [ ]) |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | API Key: [ ] | |
|
||||
| | [Button: Test Connection] | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
#### 4.4.3 分析模板配置 (Analysis Templates) (Tab 3)
|
||||
|
||||
**设计逻辑**:
|
||||
1. **模板 (Template)**: 分析报告的骨架,包含多个模块。
|
||||
2. **模块 (Module)**: 具体的分析任务 (如 "基本面概览", "风险评估")。
|
||||
3. **N*M 模型选择**: 每个模块可以指定特定的 AI 模型。
|
||||
* **来源**: 聚合所有 AI Providers 中已启用的模型。
|
||||
* **交互**: 下拉搜索框 (Combobox),输入关键词 (如 "gpt") 筛选出 `{provider: "openai", model: "gpt-4"}`。
|
||||
4. **依赖管理 (DAG)**: 指定模块依赖关系 (如 "风险评估" 依赖 "基本面概览")。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ Tabs: AI Provider | 数据源配置 | 分析模板 | 系统 ] |
|
||||
+---------------------------------------------------------------+
|
||||
| |
|
||||
| 当前模板: [ 快速分析模板 v ] [+ 新建模板] [删除] |
|
||||
| |
|
||||
| **分析模块 (Modules)**: |
|
||||
| |
|
||||
| [ Card: 模块 1 - 基本面概览 ] |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | ID: basic_analysis | |
|
||||
| | 依赖 (Dependencies): [ 无 v ] | |
|
||||
| | | |
|
||||
| | **模型选择 (Model)**: | |
|
||||
| | [ Combobox: gpt-4o (OpenAI) v ] | |
|
||||
| | (数据来源: 聚合自 AI Provider Tab 的所有模型) | |
|
||||
| | | |
|
||||
| | **提示词 (Prompt)**: | |
|
||||
| | [ Textarea: Analyze the financial data... ] | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
| [ Card: 模块 2 - 深度风险评估 ] |
|
||||
| +---------------------------------------------------------+ |
|
||||
| | ID: risk_eval | |
|
||||
| | 依赖 (Dependencies): [ 基本面概览 (basic_analysis) x ] | |
|
||||
| | Model: [ claude-3-opus (Anthropic) v ] | |
|
||||
| | ... | |
|
||||
| +---------------------------------------------------------+ |
|
||||
| |
|
||||
| [ Button: + 添加分析模块 ] [ Button: 保存配置 ] |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
#### 4.4.4 系统状态 (System) (Tab 4)
|
||||
|
||||
**设计逻辑**:
|
||||
1. **Status Check**: 简单的健康看板。
|
||||
2. **Modules**: 列出所有微服务/组件的状态 (Running, Degraded, Down)。
|
||||
|
||||
```
|
||||
+---------------------------------------------------------------+
|
||||
| [ Tabs: AI Provider | 数据源配置 | 分析模板 | 系统 ] |
|
||||
+---------------------------------------------------------------+
|
||||
| |
|
||||
| **系统健康状态 (System Health)** |
|
||||
| |
|
||||
| [ API Gateway ] [ Badge: Healthy (Green) ] |
|
||||
| [ Workflow Orchestrator] [ Badge: Healthy (Green) ] |
|
||||
| [ Data Persistence ] [ Badge: Healthy (Green) ] |
|
||||
| [ Report Generator ] [ Badge: Degraded (Yellow) ] |
|
||||
| |
|
||||
| **服务详情**: |
|
||||
| - Database Connection: OK |
|
||||
| - NATS Connection: OK |
|
||||
| - Redis Cache: OK |
|
||||
| |
|
||||
+---------------------------------------------------------------+
|
||||
```
|
||||
|
||||
## 5. 目录结构 (Proposed)
|
||||
|
||||
```
|
||||
frontend/
|
||||
├── public/
|
||||
├── src/
|
||||
│ ├── api/ # Axios instances, Zod schemas
|
||||
│ ├── assets/
|
||||
│ ├── components/ # Shared UI components
|
||||
│ │ ├── ui/ # shadcn/ui primitives (迁移自原项目)
|
||||
│ │ ├── layout/ # Shell, Header
|
||||
│ │ ├── workflow/ # DAG Graph, Status Badges
|
||||
│ │ └── business/ # 业务组件
|
||||
│ │ ├── StockChart.tsx
|
||||
│ │ ├── FinancialTable.tsx
|
||||
│ │ └── ModelSelector.tsx # 复用的模型选择器
|
||||
│ ├── pages/ # 路由页面组件
|
||||
│ │ ├── Dashboard.tsx
|
||||
│ │ ├── Report.tsx
|
||||
│ │ └── config/ # 配置相关页面拆分
|
||||
│ │ ├── index.tsx # 配置页 Layout
|
||||
│ │ ├── AIProviderTab.tsx
|
||||
│ │ ├── DataSourceTab.tsx
|
||||
│ │ └── TemplateTab.tsx
|
||||
│ ├── hooks/ # Global hooks
|
||||
│ ├── lib/ # Utils (cn, formatters)
|
||||
│ ├── stores/ # Zustand stores
|
||||
│ ├── types/ # Global types (if not in schema)
|
||||
│ ├── App.tsx # Router Setup
|
||||
│ └── main.tsx # Entry
|
||||
├── index.html
|
||||
├── package.json
|
||||
└── vite.config.ts
|
||||
```
|
||||
|
||||
## 6. 迁移步骤
|
||||
|
||||
1. **归档**: 将现有 `frontend` 移动到 `frontend/archive/v2_nextjs`。
|
||||
2. **初始化**: 在 `frontend` 创建新的 Vite 项目。
|
||||
3. **安装依赖**: Tailwind, Shadcn, Axios, Zustand, React Router, Lucide, ReactFlow。
|
||||
4. **移植 UI**: 从归档中复制 `components/ui` (shadcn)。
|
||||
5. **移植逻辑**:
|
||||
* 重写 `useWorkflow` hook,使用新的 Store 模式。
|
||||
* 实现 DAG 可视化组件。
|
||||
* 实现配置页面的“添加+补全”交互。
|
||||
6. **验证**: 测试与后端的 SSE 连接和 DAG 状态同步。
|
||||
|
||||
## 7. 执行阶段 (Next Steps)
|
||||
|
||||
1. 归档现有代码。
|
||||
2. 初始化 Vite + React 项目。
|
||||
3. 配置 Tailwind + Shadcn 环境。
|
||||
4. 搭建基础 Layout (Shell)。
|
||||
48
docs/frontend/backend_todos.md
Normal file
@ -0,0 +1,48 @@
|
||||
# 后端改造需求清单 (配合前端重构)
|
||||
日期: 2025-11-22
|
||||
状态: **已完成**
|
||||
|
||||
为了支持新的 "Puppet Architecture" 前端设计,后端已完成以下适配性改造。
|
||||
|
||||
## 1. API 规范与类型生成 (Service Kit 集成) - [已完成]
|
||||
|
||||
**目标**: 利用项目现有的 `service_kit` 库,实现前端通过脚本自动从后端生成 TypeScript 类型定义 (Zod Schemas),确保前后端类型严格一致。
|
||||
|
||||
**实施情况**:
|
||||
* **Contract 层改造**:
|
||||
* 在 `common-contracts` 中,核心 Struct/Enum (如 `WorkflowEvent`, `TaskStatus`) 已全面使用 `#[api_dto]` 宏标注。
|
||||
* `#[api_dto]` 自动注入了 `utoipa::ToSchema`,确保了 Schema 的正确导出。
|
||||
|
||||
* **API Gateway 改造 (混合模式)**:
|
||||
* 引入了 `utoipa` 和 `utoipa-swagger-ui` 依赖。
|
||||
* 创建了 `services/api-gateway/src/openapi/mod.rs`,定义了 `ApiDoc` 结构体。
|
||||
* 在 `services/api-gateway/src/api.rs` 中,手动为 Handler 添加了 `#[utoipa::path(...)]` 标注。
|
||||
* 在 Router 中挂载了 `/swagger-ui` 和 `/api-docs/openapi.json`。
|
||||
|
||||
## 2. 动态数据源 Schema 接口 - [已完成]
|
||||
|
||||
**目标**: 实现数据源的插件化和动态发现。
|
||||
|
||||
**实施情况**:
|
||||
* 在 `api-gateway` 中新增了接口 `GET /v1/configs/data_sources/schema`。
|
||||
* 定义了 `DataSourceSchemaResponse` 和 `DataSourceProviderSchema` DTOs。
|
||||
* 接口返回了 Tushare, Finnhub, AlphaVantage, Yfinance 的配置 Schema。
|
||||
|
||||
## 3. 任务进度 (Progress) 字段支持 - [已完成]
|
||||
|
||||
**目标**: 支持在 UI 上展示细粒度的任务进度条。
|
||||
|
||||
**实施情况**:
|
||||
* **修改 Contract**: 在 `common-contracts` 的 `WorkflowEvent::TaskStateChanged` 中增加了 `progress: Option<u8>` 字段。
|
||||
```rust
|
||||
#[api_dto]
|
||||
pub struct TaskStateChanged {
|
||||
// ... existing fields
|
||||
pub progress: Option<u8>, // 0-100
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 下一步计划
|
||||
|
||||
* **前端对接**: 前端可以尝试访问 `http://localhost:4000/api-docs/openapi.json` 来生成类型定义。
|
||||
* **集成测试**: 验证 `GET /v1/workflow/events/:id` 是否能正确返回带有 `progress` 字段的事件。
|
||||
132
docs/tasks/completed/20251121_refactor_nats_subjects_enum.md
Normal file
@ -0,0 +1,132 @@
|
||||
# NATS Subject 强类型重构设计文档
|
||||
|
||||
## 1. 背景与现状 (Background & Status Quo)
|
||||
|
||||
目前,项目中微服务之间的 NATS 消息通信主要依赖于硬编码的字符串(String Literals)来指定 Subject(主题)。例如:
|
||||
- `services/report-generator-service` 使用 `"events.analysis.report_generated"` 发布消息。
|
||||
- `services/workflow-orchestrator-service` 使用 `"events.analysis.>"` 订阅消息,并使用字符串匹配 `if subject == "events.analysis.report_generated"` 来区分消息类型。
|
||||
|
||||
这种方式存在以下问题:
|
||||
1. **弱类型约束**:字符串拼接容易出现拼写错误(Typos),且无法在编译期捕获,只能在运行时发现,违反了 "Fail Early" 原则。
|
||||
2. **维护困难**:Subject 散落在各个服务的代码中,缺乏统一视图(Single Source of Truth),修改一个 Subject 需要全局搜索并小心替换。
|
||||
3. **缺乏契约**:Subject 与 Payload(消息体)之间的对应关系仅通过注释或隐式约定存在,缺乏代码层面的强制约束。
|
||||
|
||||
## 2. 目的 (Objectives)
|
||||
|
||||
本设计旨在贯彻 Rustic 的工程原则(强类型约束、单一来源、早失败、无回退),通过以下方式重构 NATS Subject 的管理:
|
||||
|
||||
1. **强类型枚举 (Enum-driven Subjects)**:在 `common-contracts` 中定义全局唯一的枚举类型,涵盖系统中所有合法的 NATS Subject。
|
||||
2. **消除魔法字符串**:禁止在业务逻辑中直接使用字符串字面量进行 publish 或 subscribe 操作。
|
||||
3. **编译期安全**:利用 Rust 的类型系统,确保 Subject 的构造和匹配是合法的。
|
||||
|
||||
## 3. 设计方案 (Design Proposal)
|
||||
|
||||
### 3.1 核心数据结构 (`common-contracts`)
|
||||
|
||||
在 `services/common-contracts/src/subjects.rs` 中定义 `NatsSubject` 枚举。该枚举涵盖系统中所有合法的 NATS Subject。
|
||||
|
||||
```rust
|
||||
use uuid::Uuid;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum NatsSubject {
|
||||
// --- Commands ---
|
||||
WorkflowCommandStart, // "workflow.commands.start"
|
||||
WorkflowCommandSyncState, // "workflow.commands.sync_state"
|
||||
DataFetchCommands, // "data_fetch_commands"
|
||||
AnalysisCommandGenerateReport, // "analysis.commands.generate_report"
|
||||
|
||||
// --- Events ---
|
||||
// Analysis Events
|
||||
AnalysisReportGenerated, // "events.analysis.report_generated"
|
||||
AnalysisReportFailed, // "events.analysis.report_failed"
|
||||
|
||||
// Data Events
|
||||
DataFinancialsPersisted, // "events.data.financials_persisted"
|
||||
DataFetchFailed, // "events.data.fetch_failed"
|
||||
|
||||
// Workflow Events (Dynamic)
|
||||
WorkflowProgress(Uuid), // "events.workflow.{uuid}"
|
||||
|
||||
// --- Wildcards (For Subscription) ---
|
||||
AnalysisEventsWildcard, // "events.analysis.>"
|
||||
WorkflowCommandsWildcard, // "workflow.commands.>"
|
||||
DataEventsWildcard, // "events.data.>"
|
||||
}
|
||||
|
||||
// ... impl Display and FromStr ...
|
||||
```
|
||||
|
||||
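被省略的 `Display` / `FromStr` 实现大致是下面的形态(承接上文的 `NatsSubject` 枚举,只展示部分变体;实际实现以 `src/subjects.rs` 为准)。其中 `TryFrom<&str>` 直接委托给 `FromStr`,供 3.2 中的 `try_from` 使用:

```rust
use std::fmt;
use std::str::FromStr;

impl fmt::Display for NatsSubject {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NatsSubject::AnalysisReportGenerated => write!(f, "events.analysis.report_generated"),
            NatsSubject::AnalysisReportFailed => write!(f, "events.analysis.report_failed"),
            NatsSubject::AnalysisEventsWildcard => write!(f, "events.analysis.>"),
            NatsSubject::WorkflowProgress(id) => write!(f, "events.workflow.{id}"),
            // ... 其余变体同理(示意从略)
            _ => unimplemented!(),
        }
    }
}

impl FromStr for NatsSubject {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "events.analysis.report_generated" => Ok(NatsSubject::AnalysisReportGenerated),
            "events.analysis.report_failed" => Ok(NatsSubject::AnalysisReportFailed),
            other => Err(format!("unknown subject: {other}")),
        }
    }
}

impl TryFrom<&str> for NatsSubject {
    type Error = String;
    fn try_from(s: &str) -> Result<Self, Self::Error> {
        s.parse()
    }
}
```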
### 3.2 使用方式
|
||||
|
||||
#### 发布消息 (Publish)
|
||||
|
||||
```rust
|
||||
// Old
|
||||
state.nats.publish("events.analysis.report_generated", payload).await?;
|
||||
|
||||
// New
|
||||
use common_contracts::subjects::NatsSubject;
|
||||
|
||||
state.nats.publish(NatsSubject::AnalysisReportGenerated.to_string(), payload).await?;
|
||||
```
|
||||
|
||||
#### 订阅与匹配 (Subscribe & Match)
|
||||
|
||||
```rust
|
||||
// Old
|
||||
let sub = nats.subscribe("events.analysis.>").await?;
|
||||
while let Some(msg) = sub.next().await {
|
||||
if msg.subject == "events.analysis.report_generated" { ... }
|
||||
}
|
||||
|
||||
// New
|
||||
let sub = nats.subscribe(NatsSubject::AnalysisEventsWildcard.to_string()).await?;
|
||||
while let Some(msg) = sub.next().await {
|
||||
// 将接收到的 subject 字符串尝试转换为枚举
|
||||
match NatsSubject::try_from(msg.subject.as_str()) {
|
||||
Ok(NatsSubject::AnalysisReportGenerated) => {
|
||||
// Handle report generated
|
||||
},
|
||||
Ok(NatsSubject::AnalysisReportFailed) => {
|
||||
// Handle report failed
|
||||
},
|
||||
_ => {
|
||||
// Log warning or ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 实施状态 (Implementation Status)
|
||||
|
||||
### 4.1 `common-contracts`
|
||||
- [x] 定义 `NatsSubject` 枚举及相关 Trait (`Display`, `FromStr`) 在 `src/subjects.rs`。
|
||||
- [x] 添加单元测试确保 Round-trip 正确性。
|
||||
|
||||
### 4.2 `report-generator-service`
|
||||
- [x] `src/worker.rs`: 替换 Publish Subject。
|
||||
|
||||
### 4.3 `workflow-orchestrator-service`
|
||||
- [x] `src/message_consumer.rs`: 替换 Subscribe Subject 和 Match 逻辑。
|
||||
|
||||
### 4.4 `api-gateway`
|
||||
- [x] `src/api.rs`: 替换 Publish Subject。
|
||||
|
||||
### 4.5 Provider Services
|
||||
- [x] `finnhub-provider-service`: 替换 Subscribe Subject,移除魔法字符串常量。
|
||||
- [x] `alphavantage-provider-service`: 替换 Subscribe Subject,移除魔法字符串常量。
|
||||
- [x] `tushare-provider-service`: 替换 Subscribe Subject,移除魔法字符串常量。
|
||||
- [x] `yfinance-provider-service`: 替换 Subscribe Subject,移除魔法字符串常量。
|
||||
|
||||
## 5. 进阶优化 (Future Work)
|
||||
|
||||
- [x] **关联 Payload 类型**: 利用 Rust 的 trait 系统,将 Subject 枚举与对应的 Payload 结构体关联起来,使得 `publish` 函数能够根据 Subject 自动推断 Payload 类型,从而防止 Subject 与 Payload 不匹配的问题。
|
||||
```rust
|
||||
trait SubjectMessage {
|
||||
// type Payload: Serialize + DeserializeOwned; // Simplified: trait is implemented on Payload struct itself
|
||||
fn subject(&self) -> NatsSubject;
|
||||
}
|
||||
```
|
||||
已在 `services/common-contracts/src/subjects.rs` 中实现 `SubjectMessage` trait,并在 `messages.rs` 中为各个 Command/Event 实现了该 trait。各服务已更新为使用 `msg.subject().to_string()` 进行发布。
|
||||
39
docs/tasks/completed/20251127_add_task_display_names.md
Normal file
@ -0,0 +1,39 @@
|
||||
# [Pending] 为工作流任务添加人类可读名称 (Display Name)
|
||||
|
||||
## 背景
|
||||
目前,前端在显示任务名称时使用的是任务 ID(例如 `analysis:news_analysis`,或者是经过简单格式化后的 `news analysis`)。然而,真正的人类可读名称(例如 “新闻分析”)定义在 `AnalysisTemplate` 配置中,并没有通过工作流事件传播到 `WorkflowOrchestrator` 或前端。
|
||||
|
||||
## 目标
|
||||
确保前端可以在工作流可视化图表(Visualizer)和标签页(Tab Headers)中显示模板中定义的本地化/人类可读的任务名称。
|
||||
|
||||
## 需要的变更
|
||||
|
||||
### 1. Common Contracts (`services/common-contracts`)
|
||||
- **文件**: `src/workflow_types.rs` 或 `src/messages.rs`
|
||||
- **行动**: 更新 `TaskNode` 结构体(用于 `WorkflowStateSnapshot`),增加一个 `display_name` (`Option<String>`) 字段。
|
||||
- **行动**: (可选) 如果我们需要实时更新也携带名称,可以更新 `WorkflowTaskEvent`,虽然对于静态拓扑来说,快照(Snapshot)通常就足够了。
|
||||
|
||||
### 2. Workflow Orchestrator Service (`services/workflow-orchestrator-service`)
|
||||
- **文件**: `src/dag_scheduler.rs`
|
||||
- **行动**: 在通过 `add_node` 添加节点时,接受一个 `display_name` 参数。
|
||||
- **文件**: `src/workflow.rs`
|
||||
- **行动**: 在 `build_dag` 函数中,遍历 `template.modules` 时:
|
||||
- 提取 `module_config.name`(例如 “新闻分析”)。
|
||||
- 在创建 DAG 节点时传递这个名称。
|
||||
|
||||
### 3. Frontend (`frontend`)
|
||||
- **文件**: `src/types/workflow.ts`
|
||||
- **行动**: 更新 `TaskNode` 接口以匹配新的后端 DTO。
|
||||
- **文件**: `src/components/workflow/WorkflowVisualizer.tsx` & `src/pages/ReportPage.tsx`
|
||||
- **行动**: 如果 `node.display_name` 存在,则优先使用它;否则回退到使用 `formatNodeName(node.id)`。
|
||||
|
||||
## 替代方案 / 临时方案 (纯前端)
|
||||
由于前端已经(通过 `useAnalysisTemplates` hook)获取了 `AnalysisTemplate`,我们可以:
|
||||
1. 从 URL 参数中获取当前的 `templateId`。
|
||||
2. 查找对应的模板定义。
|
||||
3. 创建一个映射表:`module_id -> module_name`。
|
||||
4. 在 `ReportPage` 和 `WorkflowVisualizer` 中使用此映射表来动态解析名称。
|
||||
|
||||
## 优先级
|
||||
中等 - 能够显著改善用户体验 (UX),但现有功能不受影响。
|
||||
|
||||
160
docs/tasks/completed/20251127_refactor_context_mechanism.md
Normal file
@ -0,0 +1,160 @@
|
||||
# 任务:重构分析模块上下文机制 (两阶段选择与统一 I/O 绑定的融合)
|
||||
|
||||
**状态**: 设计中 (Finalizing)
|
||||
**日期**: 2025-11-27
|
||||
**优先级**: 高
|
||||
**负责人**: @User / @Assistant
|
||||
|
||||
## 1. 核心理念:意图与实现的解耦
|
||||
|
||||
我们经历了三个思维阶段,现在需要将其融合成一个完整的体系:
|
||||
1. **Context Projection**: 模块需要从全局上下文中“投影”出自己需要的数据。
|
||||
2. **Two-Stage Selection**: 这种投影过程分为“选择(我需要什么?)”和“分析(怎么处理它?)”两个阶段,且都需要 Prompt/Model 驱动。
|
||||
3. **Unified I/O Binding**: 模块本身不应处理物理路径,应由 Orchestrator 负责 I/O 绑定。
|
||||
|
||||
**融合方案**:
|
||||
* **Module 定义意图 (Intent)**: 模块通过 Configuration (Prompt/Rules) 描述“我需要什么样的输入”(例如:“我需要去年的财务数据” 或 “按此 Glob 规则匹配”)。
|
||||
* **Orchestrator 负责解析 (Resolution)**: Orchestrator(借助 IO Binder)根据模块的意图和当前的全局上下文状态,计算出具体的**物理路径**绑定。
|
||||
* **Module 执行实现 (Execution)**: 模块接收 Orchestrator 传来的物理路径,执行读取、分析和写入。
|
||||
|
||||
## 2. 架构设计
|
||||
|
||||
### 2.1. 模块配置:描述“我需要什么”
|
||||
|
||||
`AnalysisModuleConfig` 依然保持两阶段结构,但这里的“Input/Context Selector”描述的是**逻辑需求**。
|
||||
|
||||
```rust
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AnalysisModuleConfig {
|
||||
pub id: String,
|
||||
|
||||
// Phase 1: Input Intent (我需要什么数据?)
|
||||
pub context_selector: ContextSelectorConfig,
|
||||
// Manual: 明确的规则 (e.g., "financials/*.json")
|
||||
// Auto: 模糊的需求,交给 Orchestrator/Agent 自动推断
|
||||
// Hybrid: 具体的 Prompt (e.g., "Find all news about 'Environment' from last year")
|
||||
|
||||
// Phase 2: Analysis Intent (怎么处理这些数据?)
|
||||
pub analysis_prompt: String,
|
||||
pub llm_config: Option<LlmConfig>,
|
||||
|
||||
// Output Intent (结果是什么?)
|
||||
// 模块只定义它产生什么类型的结果,物理路径由 Orchestrator 分配
|
||||
pub output_type: String, // e.g., "markdown_report", "json_summary"
|
||||
}
|
||||
```
|
||||
|
||||
### 2.2. Orchestrator 运行时:解析“在哪里”
|
||||
|
||||
Orchestrator 在调度任务前,会执行一个 **Resolution Step**。
|
||||
|
||||
* **对于 Manual Selector**:
|
||||
* Orchestrator 根据规则(Glob)在当前 VGCS Head Commit 中查找匹配的文件。
|
||||
* 生成具体的 `InputBindings` (Map<FileName, PhysicalPath>)。
|
||||
* **对于 Auto/Hybrid Selector**:
|
||||
* **这里是关键融合点**:Orchestrator (或专门的 Resolution Agent) 会运行一个轻量级的 LLM 任务。
|
||||
* Input: 当前 VGCS 目录树 + 模块定义的 Selection Prompt (或 Auto 策略)。
|
||||
* Output: 具体的 VGCS 文件路径列表。
|
||||
* Orchestrator 将这些路径打包成 `InputBindings`。
|
||||
|
||||
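Manual 模式下的 Glob 解析可以用 `globset` 实现,下面是一个示意。假设调用方已经通过 VGCS 在锁定的 commit 下列出了全部文件路径 `all_paths`(该能力的具体接口以 VGCS 为准):

```rust
use anyhow::Result;
use globset::Glob;

/// Manual 模式:对锁定 commit 的文件清单做 Glob 匹配,产出具体的 InputBindings
fn resolve_manual_bindings(all_paths: &[String], rules: &[String]) -> Result<Vec<String>> {
    // 将每条规则编译为 matcher(如 "financials/*.json")
    let matchers = rules
        .iter()
        .map(|rule| Ok(Glob::new(rule)?.compile_matcher()))
        .collect::<Result<Vec<_>>>()?;

    // 命中任意一条规则即纳入绑定列表
    let bindings = all_paths
        .iter()
        .filter(|path| matchers.iter().any(|m| m.is_match(path.as_str())))
        .cloned()
        .collect();

    Ok(bindings)
}
```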
### 2.3. 模块执行:执行“转换”
|
||||
|
||||
当模块真正启动时(Worker 接收到 Command),它看到的是**已经被解析过**的确定的世界。
|
||||
|
||||
```rust
|
||||
// 最终发给 Worker 的指令
|
||||
pub struct GenerateReportCommand {
|
||||
pub request_id: Uuid,
|
||||
pub commit_hash: String, // 锁定的世界状态
|
||||
|
||||
// 具体的 I/O 绑定 (由 Orchestrator 解析完毕)
|
||||
pub input_bindings: Vec<String>, // e.g., ["raw/tushare/AAPL/financials.json", ...]
|
||||
pub output_path: String, // e.g., "analysis/financial_v1/report.md"
|
||||
|
||||
// 分析逻辑 (透传给 Worker)
|
||||
pub analysis_prompt: String,
|
||||
pub llm_config: Option<LlmConfig>,
|
||||
}
|
||||
```
|
||||
|
||||
**变化点**:
|
||||
* **复杂的 Selection 逻辑上移**:原本打算放在 Worker 里的 `Select_Smart` 逻辑,现在看来更适合作为 Orchestrator 的预处理步骤(或者一个独立的微任务)。
|
||||
* **Worker 变轻**:Worker 变得非常“傻”,只负责 `Read(paths) -> Expand -> Prompt -> Write(output_path)`。这就实现了真正的“模块只关注核心任务”。
|
||||
* **灵活性保留**:如果是 Auto/Hybrid 模式,Orchestrator 会动态决定 Input Bindings;如果是 Manual 模式,则是静态规则解析。对 Worker 来说,它收到的永远是确定的文件列表。
|
||||
|
||||
## 3. 实施路线图 (Revised)
|
||||
|
||||
### Phase 1: 协议与配置 (Contracts)
|
||||
1. 定义 `AnalysisModuleConfig` (包含 Selector, Prompt, LlmConfig)。
|
||||
2. 定义 `GenerateReportCommand` (包含 `input_bindings` 物理路径列表, `output_path`, `commit_hash`)。
|
||||
|
||||
### Phase 2: Orchestrator Resolution Logic
|
||||
1. 实现 `ContextResolver` 组件:
|
||||
* 支持 Glob 解析 (Manual)。
|
||||
* (后续) 支持 LLM 目录树推理 (Auto/Hybrid)。
|
||||
2. 在调度循环中,在生成 Command 之前调用 `ContextResolver`。
|
||||
|
||||
### Phase 3: 模块改造 (Module Refactor)
|
||||
1. **Provider**: 接收 `output_path` (由 Orchestrator 按约定生成,如 `raw/{provider}/{symbol}`) 并写入。
|
||||
2. **Generator**:
|
||||
* 移除所有选择逻辑。
|
||||
* 直接读取 `cmd.input_bindings` 中的文件。
|
||||
* 执行 Expander (JSON->Table 等)。
|
||||
* 执行 Prompt。
|
||||
* 写入 `cmd.output_path`。
|
||||
|
||||
## 4. 总结
|
||||
这个方案完美融合了我们的讨论:
|
||||
* **Input/Output Symmetry**: 都在 Command 中明确绑定。
|
||||
* **Two-Stage**:
|
||||
* Stage 1 (Selection) 发生在 **Orchestration Time** (解析 Binding)。
|
||||
* Stage 2 (Analysis) 发生在 **Execution Time** (Worker 运行)。
|
||||
* **Module Focus**: 模块不需要知道“去哪找”,只知道“给我这些文件,我给你那个结果”。
|
||||
|
||||
## 5. 实施步骤清单 (Checklist)
|
||||
|
||||
### Phase 1: 协议与配置定义 (Contracts & Configs)
|
||||
- [x] **Common Contracts**: 在 `services/common-contracts/src` 创建或更新 `configs.rs`。
|
||||
- [x] 定义 `SelectionMode` (Manual, Auto, Hybrid)。
|
||||
- [x] 定义 `LlmConfig` (model_id, parameters)。
|
||||
- [x] 定义 `ContextSelectorConfig` (mode, rules, prompt, llm_config)。
|
||||
- [x] 定义 `AnalysisModuleConfig` (id, selector, analysis_prompt, llm_config, output_type)。
|
||||
- [x] **Messages**: 更新 `services/common-contracts/src/messages.rs`。
|
||||
- [x] `GenerateReportCommand`: 添加 `commit_hash`, `input_bindings: Vec<String>`, `output_path: String`, `llm_config`.
|
||||
- [x] `FetchCompanyDataCommand`: 添加 `output_path: Option<String>`.
|
||||
- [x] **VGCS Types**: 确保 `workflow-context` crate 中的类型足以支持路径操作。(Confirmed: Vgcs struct has methods)
|
||||
|
||||

### Phase 2: Orchestrator Changes (Resolution Logic)

- [x] **Context Resolver**: create `context_resolver.rs` in `workflow-orchestrator-service`.
  - [x] Implement `resolve_input(selector, vgcs_client, commit_hash) -> Result<Vec<String>>`.
  - [x] Manual mode: implement glob matching (recursive lookup via the VGCS `list_dir`).
  - [x] Auto/Hybrid mode: (interface reserved) return Empty or NotImplemented for now; wire up the LLM later.
- [x] **IO Binder**: implement `io_binder.rs` (a sketch follows this checklist).
  - [x] Implement the convention-based `allocate_output_path(task_type, task_id) -> String`.
- [x] **Scheduler**: update `dag_scheduler.rs`.
  - [x] Call `ContextResolver` and `IOBinder` before dispatching a task.
  - [x] Fill the resolved results into the Command.
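
A minimal sketch of the convention-based output allocation, under the path conventions mentioned in this document (`raw/{provider}/{symbol}` for fetch tasks, an `analysis/{module}/report.md` shape for analysis tasks); the `TaskType` enum here is illustrative, not the real scheduler model.

```rust
/// Illustrative task classification; the real scheduler may model this differently.
pub enum TaskType {
    Fetch { provider: String, symbol: String },
    Analysis { module_id: String },
}

/// Sketch of allocate_output_path: derive the output location purely by convention,
/// so Workers never have to invent paths themselves.
pub fn allocate_output_path(task: &TaskType) -> String {
    match task {
        TaskType::Fetch { provider, symbol } => format!("raw/{provider}/{symbol}"),
        TaskType::Analysis { module_id } => format!("analysis/{module_id}/report.md"),
    }
}
```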

### Phase 3: Write Side (Provider Adaptation)

- [x] **Tushare Provider**: update `services/tushare-provider-service/src/generic_worker.rs`.
  - [x] Read `output_path` from the Command (if present).
  - [x] Write data to that path via `WorkerContext` (no longer hard-coding `raw/tushare/...`; trust the Command instead).
  - [x] Commit and return the new Commit Hash.

### Phase 4: Read Side (Report Generator Adaptation)

- [x] **Worker Refactor**: rewrite `services/report-generator-service/src/worker.rs` (an end-to-end sketch of this flow follows this checklist).
  - [x] **Remove**: delete `fetch_data_and_configs` (the old DB read path).
  - [x] **Checkout**: use `vgcs.checkout(cmd.commit_hash)`.
  - [x] **Read Input**: iterate over `cmd.input_bindings` and read each file via `vgcs.read_file`.
  - [x] **Expand**: implement a simple `Expander` (JSON -> Markdown Table).
  - [x] **Prompt**: render `cmd.analysis_prompt`.
  - [x] **LLM Call**: initialize the client from `cmd.llm_config` and call it.
  - [x] **Write Output**: write the result to `cmd.output_path`.
  - [x] **Commit**: commit the change and broadcast the event.
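
Putting those steps together, a condensed sketch of the worker loop is shown below. The `VgcsLike` trait and the injected `expand`/`call_llm` closures are placeholders standing in for the real `workflow-context` and LLM client APIs; only the order of operations is the point here.

```rust
use anyhow::Result;

// Placeholder standing in for the real workflow-context API; only the call order matters here.
trait VgcsLike {
    fn checkout(&mut self, commit_hash: &str) -> Result<()>;
    fn read_file(&self, path: &str) -> Result<String>;
    fn write_file(&mut self, path: &str, content: &str) -> Result<()>;
    fn commit(&mut self, message: &str) -> Result<String>; // returns the new commit hash
}

/// Sketch of the generic flow: Checkout -> Read -> Expand -> Prompt -> LLM -> Write -> Commit.
/// `expand` and `call_llm` are injected so the sketch stays independent of any concrete client.
fn run_report_task(
    vgcs: &mut impl VgcsLike,
    commit_hash: &str,
    input_bindings: &[String],
    output_path: &str,
    analysis_prompt: &str,
    expand: impl Fn(&str) -> String,           // e.g. JSON -> Markdown table
    call_llm: impl Fn(&str) -> Result<String>, // wraps the configured LLM client
) -> Result<String> {
    vgcs.checkout(commit_hash)?;

    let mut context = String::new();
    for path in input_bindings {
        let raw = vgcs.read_file(path)?;
        context.push_str(&format!("# Data Source: {path}\n{}\n\n", expand(&raw)));
    }

    let report = call_llm(&format!("{analysis_prompt}\n\n{context}"))?;
    vgcs.write_file(output_path, &report)?;

    // The resulting commit hash is what gets broadcast in the completion event.
    vgcs.commit(&format!("report: {output_path}"))
}
```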

### Phase 5: Integration & Verification

- [x] **Config Migration**: update `config/analysis-config.json` (or the configuration stored in the DB) to the new `AnalysisModuleConfig` structure.
- [ ] **End-to-End Test**: run the full pipeline and verify that:
  1. the Provider writes files into Git;
  2. the Orchestrator resolves the paths;
  3. the Generator reads the files and produces the report.
71
docs/tasks/completed/20251128_refactor_worker_generic.md
Normal file
@ -0,0 +1,71 @@

# Task: Refactor the Report Worker into a Generic Executor (Generic Execution)

**Status**: Planning -> Ready for implementation
**Priority**: High
**Components involved**: `report-generator-service`, `common-contracts`

## 1. Background

The current `report-generator-service/src/worker.rs` has a serious design flaw: **business logic leakage**.

The Worker hard-codes special handling for `financials.json` (deserializing `TimeSeriesFinancialDto` and converting it into a Markdown table). As a result, the Worker is no longer a generic analysis executor; it is tightly coupled to the financial-analysis domain. This contradicts the system's design intent: the Worker should only be responsible for the generic `IO -> Context Assembly -> LLM` flow.

## 2. Goal

Refactor the Worker into a fully **Generic Analysis Worker**. It should not know what "Financials" or "Profile" means. It only knows:

1. I have input files (JSON, text, etc.).
2. I need to turn them into a prompt context (human-readable first, e.g. YAML).
3. I call the LLM.
4. I write the result.

## 3. Core Changes

### 3.1 Remove the Hard-Coded DTO Parsing

* **Completely remove** every reference to `TimeSeriesFinancialDto` from `worker.rs`. The Worker must not know any business-specific data structures.
* Delete the Financials-specific table generation logic in `formatter.rs`.

### 3.2 Generic Formatting Strategy -> YAML First

We need a generic way to present structured data to the LLM while keeping it readable for humans during debugging.

**Approach: YAML pretty print (preferred)**

* **Rationale**: YAML is cleaner than JSON, with far fewer brackets and quotes, so it reads better for humans, and LLMs handle YAML well. Since we are still in the development/debugging phase, **human readability** outweighs squeezing out every last token.
* **Strategy** (a sketch of this builder follows at the end of this section):
  * Try to parse the input file content as a JSON Value.
  * If that succeeds, convert it to a **YAML** string.
  * If parsing fails (unstructured text), keep it as-is (raw text).
* **Context layout**: avoid XML tags; use simpler, more intuitive separators.

```yaml
---
# Data Source: financials.json (Original Size: 1.2MB)
data:
  - date: 2023-12-31
    revenue: 10000
  ...
```
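
A minimal sketch of that fallback chain, assuming `serde_yaml` is added as described in the implementation steps below; the function name and signature are illustrative.

```rust
use serde_json::Value;

/// Sketch of the generic formatter: structured input becomes YAML,
/// anything that is not valid JSON is passed through as raw text.
pub fn format_for_prompt(source_name: &str, raw: &str) -> String {
    let body = match serde_json::from_str::<Value>(raw) {
        Ok(value) => serde_yaml::to_string(&value).unwrap_or_else(|_| raw.to_string()),
        Err(_) => raw.to_string(), // fallback: unstructured text stays as-is
    };
    format!("---\n# Data Source: {source_name}\n{body}\n")
}
```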

### 3.3 Richer Execution Trace and Truncation Strategy

* **Sidecar Log**: the detailed execution process must be recorded.
* **Truncation** (sketched below):
  * Keep character-level truncation as the last-resort safety net.
  * **Critical Logging**: whenever truncation happens, leave a loud warning in the log.
  * **Details**: record "size before truncation" vs "size after truncation" (e.g. `Original: 1MB, Truncated to: 64KB`) so developers clearly see how much data was lost.
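
A small sketch of what that safety net could look like; the limit and the log target are placeholders for whatever configuration and logging facility the service actually uses.

```rust
/// Sketch of last-resort truncation with loud logging.
/// `max_chars` is a placeholder limit; the real value comes from configuration.
pub fn truncate_with_warning(source_name: &str, content: &str, max_chars: usize) -> String {
    let total_chars = content.chars().count();
    if total_chars <= max_chars {
        return content.to_string();
    }
    eprintln!(
        "WARNING: truncating {source_name}: Original: {total_chars} chars, Truncated to: {max_chars} chars"
    );
    content.chars().take(max_chars).collect()
}
```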

## 4. Implementation Steps

1. **Cleanup**: remove all business-specific DTO code from `worker.rs` and `formatter.rs`.
2. **Generic Implementation**:
   * Add the `serde_yaml` dependency.
   * Implement a generic `context_builder`:
     * Input -> `serde_json::Value` -> `serde_yaml::to_string`.
     * Fallback: raw text.
   * Assemble the context string.
3. **Safety & Logging**:
   * Implement the truncation logic and compute `original_size` and `truncated_size`.
   * Record the per-file handling details in `execution_trace.md`.
4. **Verify**: run the tests and check that the generated context is clear and readable.

## 5. Expected Outcome

* **Decoupled**: the Worker is fully cut loose from the financial domain.
* **Intuitive**: the context reads like a config file, making it easy to review what the LLM is given.
* **Transparent**: it is always clear which data was fed to the LLM and which data was truncated.
@ -0,0 +1,83 @@

# Task: Refactor History and Context Management (Tear Down and Rebuild)

## 1. Goal

Completely remove the old `analysis_results` table and its supporting infrastructure, and build the next-generation history and context management system on top of `workflow_history` + VGCS.

## 2. Execution Strategy

**Destructive refactoring**: delete the database table definition outright, then rely on compiler errors and search tooling to locate and remove all related code, leaving no dead code behind.

## 3. Detailed Steps

### Stage 1: Demolition

1. **Database layer**
   * Delete the SQL definition (migration) of the `analysis_results` table.
   * Create a new migration: `DROP TABLE analysis_results;`.
   * Delete the `AnalysisResult` model definitions (`services/data-persistence-service/src/models.rs`, `common-contracts`).

2. **Persistence layer (Data Persistence Service)**
   * Delete `src/api/analysis.rs` (API handler).
   * Delete `src/db/analysis_results.rs` (DB access).
   * Clean up the route registration in `src/api/mod.rs`.

3. **Shared contracts (Common Contracts)**
   * Delete DTOs such as `AnalysisResultDto` and `NewAnalysisResult`.
   * Delete the `create_analysis_result` and `get_analysis_results` methods from `PersistenceClient`.

4. **Producer side (Report Generator Service)**
   * Remove the code in `worker.rs` that calls `create_analysis_result`.
   * **Keep** the logic that writes files to VGCS (it is the foundation of the new design).

5. **Gateway and frontend (API Gateway & Frontend)**
   * Remove the `/analysis-results` forwarding in the API Gateway.
   * Temporarily comment out the related frontend calls or mark them as TODO (to be wired to the new endpoints).

### Stage 2: Reconstruction

1. **Database layer**
   * Create the new `workflow_history` table.
   * Schema definition:

```sql
CREATE TABLE workflow_history (
    request_id UUID PRIMARY KEY,
    symbol VARCHAR(20) NOT NULL,
    market VARCHAR(10) NOT NULL,
    template_id VARCHAR(50),
    status VARCHAR(20) NOT NULL,
    start_time TIMESTAMPTZ NOT NULL,
    end_time TIMESTAMPTZ,
    snapshot_data JSONB NOT NULL -- contains the DAG structure, commit hash, and artifact paths
);
```

2. **Shared contracts (Common Contracts)**
   * Define the `WorkflowHistoryDto` and `WorkflowSnapshot` structures (a sketch follows this list).
   * Extend `PersistenceClient` with `create_workflow_history` and `get_workflow_history` methods.

3. **Persistence layer (Data Persistence Service)**
   * Implement `src/api/history.rs` and `src/db/history.rs`.
   * Support detail lookup by `request_id` and list queries by `symbol`/`template_id`.
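
For orientation, a hedged sketch of what those snapshot contracts could look like given the schema above. Field names beyond the table columns are assumptions, and it presumes the `uuid` crate with its serde feature; the real DTOs in `common-contracts` may differ.

```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// Sketch of the per-workflow snapshot stored in `snapshot_data` (JSONB).
/// The `dag`, `commit_hash`, and `artifact_paths` fields mirror what the
/// Orchestrator collects; exact shapes are still to be finalized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowSnapshot {
    pub dag: serde_json::Value,      // serialized DAG structure
    pub commit_hash: String,         // final VGCS commit of the run
    pub artifact_paths: Vec<String>, // report/output paths inside that commit
}

/// Sketch of the row-level DTO mirroring the `workflow_history` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowHistoryDto {
    pub request_id: Uuid,
    pub symbol: String,
    pub market: String,
    pub template_id: Option<String>,
    pub status: String,
    pub start_time: String,          // TIMESTAMPTZ, serialized (e.g. RFC 3339)
    pub end_time: Option<String>,
    pub snapshot_data: WorkflowSnapshot,
}
```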

### Stage 3: Orchestration & Integration

1. **Orchestrator Service**
   * **Collection**: modify `handle_task_completed` to collect `artifact_paths` and the `commit_hash` from each task result.
   * **Finalization**: implement `finalize_workflow`.
     * When the workflow ends, build the `WorkflowSnapshot`.
     * Call the `persistence-service` to write the `workflow_history` record.

2. **Frontend**
   * Rewrite the `useHistory` hook against `/api/v1/history`.
   * Rewrite the `ReportPage` to fetch report content through the VGCS API using the commit hash + path from the snapshot.

## 4. Acceptance Criteria

1. **Compiles**: no service still references `analysis_results`.
2. **Clean data**: the database has no `analysis_results` table, only `workflow_history`.
3. **Works end to end**:
   * Running one workflow produces exactly **one** record on the History page.
   * Opening its detail view correctly loads each step's report file (from VGCS).

## 5. Notes

* This is a breaking change; historical data will be invisible while it is in progress (until the frontend is adapted to the new endpoints).
* The VGCS read endpoint (`read_file`) must be reliably available for the frontend/gateway to call.
57
frontend/.gitignore
vendored
@ -1,43 +1,24 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
|
||||
/src/generated/prisma
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
|
||||
@ -1,22 +1,21 @@
|
||||
# syntax=docker/dockerfile:1.6
|
||||
FROM node:20-slim AS base
|
||||
ENV PNPM_HOME="/pnpm"
|
||||
ENV PATH="$PNPM_HOME:$PATH"
|
||||
RUN corepack enable
|
||||
|
||||
FROM node:20-alpine AS base
|
||||
FROM base AS deps
|
||||
WORKDIR /app
|
||||
COPY package.json package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
ENV NODE_ENV=development \
|
||||
NEXT_TELEMETRY_DISABLED=1 \
|
||||
CI=false
|
||||
FROM base AS runner
|
||||
WORKDIR /app
|
||||
COPY --from=deps /app/node_modules ./node_modules
|
||||
COPY . .
|
||||
|
||||
WORKDIR /workspace/frontend
|
||||
|
||||
# Copy only the dependency manifests to maximize cache reuse
|
||||
COPY frontend/package.json frontend/package-lock.json ./
|
||||
|
||||
# Use npm ci (fall back to npm install on failure so lockfile issues don't block the image build)
|
||||
RUN npm ci || npm install
|
||||
|
||||
# Source code is provided at runtime via a mounted volume
|
||||
RUN mkdir -p /workspace/frontend
|
||||
|
||||
# The default entrypoint is provided by docker-compose
|
||||
# Expose port 3001
|
||||
EXPOSE 3001
|
||||
|
||||
# Start dev server by default (overridden by docker-compose)
|
||||
CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "3001"]
|
||||
|
||||
|
||||
73
frontend/README.md
Normal file
@ -0,0 +1,73 @@
|
||||
# React + TypeScript + Vite
|
||||
|
||||
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
|
||||
|
||||
Currently, two official plugins are available:
|
||||
|
||||
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
|
||||
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
|
||||
|
||||
## React Compiler
|
||||
|
||||
The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
|
||||
|
||||
## Expanding the ESLint configuration
|
||||
|
||||
If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
|
||||
|
||||
```js
|
||||
export default defineConfig([
|
||||
globalIgnores(['dist']),
|
||||
{
|
||||
files: ['**/*.{ts,tsx}'],
|
||||
extends: [
|
||||
// Other configs...
|
||||
|
||||
// Remove tseslint.configs.recommended and replace with this
|
||||
tseslint.configs.recommendedTypeChecked,
|
||||
// Alternatively, use this for stricter rules
|
||||
tseslint.configs.strictTypeChecked,
|
||||
// Optionally, add this for stylistic rules
|
||||
tseslint.configs.stylisticTypeChecked,
|
||||
|
||||
// Other configs...
|
||||
],
|
||||
languageOptions: {
|
||||
parserOptions: {
|
||||
project: ['./tsconfig.node.json', './tsconfig.app.json'],
|
||||
tsconfigRootDir: import.meta.dirname,
|
||||
},
|
||||
// other options...
|
||||
},
|
||||
},
|
||||
])
|
||||
```
|
||||
|
||||
You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
|
||||
|
||||
```js
|
||||
// eslint.config.js
|
||||
import reactX from 'eslint-plugin-react-x'
|
||||
import reactDom from 'eslint-plugin-react-dom'
|
||||
|
||||
export default defineConfig([
|
||||
globalIgnores(['dist']),
|
||||
{
|
||||
files: ['**/*.{ts,tsx}'],
|
||||
extends: [
|
||||
// Other configs...
|
||||
// Enable lint rules for React
|
||||
reactX.configs['recommended-typescript'],
|
||||
// Enable lint rules for React DOM
|
||||
reactDom.configs.recommended,
|
||||
],
|
||||
languageOptions: {
|
||||
parserOptions: {
|
||||
project: ['./tsconfig.node.json', './tsconfig.app.json'],
|
||||
tsconfigRootDir: import.meta.dirname,
|
||||
},
|
||||
// other options...
|
||||
},
|
||||
},
|
||||
])
|
||||
```
|
||||
@ -14,8 +14,17 @@ const nextConfig = {
|
||||
proxyTimeout: 300000, // 300 seconds (5 minutes)
|
||||
},
|
||||
// Optimize for Docker deployment only in production
|
||||
// Enable standalone output when NODE_ENV is production
|
||||
output: process.env.NODE_ENV === 'production' ? 'standalone' : undefined,
|
||||
|
||||
async rewrites() {
|
||||
const apiUrl = process.env.API_GATEWAY_URL || 'http://api-gateway:4000';
|
||||
return [
|
||||
{
|
||||
source: '/api/:path*',
|
||||
destination: `${apiUrl}/v1/:path*`,
|
||||
},
|
||||
];
|
||||
},
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
4085
frontend/archive/20251122_backup/package-lock.json
generated
Normal file
@ -35,8 +35,10 @@ export function AnalysisContent({
|
||||
const contentWithoutTitle = removeTitleFromContent(state.content, analysisName);
|
||||
const normalizedContent = normalizeMarkdown(contentWithoutTitle);
|
||||
|
||||
const isGenerating = state.loading;
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<div className="space-y-4 relative">
|
||||
<h2 className="text-lg font-medium">{analysisName}(来自 {modelName || 'AI'})</h2>
|
||||
|
||||
{!financials && (
|
||||
@ -64,16 +66,16 @@ export function AnalysisContent({
|
||||
: '待开始'}
|
||||
</div>
|
||||
</div>
|
||||
{/* "Regenerate analysis" button, always visible */}
|
||||
{/* Regenerate button */}
|
||||
{!state.loading && (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={() => retryAnalysis(analysisType)}
|
||||
disabled={currentAnalysisTask !== null}
|
||||
disabled={currentAnalysisTask !== null || isGenerating}
|
||||
>
|
||||
<RotateCw className="size-4" />
|
||||
重新生成分析
|
||||
<RotateCw className={`size-4 ${isGenerating ? 'animate-spin' : ''}`} />
|
||||
{isGenerating ? '生成中...' : '重新生成分析'}
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
@ -82,31 +84,43 @@ export function AnalysisContent({
|
||||
<p className="text-red-500">加载失败: {state.error}</p>
|
||||
)}
|
||||
|
||||
{(state.loading || state.content) && (
|
||||
<div className="space-y-4">
|
||||
<div className="border rounded-lg p-6 bg-card">
|
||||
<article className="markdown-body" style={{
|
||||
boxSizing: 'border-box',
|
||||
minWidth: '200px',
|
||||
maxWidth: '980px',
|
||||
margin: '0 auto',
|
||||
padding: '0'
|
||||
}}>
|
||||
<ReactMarkdown
|
||||
remarkPlugins={[remarkGfm]}
|
||||
>
|
||||
{normalizedContent}
|
||||
</ReactMarkdown>
|
||||
{state.loading && (
|
||||
<span className="inline-flex items-center gap-2 mt-2 text-muted-foreground">
|
||||
<Spinner className="size-3" />
|
||||
<span className="text-sm">正在生成中...</span>
|
||||
</span>
|
||||
)}
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Content Area with Overlay */}
|
||||
<div className="relative min-h-[200px]">
|
||||
{/* Overlay when generating */}
|
||||
{isGenerating && (
|
||||
<div className="absolute inset-0 bg-background/80 backdrop-blur-sm z-10 flex flex-col items-center justify-center space-y-4 rounded-lg border">
|
||||
<Spinner className="size-8 text-primary" />
|
||||
<p className="text-sm font-medium text-muted-foreground animate-pulse">
|
||||
正在深入分析财务数据,请稍候...
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Existing Content or Placeholder */}
|
||||
{state.content ? (
|
||||
<div className="space-y-4">
|
||||
<div className="border rounded-lg p-6 bg-card">
|
||||
<article className="markdown-body" style={{
|
||||
boxSizing: 'border-box',
|
||||
minWidth: '200px',
|
||||
maxWidth: '980px',
|
||||
margin: '0 auto',
|
||||
padding: '0'
|
||||
}}>
|
||||
<ReactMarkdown
|
||||
remarkPlugins={[remarkGfm]}
|
||||
>
|
||||
{normalizedContent}
|
||||
</ReactMarkdown>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
) : !isGenerating && (
|
||||
<div className="flex items-center justify-center h-full text-muted-foreground border rounded-lg p-12 border-dashed">
|
||||
暂无分析内容,请点击生成。
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
303
frontend/archive/v1_report/useAnalysisRunner.ts
Normal file
@ -0,0 +1,303 @@
|
||||
import { useState, useRef, useEffect, useMemo } from 'react';
|
||||
import { useDataRequest, useTaskProgress, useAnalysisResults } from '@/hooks/useApi';
|
||||
|
||||
interface AnalysisState {
|
||||
content: string;
|
||||
loading: boolean;
|
||||
error: string | null;
|
||||
elapsed_ms?: number;
|
||||
}
|
||||
|
||||
interface AnalysisRecord {
|
||||
type: string;
|
||||
name: string;
|
||||
status: 'pending' | 'running' | 'done' | 'error';
|
||||
start_ts?: string;
|
||||
end_ts?: string;
|
||||
duration_ms?: number;
|
||||
tokens?: {
|
||||
prompt_tokens: number;
|
||||
completion_tokens: number;
|
||||
total_tokens: number;
|
||||
};
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export function useAnalysisRunner(
|
||||
financials: any,
|
||||
financialConfig: any,
|
||||
normalizedMarket: string,
|
||||
unifiedSymbol: string,
|
||||
isLoading: boolean,
|
||||
error: any,
|
||||
templateSets: any // Added templateSets
|
||||
) {
|
||||
// --- Template Logic ---
|
||||
const [selectedTemplateId, setSelectedTemplateId] = useState<string>('');
|
||||
const reportTemplateId = financials?.meta?.template_id;
|
||||
|
||||
// Sync selected template with report template when report loads
|
||||
useEffect(() => {
|
||||
if (reportTemplateId) {
|
||||
setSelectedTemplateId(reportTemplateId);
|
||||
}
|
||||
}, [reportTemplateId]);
|
||||
|
||||
// Set default template if nothing selected and no report template
|
||||
useEffect(() => {
|
||||
if (!selectedTemplateId && !reportTemplateId && templateSets && Object.keys(templateSets).length > 0) {
|
||||
const defaultId = Object.keys(templateSets).find(k => k.includes('standard') || k === 'default') || Object.keys(templateSets)[0];
|
||||
setSelectedTemplateId(defaultId);
|
||||
}
|
||||
}, [templateSets, selectedTemplateId, reportTemplateId]);
|
||||
|
||||
// Determine active template set
|
||||
const activeTemplateId = selectedTemplateId;
|
||||
|
||||
const activeTemplateSet = useMemo(() => {
|
||||
if (!activeTemplateId || !templateSets) return null;
|
||||
return templateSets[activeTemplateId] || null;
|
||||
}, [activeTemplateId, templateSets]);
|
||||
|
||||
// Derive effective analysis config from template set, falling back to global config if needed
|
||||
const activeAnalysisConfig = useMemo(() => {
|
||||
if (activeTemplateSet) {
|
||||
return {
|
||||
...financialConfig,
|
||||
analysis_modules: activeTemplateSet.modules,
|
||||
};
|
||||
}
|
||||
return financialConfig; // Fallback to global config (legacy behavior)
|
||||
}, [activeTemplateSet, financialConfig]);
|
||||
|
||||
// List of analysis types
|
||||
const analysisTypes = useMemo(() => {
|
||||
if (!activeAnalysisConfig?.analysis_modules) return [];
|
||||
return Object.keys(activeAnalysisConfig.analysis_modules);
|
||||
}, [activeAnalysisConfig]);
|
||||
|
||||
// Analysis state management
|
||||
const [analysisStates, setAnalysisStates] = useState<Record<string, AnalysisState>>({});
|
||||
|
||||
const fullAnalysisTriggeredRef = useRef<boolean>(false);
|
||||
const isAnalysisRunningRef = useRef<boolean>(false);
|
||||
const analysisFetchedRefs = useRef<Record<string, boolean>>({});
|
||||
const stopRequestedRef = useRef<boolean>(false);
|
||||
const abortControllerRef = useRef<AbortController | null>(null);
|
||||
const currentAnalysisTypeRef = useRef<string | null>(null);
|
||||
const [manualRunKey, setManualRunKey] = useState(0);
|
||||
|
||||
// The analysis task currently running
|
||||
const [currentAnalysisTask, setCurrentAnalysisTask] = useState<string | null>(null);
|
||||
|
||||
// Timer state
|
||||
const [startTime, setStartTime] = useState<number | null>(null);
|
||||
const [elapsedSeconds, setElapsedSeconds] = useState(0);
|
||||
|
||||
// Analysis execution records
|
||||
const [analysisRecords, setAnalysisRecords] = useState<AnalysisRecord[]>([]);
|
||||
|
||||
// New architecture: trigger the analysis and watch task progress
|
||||
const { trigger: triggerAnalysisRequest, isMutating: triggering } = useDataRequest();
|
||||
const [requestId, setRequestId] = useState<string | null>(null);
|
||||
const { progress: taskProgress } = useTaskProgress(requestId);
|
||||
|
||||
// Poll for analysis results
|
||||
const { data: newAnalysisResults } = useAnalysisResults(unifiedSymbol);
|
||||
|
||||
// 1. Determine the Active Request ID (The one we want to display)
|
||||
const activeRequestId = useMemo(() => {
|
||||
// If the user manually triggered a task in this session, prioritize that
|
||||
if (requestId) return requestId;
|
||||
|
||||
// Otherwise, default to the most recent result's request_id from the backend
|
||||
// Assuming newAnalysisResults is sorted by created_at DESC
|
||||
if (newAnalysisResults && newAnalysisResults.length > 0) {
|
||||
return newAnalysisResults[0].request_id;
|
||||
}
|
||||
return null;
|
||||
}, [requestId, newAnalysisResults]);
|
||||
|
||||
// 2. Filter results for the current batch
|
||||
const currentBatchResults = useMemo(() => {
|
||||
if (!newAnalysisResults || !activeRequestId) return [];
|
||||
return newAnalysisResults.filter(r => r.request_id === activeRequestId);
|
||||
}, [newAnalysisResults, activeRequestId]);
|
||||
|
||||
// 3. Sync analysisStates (Content) from current batch
|
||||
// We only update if we have a result for that module in the current batch.
|
||||
// If not, we leave it as is (or could clear it if we wanted strict mode).
|
||||
// For now, we'll update based on what we find.
|
||||
useEffect(() => {
|
||||
if (!currentBatchResults) return;
|
||||
|
||||
setAnalysisStates(prev => {
|
||||
const next = { ...prev };
|
||||
let hasChanges = false;
|
||||
|
||||
currentBatchResults.forEach(result => {
|
||||
const type = result.module_id;
|
||||
const status = result.meta_data?.status || 'success';
|
||||
const content = result.content;
|
||||
|
||||
const currentState = next[type];
|
||||
|
||||
// Only update if content changed or status changed
|
||||
if (
|
||||
!currentState ||
|
||||
currentState.content !== content ||
|
||||
(status === 'processing' && !currentState.loading) ||
|
||||
(status === 'success' && currentState.loading) ||
|
||||
(status === 'error' && !currentState.error)
|
||||
) {
|
||||
next[type] = {
|
||||
content: content,
|
||||
loading: status === 'processing',
|
||||
error: status === 'error' ? result.meta_data?.error || 'Unknown error' : null,
|
||||
};
|
||||
hasChanges = true;
|
||||
}
|
||||
});
|
||||
return hasChanges ? next : prev;
|
||||
});
|
||||
}, [currentBatchResults]);
|
||||
|
||||
// 4. Sync analysisRecords (Execution Details) from current batch
|
||||
// This ensures Execution Details only shows the relevant modules for the current run.
|
||||
useEffect(() => {
|
||||
if (!currentBatchResults) return;
|
||||
|
||||
// If we are starting a new run (triggered), we might want to reset records initially?
|
||||
// But currentBatchResults will eventually populate.
|
||||
|
||||
const records: AnalysisRecord[] = currentBatchResults.map(r => {
|
||||
const statusStr = r.meta_data?.status;
|
||||
let status: 'pending' | 'running' | 'done' | 'error' = 'done';
|
||||
if (statusStr === 'processing') status = 'running';
|
||||
else if (statusStr === 'error') status = 'error';
|
||||
|
||||
return {
|
||||
type: r.module_id,
|
||||
name: activeAnalysisConfig?.analysis_modules?.[r.module_id]?.name || r.module_id,
|
||||
status: status,
|
||||
duration_ms: r.meta_data?.elapsed_ms, // Backend needs to provide this in meta_data
|
||||
error: r.meta_data?.error,
|
||||
tokens: r.meta_data?.tokens // Backend needs to provide this
|
||||
};
|
||||
});
|
||||
|
||||
// Sort records to match the defined order in activeAnalysisConfig if possible
|
||||
const definedOrder = Object.keys(activeAnalysisConfig?.analysis_modules || {});
|
||||
records.sort((a, b) => {
|
||||
const idxA = definedOrder.indexOf(a.type);
|
||||
const idxB = definedOrder.indexOf(b.type);
|
||||
if (idxA === -1) return 1;
|
||||
if (idxB === -1) return -1;
|
||||
return idxA - idxB;
|
||||
});
|
||||
|
||||
setAnalysisRecords(records);
|
||||
}, [currentBatchResults, activeAnalysisConfig]);
|
||||
|
||||
|
||||
// Compute the completion ratio
|
||||
const completionProgress = useMemo(() => {
|
||||
const totalTasks = analysisRecords.length;
|
||||
if (totalTasks === 0) return 0;
|
||||
const completedTasks = analysisRecords.filter(r => r.status === 'done' || r.status === 'error').length;
|
||||
return (completedTasks / totalTasks) * 100;
|
||||
}, [analysisRecords]);
|
||||
|
||||
// Total elapsed time (ms)
|
||||
const totalElapsedMs = useMemo(() => {
|
||||
const finMs = financials?.meta?.elapsed_ms || 0;
|
||||
const analysesMs = analysisRecords.reduce((sum, r) => sum + (r.duration_ms || 0), 0);
|
||||
return finMs + analysesMs;
|
||||
}, [financials?.meta?.elapsed_ms, analysisRecords]);
|
||||
|
||||
const hasRunningTask = useMemo(() => {
|
||||
if (currentAnalysisTask !== null) return true;
|
||||
// Also check analysisRecords derived from backend
|
||||
if (analysisRecords.some(r => r.status === 'running')) return true;
|
||||
return false;
|
||||
}, [currentAnalysisTask, analysisRecords]);
|
||||
|
||||
// Whether all tasks are completed
|
||||
const allTasksCompleted = useMemo(() => {
|
||||
if (analysisRecords.length === 0) return false;
|
||||
const allDoneOrErrored = analysisRecords.every(r => r.status === 'done' || r.status === 'error');
|
||||
return allDoneOrErrored && !hasRunningTask && currentAnalysisTask === null;
|
||||
}, [analysisRecords, hasRunningTask, currentAnalysisTask]);
|
||||
|
||||
// Stop the timer when all tasks are completed
|
||||
useEffect(() => {
|
||||
if (allTasksCompleted) {
|
||||
setStartTime(null);
|
||||
}
|
||||
}, [allTasksCompleted]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!startTime) return;
|
||||
const interval = setInterval(() => {
|
||||
const now = Date.now();
|
||||
const elapsed = Math.floor((now - startTime) / 1000);
|
||||
setElapsedSeconds(elapsed);
|
||||
}, 1000);
|
||||
return () => clearInterval(interval);
|
||||
}, [startTime]);
|
||||
|
||||
const retryAnalysis = async (analysisType: string) => {
|
||||
// Retry logic is complicated with the new backend-driven approach.
|
||||
// Ideally, we should send a backend command to retry a specific module.
|
||||
// For now, we can just re-trigger the whole template or alert the user.
|
||||
// Or implementation TODO: Single module retry endpoint.
|
||||
alert("单个模块重试功能在新架构中尚未就绪,请重新触发完整分析。");
|
||||
};
|
||||
|
||||
const stopAll = () => {
|
||||
// Clean up client-side state
|
||||
stopRequestedRef.current = true;
|
||||
isAnalysisRunningRef.current = false;
|
||||
setStartTime(null);
|
||||
// Ideally call backend to cancel job
|
||||
};
|
||||
|
||||
const continuePending = () => {
|
||||
// No-op in new architecture basically
|
||||
};
|
||||
|
||||
const triggerAnalysis = async () => {
|
||||
const reqId = await triggerAnalysisRequest(unifiedSymbol, normalizedMarket || '', selectedTemplateId);
|
||||
if (reqId) {
|
||||
setRequestId(reqId);
|
||||
setStartTime(Date.now()); // Start timer
|
||||
// Reset records to empty or wait for poll?
|
||||
// Waiting for poll is safer to avoid flashing old data
|
||||
setAnalysisRecords([]);
|
||||
}
|
||||
};
|
||||
|
||||
return {
|
||||
activeAnalysisConfig, // Exported
|
||||
analysisTypes,
|
||||
analysisStates,
|
||||
analysisRecords,
|
||||
currentAnalysisTask,
|
||||
triggerAnalysis,
|
||||
triggering,
|
||||
requestId,
|
||||
setRequestId,
|
||||
taskProgress,
|
||||
startTime,
|
||||
elapsedSeconds,
|
||||
completionProgress,
|
||||
totalElapsedMs,
|
||||
stopAll,
|
||||
continuePending,
|
||||
retryAnalysis,
|
||||
hasRunningTask,
|
||||
isAnalysisRunning: hasRunningTask, // Simplified
|
||||
selectedTemplateId, // Exported
|
||||
setSelectedTemplateId, // Exported
|
||||
};
|
||||
}
|
||||
43
frontend/archive/v2_nextjs/.gitignore
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
|
||||
/src/generated/prisma
|
||||
22
frontend/archive/v2_nextjs/Dockerfile
Normal file
@ -0,0 +1,22 @@
|
||||
# syntax=docker/dockerfile:1.6
|
||||
|
||||
FROM node:20-alpine AS base
|
||||
|
||||
ENV NODE_ENV=development \
|
||||
NEXT_TELEMETRY_DISABLED=1 \
|
||||
CI=false
|
||||
|
||||
WORKDIR /workspace/frontend
|
||||
|
||||
# Copy only the dependency manifests to maximize cache reuse
|
||||
COPY frontend/package.json frontend/package-lock.json ./
|
||||
|
||||
# Use npm ci (fall back to npm install on failure so lockfile issues don't block the image build)
|
||||
RUN npm ci || npm install
|
||||
|
||||
# Source code is provided at runtime via a mounted volume
|
||||
RUN mkdir -p /workspace/frontend
|
||||
|
||||
# The default entrypoint is provided by docker-compose
|
||||
|
||||
|
||||
22
frontend/archive/v2_nextjs/components.json
Normal file
@ -0,0 +1,22 @@
|
||||
{
|
||||
"$schema": "https://ui.shadcn.com/schema.json",
|
||||
"style": "new-york",
|
||||
"rsc": true,
|
||||
"tsx": true,
|
||||
"tailwind": {
|
||||
"config": "",
|
||||
"css": "src/app/globals.css",
|
||||
"baseColor": "neutral",
|
||||
"cssVariables": true,
|
||||
"prefix": ""
|
||||
},
|
||||
"iconLibrary": "lucide",
|
||||
"aliases": {
|
||||
"components": "@/components",
|
||||
"utils": "@/lib/utils",
|
||||
"ui": "@/components/ui",
|
||||
"lib": "@/lib",
|
||||
"hooks": "@/hooks"
|
||||
},
|
||||
"registries": {}
|
||||
}
|
||||
30
frontend/archive/v2_nextjs/next.config.mjs
Normal file
@ -0,0 +1,30 @@
|
||||
import { fileURLToPath } from 'url';
|
||||
import path from 'path';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/** @type {import('next').NextConfig} */
|
||||
const nextConfig = {
|
||||
// Explicitly set Turbopack root to this frontend directory to silence multi-lockfile warning
|
||||
turbopack: {
|
||||
root: __dirname,
|
||||
},
|
||||
// Increase server timeout for long-running AI requests
|
||||
experimental: {
|
||||
proxyTimeout: 300000, // 300 seconds (5 minutes)
|
||||
},
|
||||
// Optimize for Docker deployment only in production
|
||||
output: process.env.NODE_ENV === 'production' ? 'standalone' : undefined,
|
||||
|
||||
async rewrites() {
|
||||
const apiUrl = process.env.API_GATEWAY_URL || 'http://api-gateway:4000';
|
||||
return [
|
||||
{
|
||||
source: '/api/:path*',
|
||||
destination: `${apiUrl}/v1/:path*`,
|
||||
},
|
||||
];
|
||||
},
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
8931
frontend/archive/v2_nextjs/package-lock.json
generated
Normal file
45
frontend/archive/v2_nextjs/package.json
Normal file
@ -0,0 +1,45 @@
|
||||
{
|
||||
"name": "frontend",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "NODE_NO_WARNINGS=1 next dev -p 3001",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "eslint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@radix-ui/react-checkbox": "^1.3.3",
|
||||
"@radix-ui/react-navigation-menu": "^1.2.14",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-slot": "^1.2.3",
|
||||
"@radix-ui/react-tabs": "^1.1.13",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"geist": "^1.5.1",
|
||||
"github-markdown-css": "^5.8.1",
|
||||
"lucide-react": "^0.545.0",
|
||||
"next": "15.5.5",
|
||||
"react": "19.1.0",
|
||||
"react-dom": "19.1.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"recharts": "^3.3.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"swr": "^2.3.6",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
"zustand": "^5.0.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3",
|
||||
"@tailwindcss/postcss": "^4",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "15.5.5",
|
||||
"tailwindcss": "^4",
|
||||
"tw-animate-css": "^1.4.0",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 391 B After Width: | Height: | Size: 391 B |
|
Before Width: | Height: | Size: 1.0 KiB After Width: | Height: | Size: 1.0 KiB |
|
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
|
Before Width: | Height: | Size: 128 B After Width: | Height: | Size: 128 B |
|
Before Width: | Height: | Size: 385 B After Width: | Height: | Size: 385 B |
43
frontend/archive/v2_nextjs/scripts/docker-dev-entrypoint.sh
Executable file
@ -0,0 +1,43 @@
|
||||
#!/bin/sh
|
||||
set -euo pipefail
|
||||
|
||||
PROJECT_DIR="${PROJECT_DIR:-/workspace/frontend}"
|
||||
LOCKFILE="${PROJECT_DIR}/package-lock.json"
|
||||
NODE_MODULES_DIR="${PROJECT_DIR}/node_modules"
|
||||
HASH_FILE="${NODE_MODULES_DIR}/.package-lock.hash"
|
||||
DEV_COMMAND="${DEV_COMMAND:-npm run dev}"
|
||||
|
||||
cd "${PROJECT_DIR}"
|
||||
|
||||
calculate_lock_hash() {
|
||||
sha256sum "${LOCKFILE}" | awk '{print $1}'
|
||||
}
|
||||
|
||||
write_hash() {
|
||||
calculate_lock_hash > "${HASH_FILE}"
|
||||
}
|
||||
|
||||
install_dependencies() {
|
||||
echo "[frontend] 安装/更新依赖..."
|
||||
npm ci
|
||||
write_hash
|
||||
}
|
||||
|
||||
if [ ! -d "${NODE_MODULES_DIR}" ]; then
|
||||
install_dependencies
|
||||
elif [ ! -f "${HASH_FILE}" ]; then
|
||||
install_dependencies
|
||||
else
|
||||
current_hash="$(calculate_lock_hash)"
|
||||
installed_hash="$(cat "${HASH_FILE}" 2>/dev/null || true)"
|
||||
|
||||
if [ "${current_hash}" != "${installed_hash}" ]; then
|
||||
echo "[frontend] package-lock.json 发生变化,重新安装依赖..."
|
||||
install_dependencies
|
||||
else
|
||||
echo "[frontend] 依赖哈希一致,跳过 npm ci。"
|
||||
fi
|
||||
fi
|
||||
|
||||
exec ${DEV_COMMAND}
|
||||
|
||||
@ -0,0 +1,59 @@
|
||||
import { NextRequest, NextResponse } from 'next/server';
|
||||
|
||||
export const dynamic = 'force-dynamic';
|
||||
|
||||
export async function GET(
|
||||
request: NextRequest,
|
||||
{ params }: { params: Promise<{ requestId: string }> }
|
||||
) {
|
||||
const { requestId } = await params;
|
||||
|
||||
// Use container internal URL if available, otherwise fallback
|
||||
const backendUrl = process.env.BACKEND_INTERNAL_URL || 'http://api-gateway:4000/v1';
|
||||
const targetUrl = `${backendUrl}/workflow/events/${requestId}`;
|
||||
|
||||
console.log(`[API Route] Proxying SSE for request ${requestId} to ${targetUrl}`);
|
||||
|
||||
try {
|
||||
const response = await fetch(targetUrl, {
|
||||
headers: {
|
||||
'Accept': 'text/event-stream',
|
||||
},
|
||||
cache: 'no-store',
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
console.error(`[API Route] Upstream error: ${response.status} ${response.statusText}`);
|
||||
return NextResponse.json(
|
||||
{ error: `Upstream error: ${response.status}` },
|
||||
{ status: response.status }
|
||||
);
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
return NextResponse.json(
|
||||
{ error: 'No response body from upstream' },
|
||||
{ status: 502 }
|
||||
);
|
||||
}
|
||||
|
||||
const stream = response.body;
|
||||
|
||||
return new NextResponse(stream, {
|
||||
headers: {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache, no-transform',
|
||||
'Connection': 'keep-alive',
|
||||
'Content-Encoding': 'none',
|
||||
'X-Accel-Buffering': 'no', // For Nginx if present
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[API Route] Proxy failed:', error);
|
||||
return NextResponse.json(
|
||||
{ error: 'Internal Server Error' },
|
||||
{ status: 500 }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -329,7 +329,9 @@ export function AnalysisConfigTab() {
|
||||
|
||||
{isCreatingModule && (
|
||||
<div className="space-y-4 p-4 border rounded-lg border-dashed">
|
||||
<h3 className="text-lg font-semibold">在 "{localTemplateSets[selectedTemplateId].name}" 中新增分析模块</h3>
|
||||
<h3 className="text-lg font-semibold">
|
||||
在 “{localTemplateSets[selectedTemplateId].name}” 中新增分析模块
|
||||
</h3>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="new-module-id">模块 ID (英文, 无空格)</Label>
|
||||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
113
frontend/archive/v2_nextjs/src/app/page.tsx
Normal file
@ -0,0 +1,113 @@
|
||||
'use client';
|
||||
|
||||
import { useState } from 'react';
|
||||
import { useRouter } from 'next/navigation';
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
|
||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Loader2 } from "lucide-react";
|
||||
|
||||
export default function StockInputForm() {
|
||||
const [symbol, setSymbol] = useState('');
|
||||
const [market, setMarket] = useState('china');
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [error, setError] = useState('');
|
||||
const router = useRouter();
|
||||
|
||||
const handleSearch = async (e?: React.FormEvent) => {
|
||||
if (e) {
|
||||
e.preventDefault();
|
||||
}
|
||||
if (!symbol.trim()) return;
|
||||
|
||||
setIsLoading(true);
|
||||
setError('');
|
||||
|
||||
try {
|
||||
// 1. Call the backend to normalize the symbol, without starting a workflow
|
||||
const response = await fetch('/api/tools/resolve-symbol', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
symbol: symbol.trim(),
|
||||
market: market,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errText = await response.text();
|
||||
throw new Error(errText || '解析股票代码失败');
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
// data shape: { symbol: string, market: string }
|
||||
|
||||
// 2. Navigate to the report page, carrying only the normalized symbol
|
||||
// There is no request_id yet, so the detail page does not auto-start; it waits for the user to click
|
||||
router.push(`/report/${encodeURIComponent(data.symbol)}?market=${data.market}`);
|
||||
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
setError(err instanceof Error ? err.message : '操作失败,请重试');
|
||||
setIsLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-center items-center h-full">
|
||||
<Card className="w-full max-w-md">
|
||||
<CardHeader>
|
||||
<CardTitle>基本面分析报告</CardTitle>
|
||||
<CardDescription>输入股票代码和市场,生成综合分析报告。</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<form onSubmit={handleSearch} className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<label>股票代码</label>
|
||||
<Input
|
||||
placeholder="例如: 600519 或 AAPL"
|
||||
value={symbol}
|
||||
onChange={(e) => setSymbol(e.target.value)}
|
||||
disabled={isLoading}
|
||||
/>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<label>交易市场</label>
|
||||
<Select value={market} onValueChange={setMarket} disabled={isLoading}>
|
||||
<SelectTrigger>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
<SelectItem value="china">中国</SelectItem>
|
||||
<SelectItem value="hongkong">香港</SelectItem>
|
||||
<SelectItem value="usa">美国</SelectItem>
|
||||
<SelectItem value="japan">日本</SelectItem>
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<div className="text-sm text-red-500 font-medium">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<Button type="submit" className="w-full" disabled={isLoading || !symbol.trim()}>
|
||||
{isLoading ? (
|
||||
<>
|
||||
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
|
||||
正在解析...
|
||||
</>
|
||||
) : (
|
||||
'生成报告'
|
||||
)}
|
||||
</Button>
|
||||
</form>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@ -0,0 +1,234 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs";
|
||||
import { ScrollArea } from "@/components/ui/scroll-area";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip";
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import remarkGfm from 'remark-gfm';
|
||||
import { TaskStatus } from '@/types/workflow';
|
||||
import { AnalysisModuleConfig } from '@/types/index';
|
||||
import { BrainCircuit, Terminal, Info } from 'lucide-react';
|
||||
import { TaskInfo } from '@/hooks/useWorkflow';
|
||||
|
||||
interface AnalysisModulesViewProps {
|
||||
taskStates: Record<string, TaskStatus>;
|
||||
taskInfos: Record<string, TaskInfo>;
|
||||
taskOutputs: Record<string, string>;
|
||||
modulesConfig: Record<string, AnalysisModuleConfig>;
|
||||
}
|
||||
|
||||
export function AnalysisModulesView({
|
||||
taskStates,
|
||||
taskInfos,
|
||||
taskOutputs,
|
||||
modulesConfig
|
||||
}: AnalysisModulesViewProps) {
|
||||
console.log('[AnalysisModulesView] Render. Config keys:', Object.keys(modulesConfig));
|
||||
console.log('[AnalysisModulesView] Task States:', taskStates);
|
||||
|
||||
// Identify analysis tasks based on the template config
|
||||
// We assume task IDs in the DAG correspond to module IDs or follow a pattern
|
||||
// For now, let's try to match tasks that are NOT fetch tasks
|
||||
|
||||
// If we have config, use it to drive tabs
|
||||
const moduleIds = Object.keys(modulesConfig);
|
||||
|
||||
const [activeModuleId, setActiveModuleId] = useState<string>(moduleIds[0] || '');
|
||||
|
||||
useEffect(() => {
|
||||
// If no active module and we have modules, select first
|
||||
if (!activeModuleId && moduleIds.length > 0) {
|
||||
setActiveModuleId(moduleIds[0]);
|
||||
}
|
||||
}, [moduleIds, activeModuleId]);
|
||||
|
||||
if (moduleIds.length === 0) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center h-[300px] border-dashed border-2 rounded-lg text-muted-foreground">
|
||||
<BrainCircuit className="w-10 h-10 mb-2 opacity-50" />
|
||||
<p>No analysis modules defined in this template.</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<Tabs value={activeModuleId} onValueChange={setActiveModuleId} className="w-full">
|
||||
<div className="overflow-x-auto pb-2">
|
||||
<TabsList className="w-full justify-start h-auto p-1 bg-transparent gap-2">
|
||||
{moduleIds.map(moduleId => {
|
||||
const config = modulesConfig[moduleId];
|
||||
// Task ID might match module ID directly or be prefixed
|
||||
// We need to check multiple patterns because the backend DAG might be simplified (e.g., single "analysis:report" task)
|
||||
// or use "analysis:{moduleId}" format.
|
||||
let taskId = moduleId;
|
||||
let status = taskStates[taskId];
|
||||
|
||||
if (!status) {
|
||||
// Try prefix format
|
||||
taskId = `analysis:${moduleId}`;
|
||||
status = taskStates[taskId];
|
||||
}
|
||||
|
||||
if (!status) {
|
||||
// Try fallback to generic analysis task if specific one is missing
|
||||
// This handles the case where backend collapses all analysis into one task
|
||||
taskId = 'analysis:report';
|
||||
status = taskStates[taskId];
|
||||
}
|
||||
|
||||
// Default to pending if still not found
|
||||
status = status || 'Pending';
|
||||
|
||||
return (
|
||||
<TabsTrigger
|
||||
key={moduleId}
|
||||
value={moduleId}
|
||||
className="data-[state=active]:bg-primary data-[state=active]:text-primary-foreground px-4 py-2 rounded-md border bg-card hover:bg-accent/50 transition-all"
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
<span>{config.name}</span>
|
||||
<StatusDot status={status} />
|
||||
</div>
|
||||
</TabsTrigger>
|
||||
);
|
||||
})}
|
||||
</TabsList>
|
||||
</div>
|
||||
|
||||
{moduleIds.map(moduleId => {
|
||||
// Resolve task ID and Status using the same logic as tabs
|
||||
let taskId = moduleId;
|
||||
let status = taskStates[taskId];
|
||||
let output = taskOutputs[taskId];
|
||||
let info = taskInfos[taskId];
|
||||
|
||||
if (!status) {
|
||||
taskId = `analysis:${moduleId}`;
|
||||
status = taskStates[taskId];
|
||||
output = taskOutputs[taskId];
|
||||
info = taskInfos[taskId];
|
||||
}
|
||||
|
||||
if (!status) {
|
||||
// Fallback for status
|
||||
const genericId = 'analysis:report';
|
||||
status = taskStates[genericId];
|
||||
info = taskInfos[genericId];
|
||||
// Note: We might not want to show generic output for specific module tab,
|
||||
// but if it's the only output we have, maybe?
|
||||
// Usually 'analysis:report' output might be the full report or a summary.
|
||||
// Let's check if we have output for the specific module ID first in taskOutputs
|
||||
// regardless of status.
|
||||
if (!output) output = taskOutputs[genericId];
|
||||
}
|
||||
|
||||
status = status || 'Pending';
|
||||
output = output || '';
|
||||
|
||||
const config = modulesConfig[moduleId];
|
||||
|
||||
return (
|
||||
<TabsContent key={moduleId} value={moduleId} className="mt-0">
|
||||
<Card className="h-[600px] flex flex-col">
|
||||
<CardHeader className="py-4 border-b bg-muted/5">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
<CardTitle className="text-lg">{config.name}</CardTitle>
|
||||
<Badge variant="outline" className="font-mono text-xs">
|
||||
{config.model_id}
|
||||
</Badge>
|
||||
</div>
|
||||
<StatusBadge status={status} message={info?.message} />
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent className="flex-1 p-0 min-h-0 relative">
|
||||
<ScrollArea className="h-full p-6">
|
||||
{output ? (
|
||||
<div className="prose dark:prose-invert max-w-none pb-10">
|
||||
<ReactMarkdown remarkPlugins={[remarkGfm]}>
|
||||
{output}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex flex-col items-center justify-center h-full text-muted-foreground gap-2 opacity-50">
|
||||
<Terminal className="w-8 h-8" />
|
||||
<p>
|
||||
{status === 'Running' ? 'Generating analysis...' :
|
||||
status === 'Skipped' ? 'Module skipped.' :
|
||||
status === 'Failed' ? 'Module failed.' :
|
||||
'Waiting for input...'}
|
||||
</p>
|
||||
{info?.message && (status === 'Skipped' || status === 'Failed') && (
|
||||
<p className="text-sm text-red-500 max-w-md text-center mt-2">
|
||||
Reason: {info.message}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</ScrollArea>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</TabsContent>
|
||||
);
|
||||
})}
|
||||
</Tabs>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function StatusDot({ status }: { status: TaskStatus }) {
|
||||
let colorClass = "bg-muted";
|
||||
if (status === 'Completed') colorClass = "bg-green-500";
|
||||
if (status === 'Failed') colorClass = "bg-red-500";
|
||||
if (status === 'Running') colorClass = "bg-blue-500 animate-pulse";
|
||||
if (status === 'Scheduled') colorClass = "bg-yellow-500";
|
||||
if (status === 'Skipped') colorClass = "bg-gray-400";
|
||||
|
||||
return <div className={`w-2 h-2 rounded-full ${colorClass}`} />;
|
||||
}
|
||||
|
||||
function StatusBadge({ status, message }: { status: TaskStatus, message?: string }) {
|
||||
let badge = <Badge variant="outline">Pending</Badge>;
|
||||
|
||||
switch (status) {
|
||||
case 'Completed':
|
||||
badge = <Badge variant="outline" className="text-green-600 border-green-200 bg-green-50">Completed</Badge>;
|
||||
break;
|
||||
case 'Failed':
|
||||
badge = <Badge variant="destructive">Failed</Badge>;
|
||||
break;
|
||||
case 'Running':
|
||||
badge = <Badge variant="secondary" className="text-blue-600 bg-blue-50 animate-pulse">Generating...</Badge>;
|
||||
break;
|
||||
case 'Scheduled':
|
||||
badge = <Badge variant="outline" className="text-yellow-600 border-yellow-200 bg-yellow-50">Scheduled</Badge>;
|
||||
break;
|
||||
case 'Skipped':
|
||||
badge = <Badge variant="outline" className="text-gray-500">Skipped</Badge>;
|
||||
break;
|
||||
default:
|
||||
badge = <Badge variant="outline">Pending</Badge>;
|
||||
}
|
||||
|
||||
if (message) {
|
||||
return (
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<div className="flex items-center gap-2 cursor-help">
|
||||
{badge}
|
||||
<Info className="w-4 h-4 text-muted-foreground" />
|
||||
</div>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent className="max-w-xs">
|
||||
<p>{message}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
);
|
||||
}
|
||||
|
||||
return badge;
|
||||
}
|
||||