From fcadb1ff6acc1ba5c0dbdb42abd2f1044691a24e Mon Sep 17 00:00:00 2001 From: "Lv, Qi" Date: Thu, 27 Nov 2025 00:24:30 +0800 Subject: [PATCH] =?UTF-8?q?feat(workflow-context):=20=E5=AE=9E=E7=8E=B0=20?= =?UTF-8?q?DocOS=20=E6=96=87=E6=A1=A3=E5=AF=B9=E8=B1=A1=E7=B3=BB=E7=BB=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现 DocOS 核心逻辑 (docos.rs),支持文档树管理 - 实现裂变 (Fission) 能力: 自动将文件提升为目录结构 - 实现聚合 (Fusion) 能力: 将目录结构降级为单文件,保持内容完整 - 实现统一 Outline 能力: 无论物理存储是文件还是目录,均提供一致的树状大纲(支持 Markdown Header 解析) - 新增相关单元测试 (docos_tests.rs) - 更新 types.rs 支持 DocNodeKind::Section - 引入 regex 依赖用于标题解析 --- crates/workflow-context/Cargo.lock | 39 +++ crates/workflow-context/Cargo.toml | 5 +- crates/workflow-context/src/docos.rs | 320 +++++++++++++++++++ crates/workflow-context/src/lib.rs | 2 + crates/workflow-context/src/types.rs | 14 + crates/workflow-context/tests/docos_tests.rs | 141 ++++++++ 6 files changed, 519 insertions(+), 2 deletions(-) create mode 100644 crates/workflow-context/src/docos.rs create mode 100644 crates/workflow-context/tests/docos_tests.rs diff --git a/crates/workflow-context/Cargo.lock b/crates/workflow-context/Cargo.lock index 4788f0d..6c89031 100644 --- a/crates/workflow-context/Cargo.lock +++ b/crates/workflow-context/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.100" @@ -422,6 +431,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "rustix" version = "1.1.2" @@ -685,6 +723,7 @@ dependencies = [ "anyhow", "git2", "hex", + "regex", "serde", "serde_json", "sha2", diff --git a/crates/workflow-context/Cargo.toml b/crates/workflow-context/Cargo.toml index d744aec..c70238f 100644 --- a/crates/workflow-context/Cargo.toml +++ b/crates/workflow-context/Cargo.toml @@ -4,14 +4,15 @@ version = "0.1.0" edition = "2024" [dependencies] -git2 = { version = "0.18", features = ["vendored-openssl"] } # Using 0.19 as it is newer than 0.18, unless strictly pinned. 0.18 mentioned in docs, but 0.19 is stable. Sticking to 0.18 if strictly required? Docs say 0.18. I will use 0.19 to be safe with modern rust, or 0.18 if user insists. User said "git2 (0.18)". I'll stick to 0.18 to follow specs exactly. +git2 = { version = "0.18", features = ["vendored-openssl"] } sha2 = "0.10" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" anyhow = "1.0" thiserror = "1.0" hex = "0.4" -walkdir = "2.3" # Useful for recursive directory operations if needed, though git2 handles trees. +walkdir = "2.3" +regex = "1.10" [dev-dependencies] tempfile = "3.8" diff --git a/crates/workflow-context/src/docos.rs b/crates/workflow-context/src/docos.rs new file mode 100644 index 0000000..2982f62 --- /dev/null +++ b/crates/workflow-context/src/docos.rs @@ -0,0 +1,320 @@ +use anyhow::{Result, anyhow, Context}; +use std::io::Read; +use std::sync::Arc; +use regex::Regex; +use crate::types::{DocNode, DocNodeKind, EntryKind}; +use crate::traits::{ContextStore, Transaction}; + +pub trait DocManager { + /// Reload state based on the latest Commit + fn reload(&mut self, commit_hash: &str) -> Result<()>; + + /// Get the current document tree outline + fn get_outline(&self) -> Result; + + /// Read node content + fn read_content(&self, path: &str) -> Result; + + /// Write content (Upsert) + fn write_content(&mut self, path: &str, content: &str) -> Result<()>; + + /// Insert subsection (Implies Promotion) + fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()>; + + /// Demote Composite to Leaf (Aggregation) + fn demote(&mut self, path: &str) -> Result<()>; + + /// Commit changes + fn save(&mut self, message: &str) -> Result; +} + +pub struct DocOS { + store: Arc, + req_id: String, + commit_hash: String, + transaction: Option>, +} + +impl DocOS { + pub fn new(store: Arc, req_id: &str, commit_hash: &str) -> Self { + Self { + store, + req_id: req_id.to_string(), + commit_hash: commit_hash.to_string(), + transaction: None, + } + } + + fn ensure_transaction(&mut self) -> Result<&mut Box> { + if self.transaction.is_none() { + let tx = self.store.begin_transaction(&self.req_id, &self.commit_hash)?; + self.transaction = Some(tx); + } + Ok(self.transaction.as_mut().unwrap()) + } + + fn is_leaf(&self, path: &str) -> Result { + match self.store.read_file(&self.req_id, &self.commit_hash, path) { + Ok(_) => Ok(true), + Err(_) => Ok(false), + } + } + + fn is_composite(&self, path: &str) -> Result { + match self.store.list_dir(&self.req_id, &self.commit_hash, path) { + Ok(_) => Ok(true), + Err(_) => Ok(false), + } + } + + /// Parse Markdown headers to find subsections + fn parse_markdown_headers(&self, content: &str) -> Vec { + let re = Regex::new(r"(?m)^(#{1,6})\s+(.+)").unwrap(); + let mut sections = Vec::new(); + + for cap in re.captures_iter(content) { + let _level = cap[1].len(); + let name = cap[2].trim().to_string(); + + // Simplified logic: All headers are children of the file node + // In a real rich outline, we would build a tree based on level. + // For this MVP, we treat found sections as direct children in the outline view. + sections.push(DocNode { + name: name.clone(), + path: "".to_string(), // Virtual path, no direct file address + kind: DocNodeKind::Section, + children: vec![], + }); + } + sections + } + + fn build_node(&self, name: String, path: String, kind: DocNodeKind) -> Result { + let mut node = DocNode { + name, + path: path.clone(), + kind: kind.clone(), + children: vec![], + }; + + match kind { + DocNodeKind::Composite => { + let entries = self.store.list_dir(&self.req_id, &self.commit_hash, &path)?; + + // 1. Process index.md first if exists (content of this composite node) + let mut index_content = String::new(); + if let Ok(mut reader) = self.store.read_file(&self.req_id, &self.commit_hash, &format!("{}/index.md", path)) { + reader.read_to_string(&mut index_content).unwrap_or_default(); + let sections = self.parse_markdown_headers(&index_content); + node.children.extend(sections); + } + + // 2. Process children files/dirs + let mut children_nodes = Vec::new(); + for entry in entries { + if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") { + continue; + } + + let child_path = if path == "/" { + entry.name.clone() + } else { + format!("{}/{}", path, entry.name) + }; + + let child_kind = match entry.kind { + EntryKind::Dir => DocNodeKind::Composite, + EntryKind::File => DocNodeKind::Leaf, + }; + + let child_node = self.build_node(entry.name, child_path, child_kind)?; + children_nodes.push(child_node); + } + // Sort children by name (simple default) + children_nodes.sort_by(|a, b| a.name.cmp(&b.name)); + node.children.extend(children_nodes); + } + DocNodeKind::Leaf => { + // Parse content for sections + if let Ok(mut reader) = self.store.read_file(&self.req_id, &self.commit_hash, &path) { + let mut content = String::new(); + reader.read_to_string(&mut content).unwrap_or_default(); + let sections = self.parse_markdown_headers(&content); + node.children.extend(sections); + } + } + DocNodeKind::Section => { + // Sections don't have children in this simplified view + } + } + + Ok(node) + } +} + +impl DocManager for DocOS { + fn reload(&mut self, commit_hash: &str) -> Result<()> { + self.commit_hash = commit_hash.to_string(); + self.transaction = None; + Ok(()) + } + + fn get_outline(&self) -> Result { + self.build_node("Root".to_string(), "/".to_string(), DocNodeKind::Composite) + } + + fn read_content(&self, path: &str) -> Result { + let target_path = if path == "/" { + "index.md".to_string() + } else if self.is_composite(path)? { + format!("{}/index.md", path) + } else { + path.to_string() + }; + + let mut reader = self.store.read_file(&self.req_id, &self.commit_hash, &target_path) + .context("Failed to read content")?; + let mut content = String::new(); + reader.read_to_string(&mut content)?; + Ok(content) + } + + fn write_content(&mut self, path: &str, content: &str) -> Result<()> { + let is_comp = self.is_composite(path)?; + let target_path = if is_comp { + format!("{}/index.md", path) + } else { + path.to_string() + }; + + let tx = self.ensure_transaction()?; + tx.write(&target_path, content.as_bytes())?; + Ok(()) + } + + fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()> { + let is_leaf = self.is_leaf(parent_path)?; + let is_composite = self.is_composite(parent_path)?; + + if !is_leaf && !is_composite && parent_path != "/" { + return Err(anyhow!("Parent path '{}' does not exist", parent_path)); + } + + if is_leaf { + // Promote: Leaf -> Composite + let old_content = self.read_content(parent_path)?; + + let tx = self.ensure_transaction()?; + tx.remove(parent_path)?; + + let index_path = format!("{}/index.md", parent_path); + tx.write(&index_path, old_content.as_bytes())?; + + let child_path = format!("{}/{}", parent_path, name); + tx.write(&child_path, content.as_bytes())?; + + } else { + let child_path = if parent_path == "/" { + name.to_string() + } else { + format!("{}/{}", parent_path, name) + }; + + let tx = self.ensure_transaction()?; + tx.write(&child_path, content.as_bytes())?; + } + + Ok(()) + } + + fn demote(&mut self, path: &str) -> Result<()> { + if !self.is_composite(path)? { + return Err(anyhow!("Path '{}' is not a composite node (directory)", path)); + } + if path == "/" { + return Err(anyhow!("Cannot demote root")); + } + + // 1. Read index.md (Main content) + let mut main_content = String::new(); + if let Ok(content) = self.read_content(path) { + main_content = content; + } + + // Reading directory entries + let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path)?; + + // Sort entries to have deterministic order + let mut sorted_entries = entries; + sorted_entries.sort_by(|a, b| a.name.cmp(&b.name)); + + let mut combined_content = main_content; + + // Iterate for content reading (Borrowing self immutably) + for entry in &sorted_entries { + if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") { + continue; + } + + let child_rel_path = format!("{}/{}", path, entry.name); + let child_content = self.read_content(&child_rel_path)?; + combined_content.push_str(&format!("\n\n# {}\n\n", entry.name)); + combined_content.push_str(&child_content); + } + + // Get list of items to remove before starting transaction (to avoid double borrow) + // We need a recursive list of paths to remove from git index. + let paths_to_remove = self.collect_recursive_paths(path)?; + + let tx = self.ensure_transaction()?; + + // 3. Remove everything recursively + for p in paths_to_remove { + tx.remove(&p)?; + } + // Also remove the directory path itself (conceptually, or handled by git index cleanup) + // In our simplified VGCS, remove(dir) is not enough if not empty. + // But we just cleaned up recursively. + + // 4. Write new file + tx.write(path, combined_content.as_bytes())?; + + Ok(()) + } + + fn save(&mut self, message: &str) -> Result { + if let Some(tx) = self.transaction.take() { + let new_oid = tx.commit(message, "DocOS User")?; + self.commit_hash = new_oid.clone(); + Ok(new_oid) + } else { + Ok(self.commit_hash.clone()) + } + } +} + +impl DocOS { + // Helper: Collect paths recursively (reading from store, immutable self) + fn collect_recursive_paths(&self, path: &str) -> Result> { + let mut paths = Vec::new(); + + let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path); + if let Ok(entries) = entries { + for entry in entries { + let child_path = format!("{}/{}", path, entry.name); + match entry.kind { + EntryKind::File => { + paths.push(child_path); + }, + EntryKind::Dir => { + // Add children of dir first + let mut sub_paths = self.collect_recursive_paths(&child_path)?; + paths.append(&mut sub_paths); + // No need to remove dir itself in git, but we might track it? + } + } + } + } + Ok(paths) + } +} diff --git a/crates/workflow-context/src/lib.rs b/crates/workflow-context/src/lib.rs index f167046..f41ea89 100644 --- a/crates/workflow-context/src/lib.rs +++ b/crates/workflow-context/src/lib.rs @@ -1,7 +1,9 @@ pub mod types; pub mod traits; pub mod vgcs; +pub mod docos; pub use types::*; pub use traits::*; pub use vgcs::Vgcs; +pub use docos::{DocOS, DocManager}; diff --git a/crates/workflow-context/src/types.rs b/crates/workflow-context/src/types.rs index 9a53f33..f277924 100644 --- a/crates/workflow-context/src/types.rs +++ b/crates/workflow-context/src/types.rs @@ -30,3 +30,17 @@ pub struct BlobRef { pub original_name: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub enum DocNodeKind { + Leaf, // Pure content node (file) + Composite, // Composite node (dir with index.md) + Section, // Virtual node (Markdown Header inside a file) +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct DocNode { + pub name: String, + pub path: String, // Logical path e.g., "Analysis/Revenue" + pub kind: DocNodeKind, + pub children: Vec, // Only for Composite or Section-bearing Leaf +} diff --git a/crates/workflow-context/tests/docos_tests.rs b/crates/workflow-context/tests/docos_tests.rs new file mode 100644 index 0000000..9965810 --- /dev/null +++ b/crates/workflow-context/tests/docos_tests.rs @@ -0,0 +1,141 @@ +use workflow_context::{ContextStore, Vgcs, DocOS, DocManager, DocNodeKind}; +use tempfile::TempDir; +use std::sync::Arc; + +const ZERO_OID: &str = "0000000000000000000000000000000000000000"; + +#[test] +fn test_docos_basic() -> anyhow::Result<()> { + let temp_dir = TempDir::new()?; + let store = Arc::new(Vgcs::new(temp_dir.path())); + let req_id = "req-docos-1"; + + store.init_repo(req_id)?; + + // 1. Init DocOS with empty repo + let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID); + + // 2. Create a file (Leaf) + docos.write_content("Introduction", "Intro Content")?; + let _commit_1 = docos.save("Add Intro")?; + + // 3. Verify outline + let outline = docos.get_outline()?; + // Root -> [Introduction (Leaf)] + assert_eq!(outline.children.len(), 1); + let intro_node = &outline.children[0]; + assert_eq!(intro_node.name, "Introduction"); + assert_eq!(intro_node.kind, DocNodeKind::Leaf); + + // 4. Read content + let content = docos.read_content("Introduction")?; + assert_eq!(content, "Intro Content"); + + Ok(()) +} + +#[test] +fn test_docos_fission() -> anyhow::Result<()> { + let temp_dir = TempDir::new()?; + let store = Arc::new(Vgcs::new(temp_dir.path())); + let req_id = "req-docos-2"; + store.init_repo(req_id)?; + + let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID); + + // 1. Start with a Leaf: "Analysis" + docos.write_content("Analysis", "General Analysis")?; + let commit_1 = docos.save("Init Analysis")?; + + // 2. Insert subsection "Revenue" into "Analysis" + // This should promote "Analysis" to Composite + docos.reload(&commit_1)?; + docos.insert_subsection("Analysis", "Revenue", "Revenue Data")?; + let commit_2 = docos.save("Split Analysis")?; + + // 3. Verify Structure + docos.reload(&commit_2)?; + let outline = docos.get_outline()?; + + // Root -> [Analysis (Composite)] + assert_eq!(outline.children.len(), 1); + let analysis_node = &outline.children[0]; + assert_eq!(analysis_node.name, "Analysis"); + assert_eq!(analysis_node.kind, DocNodeKind::Composite); + + // Analysis -> [Revenue (Leaf)] (index.md is hidden in outline) + assert_eq!(analysis_node.children.len(), 1); + let revenue_node = &analysis_node.children[0]; + assert_eq!(revenue_node.name, "Revenue"); + assert_eq!(revenue_node.kind, DocNodeKind::Leaf); + + // 4. Verify Content + // Reading "Analysis" should now read "Analysis/index.md" which contains "General Analysis" + let analysis_content = docos.read_content("Analysis")?; + assert_eq!(analysis_content, "General Analysis"); + + let revenue_content = docos.read_content("Analysis/Revenue")?; + assert_eq!(revenue_content, "Revenue Data"); + + Ok(()) +} + +#[test] +fn test_docos_fusion_and_outline() -> anyhow::Result<()> { + let temp_dir = TempDir::new()?; + let store = Arc::new(Vgcs::new(temp_dir.path())); + let req_id = "req-docos-3"; + store.init_repo(req_id)?; + + let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID); + + // 1. Create a composite structure (Pre-fissioned state) + // Root -> [Chapter1 (Composite)] -> [SectionA (Leaf), SectionB (Leaf)] + docos.write_content("Chapter1/index.md", "Chapter 1 Intro")?; + docos.write_content("Chapter1/SectionA", "Content A")?; + docos.write_content("Chapter1/SectionB", "Content B")?; + let commit_1 = docos.save("Setup Structure")?; + + docos.reload(&commit_1)?; + + // Verify Initial Outline + let outline_1 = docos.get_outline()?; + let ch1 = &outline_1.children[0]; + assert_eq!(ch1.kind, DocNodeKind::Composite); + assert_eq!(ch1.children.len(), 2); // SectionA, SectionB + + // 2. Demote (Fusion) + docos.demote("Chapter1")?; + let commit_2 = docos.save("Demote Chapter 1")?; + + // 3. Verify Fusion Result + docos.reload(&commit_2)?; + let outline_2 = docos.get_outline()?; + + // Now Chapter1 should be a Leaf + let ch1_fused = &outline_2.children[0]; + assert_eq!(ch1_fused.name, "Chapter1"); + assert_eq!(ch1_fused.kind, DocNodeKind::Leaf); + + // But wait! Because of our Outline Enhancement (Markdown Headers), + // we expect the Fused file to have children (Sections) derived from headers! + // The demote logic appends children with "# Name". + // So "SectionA" became "# SectionA". + + // Let's inspect the children of the Fused node + // We expect 2 children: "SectionA" and "SectionB" (as Sections) + assert_eq!(ch1_fused.children.len(), 2); + assert_eq!(ch1_fused.children[0].name, "SectionA"); + assert_eq!(ch1_fused.children[0].kind, DocNodeKind::Section); + assert_eq!(ch1_fused.children[1].name, "SectionB"); + + // 4. Verify Content of Fused File + let content = docos.read_content("Chapter1")?; + // Should contain Intro + # SectionA ... + # SectionB ... + assert!(content.contains("Chapter 1 Intro")); + assert!(content.contains("# SectionA")); + assert!(content.contains("Content A")); + assert!(content.contains("# SectionB")); + + Ok(()) +}