feat(workflow-context): 实现 DocOS 文档对象系统

- 实现 DocOS 核心逻辑 (docos.rs),支持文档树管理
- 实现裂变 (Fission) 能力: 自动将文件提升为目录结构
- 实现聚合 (Fusion) 能力: 将目录结构降级为单文件,保持内容完整
- 实现统一 Outline 能力: 无论物理存储是文件还是目录,均提供一致的树状大纲(支持 Markdown Header 解析)
- 新增相关单元测试 (docos_tests.rs)
- 更新 types.rs 支持 DocNodeKind::Section
- 引入 regex 依赖用于标题解析
This commit is contained in:
Lv, Qi 2025-11-27 00:24:30 +08:00
parent 48e45faffb
commit fcadb1ff6a
6 changed files with 519 additions and 2 deletions

View File

@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.100"
@ -422,6 +431,35 @@ version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustix"
version = "1.1.2"
@ -685,6 +723,7 @@ dependencies = [
"anyhow",
"git2",
"hex",
"regex",
"serde",
"serde_json",
"sha2",

View File

@ -4,14 +4,15 @@ version = "0.1.0"
edition = "2024"
[dependencies]
git2 = { version = "0.18", features = ["vendored-openssl"] } # Using 0.19 as it is newer than 0.18, unless strictly pinned. 0.18 mentioned in docs, but 0.19 is stable. Sticking to 0.18 if strictly required? Docs say 0.18. I will use 0.19 to be safe with modern rust, or 0.18 if user insists. User said "git2 (0.18)". I'll stick to 0.18 to follow specs exactly.
git2 = { version = "0.18", features = ["vendored-openssl"] }
sha2 = "0.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
anyhow = "1.0"
thiserror = "1.0"
hex = "0.4"
walkdir = "2.3" # Useful for recursive directory operations if needed, though git2 handles trees.
walkdir = "2.3"
regex = "1.10"
[dev-dependencies]
tempfile = "3.8"

View File

@ -0,0 +1,320 @@
use anyhow::{Result, anyhow, Context};
use std::io::Read;
use std::sync::Arc;
use regex::Regex;
use crate::types::{DocNode, DocNodeKind, EntryKind};
use crate::traits::{ContextStore, Transaction};
/// High-level operations on a document tree stored in a versioned store.
///
/// Nodes are addressed by a logical path (for example `"Analysis/Revenue"`, or `"/"` for
/// the root). The notes below on promotion, aggregation and staging describe the `DocOS`
/// implementation in this file; other implementors may differ.
pub trait DocManager {
/// Switch the manager to read from `commit_hash`.
///
/// In `DocOS` this also discards any transaction that has not been saved yet.
fn reload(&mut self, commit_hash: &str) -> Result<()>;
/// Build the outline of the whole document tree, starting at the root node.
fn get_outline(&self) -> Result<DocNode>;
/// Read the text content of the node at `path`.
///
/// In `DocOS`, a directory-backed node (and the root) is read from its `index.md`.
fn read_content(&self, path: &str) -> Result<String>;
/// Create or overwrite the content of the node at `path` (upsert).
///
/// In `DocOS` the write is staged in a transaction and becomes durable on `save`.
fn write_content(&mut self, path: &str, content: &str) -> Result<()>;
/// Add a child called `name` with `content` under `parent_path`.
///
/// In `DocOS`, a parent that is currently a single file is promoted to a directory
/// whose `index.md` keeps the parent's previous content.
fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()>;
/// Collapse the directory-backed node at `path` into a single file (aggregation).
fn demote(&mut self, path: &str) -> Result<()>;
/// Commit staged changes with `message` and return the resulting commit id.
///
/// In `DocOS`, when nothing is staged the current commit id is returned unchanged.
fn save(&mut self, message: &str) -> Result<String>;
}
/// `DocManager` implementation backed by a `ContextStore`.
///
/// Reads are resolved against the snapshot identified by `commit_hash`; writes are staged
/// in a lazily opened transaction until `save` commits them.
pub struct DocOS<S: ContextStore> {
// Shared handle to the backing store.
store: Arc<S>,
// Request identifier passed to every store call.
req_id: String,
// Commit that reads are resolved against and that new transactions start from.
commit_hash: String,
// Pending transaction; `None` until the first write after construction, `reload` or `save`.
transaction: Option<Box<dyn Transaction>>,
}
impl<S: ContextStore> DocOS<S> {
/// Create a manager for `req_id` that reads from the snapshot at `commit_hash`.
///
/// No transaction is opened here; one is created lazily by the first write.
pub fn new(store: Arc<S>, req_id: &str, commit_hash: &str) -> Self {
    let req_id = req_id.to_owned();
    let commit_hash = commit_hash.to_owned();
    Self {
        store,
        req_id,
        commit_hash,
        transaction: None,
    }
}
/// Return the pending transaction, opening one against the current commit when none exists.
///
/// # Errors
/// Propagates the store's error if a new transaction cannot be started; in that case no
/// transaction is left pending.
fn ensure_transaction(&mut self) -> Result<&mut Box<dyn Transaction>> {
    let pending: Box<dyn Transaction> = match self.transaction.take() {
        Some(existing) => existing,
        None => self.store.begin_transaction(&self.req_id, &self.commit_hash)?,
    };
    // `Option::insert` stores the value and hands back a mutable reference to it.
    Ok(self.transaction.insert(pending))
}
/// Report whether `path` can be read as a file in the current commit.
///
/// Every `read_file` failure is reported as `Ok(false)`; this method never returns `Err`.
fn is_leaf(&self, path: &str) -> Result<bool> {
    let readable = self
        .store
        .read_file(&self.req_id, &self.commit_hash, path)
        .is_ok();
    Ok(readable)
}
/// Report whether `path` can be listed as a directory in the current commit.
///
/// Every `list_dir` failure is reported as `Ok(false)`; this method never returns `Err`.
fn is_composite(&self, path: &str) -> Result<bool> {
    let listable = self
        .store
        .list_dir(&self.req_id, &self.commit_hash, path)
        .is_ok();
    Ok(listable)
}
/// Parse Markdown headers to find subsections
fn parse_markdown_headers(&self, content: &str) -> Vec<DocNode> {
let re = Regex::new(r"(?m)^(#{1,6})\s+(.+)").unwrap();
let mut sections = Vec::new();
for cap in re.captures_iter(content) {
let _level = cap[1].len();
let name = cap[2].trim().to_string();
// Simplified logic: All headers are children of the file node
// In a real rich outline, we would build a tree based on level.
// For this MVP, we treat found sections as direct children in the outline view.
sections.push(DocNode {
name: name.clone(),
path: "".to_string(), // Virtual path, no direct file address
kind: DocNodeKind::Section,
children: vec![],
});
}
sections
}
/// Recursively build the outline node for `path`.
///
/// * `Composite`: lists the directory, adds the headers of its `index.md` (if readable) as
///   section children, then adds one child per entry sorted by name. `index.md`,
///   `_meta.json` and dot-files are hidden from the outline.
/// * `Leaf`: adds the Markdown headers of the file as section children.
/// * `Section`: has no children in this simplified view.
///
/// Reading file content for header extraction is best-effort: a file that cannot be opened
/// or is not valid UTF-8 simply contributes no sections.
///
/// # Errors
/// Fails if a directory that is being expanded cannot be listed.
fn build_node(&self, name: String, path: String, kind: DocNodeKind) -> Result<DocNode> {
    // Join a child name onto `path`. The root is addressed as "/", but store paths are
    // root-relative, so children of the root must not get a leading slash.
    let join = |child: &str| -> String {
        if path == "/" {
            child.to_string()
        } else {
            format!("{}/{}", path, child)
        }
    };

    let mut children = Vec::new();
    match &kind {
        DocNodeKind::Composite => {
            let entries = self.store.list_dir(&self.req_id, &self.commit_hash, &path)?;

            // 1. Headers of index.md (the composite node's own content) come first.
            if let Ok(mut reader) =
                self.store
                    .read_file(&self.req_id, &self.commit_hash, &join("index.md"))
            {
                let mut index_content = String::new();
                if reader.read_to_string(&mut index_content).is_ok() {
                    children.extend(self.parse_markdown_headers(&index_content));
                }
            }

            // 2. Child files and directories, sorted by name.
            let mut child_nodes = Vec::new();
            for entry in entries {
                if entry.name == "index.md"
                    || entry.name == "_meta.json"
                    || entry.name.starts_with('.')
                {
                    continue;
                }
                let child_path = join(&entry.name);
                let child_kind = match entry.kind {
                    EntryKind::Dir => DocNodeKind::Composite,
                    EntryKind::File => DocNodeKind::Leaf,
                };
                child_nodes.push(self.build_node(entry.name, child_path, child_kind)?);
            }
            child_nodes.sort_by(|a, b| a.name.cmp(&b.name));
            children.extend(child_nodes);
        }
        DocNodeKind::Leaf => {
            if let Ok(mut reader) =
                self.store
                    .read_file(&self.req_id, &self.commit_hash, &path)
            {
                let mut content = String::new();
                if reader.read_to_string(&mut content).is_ok() {
                    children.extend(self.parse_markdown_headers(&content));
                }
            }
        }
        DocNodeKind::Section => {
            // Sections don't have children in this simplified view.
        }
    }

    Ok(DocNode {
        name,
        path,
        kind,
        children,
    })
}
}
impl<S: ContextStore> DocManager for DocOS<S> {
/// Point the manager at `commit_hash` and discard any transaction that was not saved.
fn reload(&mut self, commit_hash: &str) -> Result<()> {
    self.commit_hash = commit_hash.to_owned();
    // Staged writes belong to the previous snapshot; drop them.
    drop(self.transaction.take());
    Ok(())
}
/// Build the outline of the whole tree as a composite node named `Root` at path `/`.
fn get_outline(&self) -> Result<DocNode> {
    let root_name = String::from("Root");
    let root_path = String::from("/");
    self.build_node(root_name, root_path, DocNodeKind::Composite)
}
/// Read the text of the node at `path`.
///
/// The root (`/`) is read from `index.md`, a directory-backed node from
/// `<path>/index.md`, and anything else from `path` itself.
///
/// # Errors
/// Fails with the context "Failed to read content" when the resolved file cannot be
/// opened, or with the underlying I/O error when it cannot be read as UTF-8 text.
fn read_content(&self, path: &str) -> Result<String> {
    let target_path = match path {
        "/" => String::from("index.md"),
        dir if self.is_composite(dir)? => format!("{}/index.md", dir),
        file => file.to_owned(),
    };

    let mut text = String::new();
    self.store
        .read_file(&self.req_id, &self.commit_hash, &target_path)
        .context("Failed to read content")?
        .read_to_string(&mut text)?;
    Ok(text)
}
/// Stage `content` as the new content of the node at `path` (upsert).
///
/// Path resolution mirrors `read_content`: the root (`/`) maps to `index.md`, a
/// directory-backed node to `<path>/index.md`, and anything else to `path` itself, so
/// content written through a path can be read back through the same path.
///
/// # Errors
/// Fails if a transaction cannot be opened or the store rejects the write.
fn write_content(&mut self, path: &str, content: &str) -> Result<()> {
    let target_path = if path == "/" {
        // Store paths are root-relative; "/" + "/index.md" would yield "//index.md".
        "index.md".to_string()
    } else if self.is_composite(path)? {
        format!("{}/index.md", path)
    } else {
        path.to_string()
    };
    let tx = self.ensure_transaction()?;
    tx.write(&target_path, content.as_bytes())?;
    Ok(())
}
/// Stage a new child `name` with `content` under `parent_path`.
///
/// If the parent is currently a single file it is promoted (fission): the file is
/// removed, its previous content is written to `<parent>/index.md`, and the child is
/// written next to it. If the parent is already a directory, or is the root, only the
/// child is written.
///
/// # Errors
/// * `name` is empty or is the reserved `index.md`, which would overwrite the parent's
///   own content.
/// * `parent_path` is neither a file, a directory, nor the root.
/// * Reading the parent, opening a transaction, or any staged operation fails.
fn insert_subsection(&mut self, parent_path: &str, name: &str, content: &str) -> Result<()> {
    if name.is_empty() || name == "index.md" {
        return Err(anyhow!(
            "Invalid subsection name '{}': it must be non-empty and must not be the reserved 'index.md'",
            name
        ));
    }

    let is_leaf = self.is_leaf(parent_path)?;
    let is_composite = self.is_composite(parent_path)?;
    if !is_leaf && !is_composite && parent_path != "/" {
        return Err(anyhow!("Parent path '{}' does not exist", parent_path));
    }

    if is_leaf {
        // Promote: Leaf -> Composite. Read the old content before staging any change.
        let old_content = self.read_content(parent_path)?;
        let tx = self.ensure_transaction()?;
        tx.remove(parent_path)?;
        let index_path = format!("{}/index.md", parent_path);
        tx.write(&index_path, old_content.as_bytes())?;
        let child_path = format!("{}/{}", parent_path, name);
        tx.write(&child_path, content.as_bytes())?;
    } else {
        // Children of the root are addressed without a leading slash.
        let child_path = if parent_path == "/" {
            name.to_string()
        } else {
            format!("{}/{}", parent_path, name)
        };
        let tx = self.ensure_transaction()?;
        tx.write(&child_path, content.as_bytes())?;
    }
    Ok(())
}
fn demote(&mut self, path: &str) -> Result<()> {
if !self.is_composite(path)? {
return Err(anyhow!("Path '{}' is not a composite node (directory)", path));
}
if path == "/" {
return Err(anyhow!("Cannot demote root"));
}
// 1. Read index.md (Main content)
let mut main_content = String::new();
if let Ok(content) = self.read_content(path) {
main_content = content;
}
// Reading directory entries
let entries = self.store.list_dir(&self.req_id, &self.commit_hash, path)?;
// Sort entries to have deterministic order
let mut sorted_entries = entries;
sorted_entries.sort_by(|a, b| a.name.cmp(&b.name));
let mut combined_content = main_content;
// Iterate for content reading (Borrowing self immutably)
for entry in &sorted_entries {
if entry.name == "index.md" || entry.name == "_meta.json" || entry.name.starts_with(".") {
continue;
}
let child_rel_path = format!("{}/{}", path, entry.name);
let child_content = self.read_content(&child_rel_path)?;
combined_content.push_str(&format!("\n\n# {}\n\n", entry.name));
combined_content.push_str(&child_content);
}
// Get list of items to remove before starting transaction (to avoid double borrow)
// We need a recursive list of paths to remove from git index.
let paths_to_remove = self.collect_recursive_paths(path)?;
let tx = self.ensure_transaction()?;
// 3. Remove everything recursively
for p in paths_to_remove {
tx.remove(&p)?;
}
// Also remove the directory path itself (conceptually, or handled by git index cleanup)
// In our simplified VGCS, remove(dir) is not enough if not empty.
// But we just cleaned up recursively.
// 4. Write new file
tx.write(path, combined_content.as_bytes())?;
Ok(())
}
/// Commit the pending transaction with `message` (author `DocOS User`) and return the new
/// commit id, which also becomes the snapshot used for subsequent reads.
///
/// When nothing is pending, the current commit id is returned and nothing is committed.
/// The pending transaction is consumed even if the commit fails.
fn save(&mut self, message: &str) -> Result<String> {
    let Some(pending) = self.transaction.take() else {
        return Ok(self.commit_hash.clone());
    };
    let new_oid = pending.commit(message, "DocOS User")?;
    self.commit_hash = new_oid.clone();
    Ok(new_oid)
}
}
impl<S: ContextStore> DocOS<S> {
/// Collect the paths of all files below the directory `path`, depth-first.
///
/// Only files are returned; directories themselves are not listed because removing
/// their files is what the caller needs. Reads from the store with `&self` only, so it
/// can run before a transaction is borrowed mutably.
///
/// # Errors
/// Fails if `path` or any nested directory cannot be listed. Returning a partial list
/// instead would let the caller remove only some files and then write over a directory
/// that still exists.
fn collect_recursive_paths(&self, path: &str) -> Result<Vec<String>> {
    let entries = self
        .store
        .list_dir(&self.req_id, &self.commit_hash, path)
        .with_context(|| format!("Failed to list directory '{}'", path))?;

    let mut paths = Vec::new();
    for entry in entries {
        let child_path = format!("{}/{}", path, entry.name);
        match entry.kind {
            EntryKind::File => paths.push(child_path),
            EntryKind::Dir => paths.extend(self.collect_recursive_paths(&child_path)?),
        }
    }
    Ok(paths)
}
}

View File

@ -1,7 +1,9 @@
pub mod types;
pub mod traits;
pub mod vgcs;
pub mod docos;
pub use types::*;
pub use traits::*;
pub use vgcs::Vgcs;
pub use docos::{DocOS, DocManager};

View File

@ -30,3 +30,17 @@ pub struct BlobRef {
pub original_name: String,
}
/// Physical or virtual nature of a node in the document outline.
///
/// The enum is fieldless, so it also derives `Copy` and `Eq`; existing `.clone()` calls
/// keep compiling.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
pub enum DocNodeKind {
    /// Pure content node (a file).
    Leaf,
    /// Composite node (a directory with `index.md`).
    Composite,
    /// Virtual node (a Markdown header inside a file).
    Section,
}
/// One node of the document outline tree.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct DocNode {
// Display name of the node.
pub name: String,
pub path: String, // Logical path, e.g. "Analysis/Revenue"
pub kind: DocNodeKind,
pub children: Vec<DocNode>, // Only for Composite or Section-bearing Leaf
}

View File

@ -0,0 +1,141 @@
use workflow_context::{ContextStore, Vgcs, DocOS, DocManager, DocNodeKind};
use tempfile::TempDir;
use std::sync::Arc;
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
/// A single leaf written into an empty repository shows up in the outline and can be
/// read back unchanged.
#[test]
fn test_docos_basic() -> anyhow::Result<()> {
    // Fresh repository in a throwaway directory.
    let workspace = TempDir::new()?;
    let request = "req-docos-1";
    let store = Arc::new(Vgcs::new(workspace.path()));
    store.init_repo(request)?;

    // Start from the empty (all-zero) commit and add one leaf document.
    let mut docos = DocOS::new(Arc::clone(&store), request, ZERO_OID);
    docos.write_content("Introduction", "Intro Content")?;
    docos.save("Add Intro")?;

    // Outline: Root -> [Introduction (Leaf)]
    let root = docos.get_outline()?;
    assert_eq!(root.children.len(), 1);
    let intro = &root.children[0];
    assert_eq!(intro.name, "Introduction");
    assert_eq!(intro.kind, DocNodeKind::Leaf);

    // The content round-trips.
    let stored = docos.read_content("Introduction")?;
    assert_eq!(stored, "Intro Content");
    Ok(())
}
/// Inserting a subsection into a leaf promotes it to a composite whose `index.md` keeps
/// the original content.
#[test]
fn test_docos_fission() -> anyhow::Result<()> {
    let workspace = TempDir::new()?;
    let request = "req-docos-2";
    let store = Arc::new(Vgcs::new(workspace.path()));
    store.init_repo(request)?;
    let mut docos = DocOS::new(Arc::clone(&store), request, ZERO_OID);

    // Begin with a plain leaf named "Analysis".
    docos.write_content("Analysis", "General Analysis")?;
    let first_commit = docos.save("Init Analysis")?;

    // Adding "Revenue" below it must turn "Analysis" into a composite.
    docos.reload(&first_commit)?;
    docos.insert_subsection("Analysis", "Revenue", "Revenue Data")?;
    let second_commit = docos.save("Split Analysis")?;

    // Structure: Root -> [Analysis (Composite)] -> [Revenue (Leaf)]
    docos.reload(&second_commit)?;
    let root = docos.get_outline()?;
    assert_eq!(root.children.len(), 1);
    let analysis = &root.children[0];
    assert_eq!(analysis.name, "Analysis");
    assert_eq!(analysis.kind, DocNodeKind::Composite);

    // index.md is hidden in the outline, so Revenue is the only child.
    assert_eq!(analysis.children.len(), 1);
    let revenue = &analysis.children[0];
    assert_eq!(revenue.name, "Revenue");
    assert_eq!(revenue.kind, DocNodeKind::Leaf);

    // Reading "Analysis" now resolves to "Analysis/index.md" with the original text.
    let analysis_text = docos.read_content("Analysis")?;
    assert_eq!(analysis_text, "General Analysis");
    let revenue_text = docos.read_content("Analysis/Revenue")?;
    assert_eq!(revenue_text, "Revenue Data");
    Ok(())
}
/// Demoting a composite fuses its children into one leaf file whose Markdown headers
/// still surface the former children as sections in the outline.
#[test]
fn test_docos_fusion_and_outline() -> anyhow::Result<()> {
    let temp_dir = TempDir::new()?;
    let store = Arc::new(Vgcs::new(temp_dir.path()));
    let req_id = "req-docos-3";
    store.init_repo(req_id)?;
    let mut docos = DocOS::new(store.clone(), req_id, ZERO_OID);

    // 1. Create a composite structure (pre-fissioned state)
    // Root -> [Chapter1 (Composite)] -> [SectionA (Leaf), SectionB (Leaf)]
    docos.write_content("Chapter1/index.md", "Chapter 1 Intro")?;
    docos.write_content("Chapter1/SectionA", "Content A")?;
    docos.write_content("Chapter1/SectionB", "Content B")?;
    let commit_1 = docos.save("Setup Structure")?;
    docos.reload(&commit_1)?;

    // Verify the initial outline
    let outline_1 = docos.get_outline()?;
    assert_eq!(outline_1.children.len(), 1);
    let ch1 = &outline_1.children[0];
    assert_eq!(ch1.kind, DocNodeKind::Composite);
    assert_eq!(ch1.children.len(), 2); // SectionA, SectionB

    // 2. Demote (fusion)
    docos.demote("Chapter1")?;
    let commit_2 = docos.save("Demote Chapter 1")?;

    // 3. Verify the fusion result: Chapter1 is now a leaf
    docos.reload(&commit_2)?;
    let outline_2 = docos.get_outline()?;
    assert_eq!(outline_2.children.len(), 1);
    let ch1_fused = &outline_2.children[0];
    assert_eq!(ch1_fused.name, "Chapter1");
    assert_eq!(ch1_fused.kind, DocNodeKind::Leaf);

    // Demotion appends each child under a "# Name" header, and the outline parses
    // headers, so both former children must reappear as virtual sections, in order.
    assert_eq!(ch1_fused.children.len(), 2);
    assert_eq!(ch1_fused.children[0].name, "SectionA");
    assert_eq!(ch1_fused.children[0].kind, DocNodeKind::Section);
    assert_eq!(ch1_fused.children[1].name, "SectionB");
    assert_eq!(ch1_fused.children[1].kind, DocNodeKind::Section);

    // 4. The fused file contains the intro plus both sections with their content
    let content = docos.read_content("Chapter1")?;
    assert!(content.contains("Chapter 1 Intro"));
    assert!(content.contains("# SectionA"));
    assert!(content.contains("Content A"));
    assert!(content.contains("# SectionB"));
    assert!(content.contains("Content B"));
    Ok(())
}