volcengine
diff --git a/config/prompts_en.yaml b/config/prompts_en.yaml
Lines changed: 646 additions & 468 deletions
diff --git a/config/prompts_zh.yaml b/config/prompts_zh.yaml
Lines changed: 323 additions & 143 deletions
diff --git a/opencontext/context_consumption/context_agent/core/llm_context_strategy.py b/opencontext/context_consumption/context_agent/core/llm_context_strategy.py
Lines changed: 143 additions & 167 deletions
diff --git a/opencontext/context_consumption/context_agent/models/enums.py b/opencontext/context_consumption/context_agent/models/enums.py
Lines changed: 2 additions & 1 deletion
diff --git a/opencontext/context_consumption/context_agent/models/schemas.py b/opencontext/context_consumption/context_agent/models/schemas.py
Lines changed: 4 additions & 0 deletions
diff --git a/opencontext/context_consumption/context_agent/nodes/context.py b/opencontext/context_consumption/context_agent/nodes/context.py
Lines changed: 52 additions & 80 deletions
diff --git a/opencontext/context_consumption/context_agent/nodes/intent.py b/opencontext/context_consumption/context_agent/nodes/intent.py
Lines changed: 3 additions & 3 deletions
diff --git a/opencontext/context_consumption/generation/smart_todo_manager.py b/opencontext/context_consumption/generation/smart_todo_manager.py
Lines changed: 14 additions & 8 deletions
diff --git a/opencontext/managers/consumption_manager.py b/opencontext/managers/consumption_manager.py
Lines changed: 0 additions & 1 deletion
@@ -72,11 +72,12 @@ class EventType(str, Enum):
 
 
 class QueryType(str, Enum):
-    """Query type enumeration - four categories"""
+    """Query type enumeration - five categories"""
     SIMPLE_CHAT = "simple_chat"             # Simple chat (daily greetings, small talk, etc.)
     DOCUMENT_EDIT = "document_edit"         # Document editing and rewriting (preserving existing facts/not introducing new information)
     QA_ANALYSIS = "qa_analysis"             # Q&A (covering summarization, analysis, and dialogue based on documents and complex context)
     CONTENT_GENERATION = "content_generation"  # Document content generation/expansion (allowing new information)
+    CLARIFICATION_NEEDED = "clarification_needed"  # Query is too vague or ambiguous, needs user clarification
 
 
 class ContextSufficiency(str, Enum):
 
@@ -155,6 +155,10 @@ def get_summary(self) -> str:
 
         summary_parts = [f"{source.value}: {count}" for source, count in source_counts.items()]
         return f"Collected {len(self.items)} context items ({', '.join(summary_parts)}), sufficiency: {self.sufficiency.value}"
+    
+    def get_chat_history(self) -> List[Dict[str, str]]:
+        """Return one {'role': ..., 'content': ...} dict per message in self.chat_history, in order."""
+        return [{"role": msg.role, "content": msg.content} for msg in self.chat_history]
 
 @dataclass
 class ExecutionStep:
 
@@ -6,6 +6,7 @@
 Intelligently collects and judges context information
 """
 
+import json
 from typing import List, Dict, Any, Optional
 from .base import BaseNode
 from ..core.state import WorkflowState, StreamEvent
@@ -69,108 +70,79 @@ async def process(self, state: WorkflowState) -> WorkflowState:
 
         # LLM-driven iterative collection process
         iteration = 0
-        tool_history = []  # Record tool call history
         while iteration < self.max_iterations:
             iteration += 1
             progress = iteration / self.max_iterations
-            
             await self.streaming_manager.emit(StreamEvent(
                 type=EventType.RUNNING,
                 content=f"Round {iteration} of intelligent context collection...",
                 stage=WorkflowStage.CONTEXT_GATHERING,
                 progress=progress
             ))
-            tool_calls = await self.strategy.analyze_and_plan_tools(
-                state.intent,
-                state.contexts,
-                max_tools=5,
-                iteration=iteration,
-                tool_history=tool_history
-            )
-            # # Filter duplicate calls
-            # if tool_history:
-            #     tool_calls = await self.strategy.filter_duplicate_calls(
-            #         tool_calls,
-            #         tool_history
-            #     )
-            if tool_calls:
-                # 2. Concurrently execute tool calls
-                await self.streaming_manager.emit(StreamEvent(
-                    type=EventType.RUNNING,
-                    content=f"Concurrently calling {len(tool_calls)} tools...",
-                    stage=WorkflowStage.CONTEXT_GATHERING))
-                new_context_items = await self.strategy.execute_tool_calls_parallel(tool_calls)
-                # Record tool call history
-                # for call in tool_calls:
-                #     func_name = call.get("function", {}).get("name")
-                #     func_args = call.get("function", {}).get("arguments", {})
-                #     tool_history.append({
-                #         "tool_name": func_name,
-                #         "query": func_args.get("query", ""),
-                #         "iteration": iteration
-                #     })
-                # 3. Add results to context
-                for item in new_context_items:
-                    state.contexts.add_item(item)
-                await self.streaming_manager.emit(StreamEvent(
-                    type=EventType.DONE,
-                    content=f"Collected {len(new_context_items)} new context items in this round",
-                    stage=WorkflowStage.CONTEXT_GATHERING))
-            # 4. LLM evaluates sufficiency
+
+            # 1. Evaluate sufficiency first (including first iteration)
             sufficiency = await self.strategy.evaluate_sufficiency(
                 state.contexts,
                 state.intent
             )
             state.contexts.sufficiency = sufficiency
-            self.logger.info(f"sufficiency {sufficiency}")
-            
-            # 5. If there are many context items, filter for relevance
-            if len(state.contexts.items) > 5:  # Only filter if more than 5 items
+
+            if sufficiency == ContextSufficiency.SUFFICIENT:
                 await self.streaming_manager.emit(StreamEvent(
-                    type=EventType.RUNNING,
-                    content="Filtering irrelevant context...",
-                    stage=WorkflowStage.CONTEXT_GATHERING
+                    type=EventType.DONE,
+                    content=f"Context is sufficient, collected {len(state.contexts.items)} items in total",
+                    stage=WorkflowStage.CONTEXT_GATHERING,
+                    progress=1.0
                 ))
-                
-                # Get relevant context IDs
-                relevant_ids = await self.strategy.filter_relevant_contexts(
-                    state.contexts.items,
-                    state.intent.enhanced_query or state.intent.original_query
-                )
-                
-                # Convert relevant IDs to a set for quick lookup
-                relevant_id_set = set(relevant_ids)
-                
-                # Mark irrelevant context and keep relevant ones
-                original_count = len(state.contexts.items)
-                filtered_items = []
-                
-                for item in state.contexts.items:
-                    if item.id in relevant_id_set:
-                        item.is_relevant = True
-                        filtered_items.append(item)
-                    else:
-                        item.is_relevant = False
-                        item.relevance_reason = "Judged irrelevant to the user's question by the LLM"
-                
-                # Update the context collection
-                state.contexts.items = filtered_items
-                
-                # await self.streaming_manager.emit(StreamEvent(
-                #     type=EventType.DONE,
-                #     content=f"Retained {len(filtered_items)} relevant context items after filtering (originally {original_count})",
-                #     stage=WorkflowStage.CONTEXT_GATHERING
-                # ))
-            
-            if sufficiency == ContextSufficiency.SUFFICIENT:
+                break
+
+            # 2. Analyze information gap and plan tool calls
+            tool_calls, _ = await self.strategy.analyze_and_plan_tools(
+                state.intent,
+                state.contexts,
+                iteration=iteration
+            )
+
+            if not tool_calls:
                 await self.streaming_manager.emit(StreamEvent(
                     type=EventType.DONE,
-                    content=f"Context collection complete, collected {len(state.contexts.items)} items in total",
+                    content=f"No more tools to call, ending collection with {len(state.contexts.items)} items",
                     stage=WorkflowStage.CONTEXT_GATHERING,
                     progress=1.0
                 ))
                 break
-            elif iteration >= self.max_iterations:
+
+            # 3. Execute tool calls concurrently
+            await self.streaming_manager.emit(StreamEvent(
+                type=EventType.RUNNING,
+                content=f"Concurrently calling {len(tool_calls)} tools...",
+                stage=WorkflowStage.CONTEXT_GATHERING))
+            new_context_items = await self.strategy.execute_tool_calls_parallel(tool_calls)
+
+            # 4. Validate and filter tool results
+            await self.streaming_manager.emit(StreamEvent(
+                type=EventType.RUNNING,
+                content="Validating tool results and filtering relevant contexts...",
+                stage=WorkflowStage.CONTEXT_GATHERING
+            ))
+            validated_items, _ = await self.strategy.validate_and_filter_tool_results(
+                tool_calls,
+                new_context_items,
+                state.intent,
+                state.contexts
+            )
+
+            # 5. Add validated results to context collection
+            for item in validated_items:
+                state.contexts.add_item(item)
+
+            await self.streaming_manager.emit(StreamEvent(
+                type=EventType.DONE,
+                content=f"Round {iteration}: Added {len(validated_items)} relevant context items (filtered from {len(new_context_items)} total)",
+                stage=WorkflowStage.CONTEXT_GATHERING))
+
+            # Check if reached max iterations
+            if iteration >= self.max_iterations:
                 state.contexts.sufficiency = ContextSufficiency.PARTIAL
                 await self.streaming_manager.emit(StreamEvent(
                     type=EventType.DONE,
 
@@ -33,7 +33,7 @@ async def process(self, state: WorkflowState) -> WorkflowState:
         """Process intent analysis"""
         await self.streaming_manager.emit(StreamEvent(type=EventType.THINKING, content="Analyzing your intent...", stage=WorkflowStage.INTENT_ANALYSIS))        
         # 1. Classify query type
-        query_type = await self._classify_query(state.query.text)
+        query_type = await self._classify_query(state.query.text, state.contexts.get_chat_history())
         if not query_type:
             await self.streaming_manager.emit(StreamEvent(type=EventType.FAIL, content="Intent analysis failed", stage=WorkflowStage.INTENT_ANALYSIS,
                                           metadata={ "query": state.query.text }))
@@ -59,12 +59,12 @@ async def process(self, state: WorkflowState) -> WorkflowState:
         # )
         return state
 
-    async def _classify_query(self, query: str) -> QueryType:
+    async def _classify_query(self, query: str, chat_history: List[Dict[str, str]]) -> QueryType:
         """Use LLM to classify query types, including confidence assessment and fallback strategies"""
         prompt_group = get_prompt_group("chat_workflow.query_classification")
         messages = [
             {"role": "system", "content": prompt_group['system']},
-            {"role": "user", "content": prompt_group['user'].format(query=query)}
+            {"role": "user", "content": prompt_group['user'].format(query=query, chat_history=json.dumps(chat_history))}
         ]
         response = await generate_with_messages_async(
             messages,
 
@@ -37,6 +37,9 @@ class TodoTask:
     assignee: Optional[str] = None
     participants: List[str] = field(default_factory=list)
     context_reference: Optional[str] = None
+    reason: Optional[str] = None
+    created_at: Optional[str] = None
+
 
 
 class SmartTodoManager:
@@ -79,22 +82,23 @@ def generate_todo_tasks(self, start_time: int, end_time: int) -> Optional[str]:
             # 2. Get regular context data
             contexts = self._get_task_relevant_contexts(start_time, end_time, activity_insights)
             # 3. Get historical todo completion status
-            historical_todos = self._get_historical_todos(days=1)
+            historical_todos = self._get_historical_todos()
             # 4. Synthesize all information to generate high-quality todos
             tasks = self._extract_tasks_from_contexts_enhanced(
                 contexts, start_time, end_time, activity_insights, historical_todos)
 
             if not tasks:
-                logger.info("No clear tasks were identified from the activity.")
                 return None
             # Store in the SQLite todo table
             todo_ids = []
             for task in tasks:
                 participants_str = ""
                 if task.get('participants') and len(task['participants']) > 0:
                     participants_str = ",".join(task['participants'])
-                
+
                 content = task.get('description', '')
+                reason = task.get('reason', '')
+                logger.info(f"Generated Todo Task: {task}")
                 urgency = self._map_priority_to_urgency(task.get('priority', 'normal'))
 
                 deadline = None
@@ -107,12 +111,13 @@ def generate_todo_tasks(self, start_time: int, end_time: int) -> Optional[str]:
                             deadline = datetime.datetime.strptime(task['due_date'], '%Y-%m-%d')
                     except:
                         pass
-                
+
                 todo_id = self.storage.insert_todo(
                     content=content,
                     urgency=urgency,
                     end_time=deadline,
-                    assignee=participants_str
+                    assignee=participants_str,
+                    reason=reason
                 )
                 todo_ids.append(todo_id)
 
@@ -172,12 +177,12 @@ def _get_recent_activity_insights(self, start: int, end: int) -> Dict[str, Any]:
             logger.exception(f"Failed to get activity insights: {e}")
             return {}
 
-    def _get_historical_todos(self, days: int = 7) -> List[Dict[str, Any]]:
+    def _get_historical_todos(self, days: int = 7, limit: int = 50) -> List[Dict[str, Any]]:
         """Get historical todo records.
         """
         try:
             start_time = datetime.datetime.now() - datetime.timedelta(days=days)
-            todos = self.storage.get_todos(limit=50, start_time=start_time)
+            todos = self.storage.get_todos(limit=limit, start_time=start_time)
             return todos
         except Exception as e:
             logger.exception(f"Failed to get historical todos: {e}")
@@ -291,7 +296,8 @@ def _post_process_tasks(self, tasks: List[Dict]) -> List[Dict]:
                     'assignee': task.get('assignee', ''),  # Task assignee
                     'participants': task.get('participants', []),  # List of participants
                     'context_reference': task.get('context_reference', ''),
-                    'created_at': datetime.datetime.now().isoformat()
+                    'created_at': datetime.datetime.now().isoformat(),
+                    'reason': task.get('reason', '')
                 }
 
                 # Process the deadline
 
@@ -216,7 +216,6 @@ def check_and_generate_daily_report():
                 self._task_timers['report'].start()
 
         check_and_generate_daily_report()
-        logger.info(f"Daily report generation timer started, checking every minute, generation time: {self._daily_report_time}")
 
     def _start_activity_timer(self):
         """Start activity recording timer"""