chore(app_runner): Remove some dead code
Signed-off-by: -LAN- <laipz8200@outlook.com>pull/21739/head
parent
a0204e084f
commit
8260a4dea3
@ -0,0 +1,645 @@
|
|||||||
|
# Queue-Based Graph Engine Specification
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document specifies the design and implementation of a new queue-based graph engine for workflow execution. The new engine will replace the current thread-based execution model with a more scalable and manageable queue-based approach.
|
||||||
|
|
||||||
|
## Architecture Components
|
||||||
|
|
||||||
|
### 1. Graph
|
||||||
|
|
||||||
|
A data structure representing the workflow definition, independent of execution.
|
||||||
|
|
||||||
|
**Responsibilities:**
|
||||||
|
- Store nodes and edges
|
||||||
|
- Provide graph traversal methods
|
||||||
|
- Parse from dictionary representation
|
||||||
|
- Validate graph structure (cycles, connectivity)
|
||||||
|
|
||||||
|
**Key Methods:**
|
||||||
|
```python
|
||||||
|
class Graph:
|
||||||
|
nodes: Dict[str, BaseNode]
|
||||||
|
edges: List[Edge]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_dict(cls, data: dict[str, Any]) -> 'Graph':
|
||||||
|
"""Parse a dictionary to create a Graph instance"""
|
||||||
|
|
||||||
|
def get_node(self, node_id: str) -> BaseNode:
|
||||||
|
"""Get node by ID"""
|
||||||
|
|
||||||
|
def get_edges_from(self, node_id: str) -> List[Edge]:
|
||||||
|
"""Get all outgoing edges from a node"""
|
||||||
|
|
||||||
|
def get_edges_to(self, node_id: str) -> List[Edge]:
|
||||||
|
"""Get all incoming edges to a node"""
|
||||||
|
|
||||||
|
def validate(self) -> None:
|
||||||
|
"""Validate graph structure"""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. GraphEngine
|
||||||
|
|
||||||
|
The execution engine that processes a Graph using queue-based scheduling.
|
||||||
|
|
||||||
|
**Responsibilities:**
|
||||||
|
- Execute graphs using queue-based scheduling
|
||||||
|
- Manage execution queues (ready_q, running_q, completed_q)
|
||||||
|
- Schedule nodes based on preconditions
|
||||||
|
- Handle event propagation
|
||||||
|
- Support suspend/resume operations
|
||||||
|
|
||||||
|
**Key Methods:**
|
||||||
|
```python
|
||||||
|
class GraphEngine:
|
||||||
|
def __init__(self, graph: Graph, runtime_state: GraphRuntimeState):
|
||||||
|
"""Initialize with graph and runtime state"""
|
||||||
|
|
||||||
|
def run(self, start_node_id: str) -> None:
|
||||||
|
"""Start execution from specified node"""
|
||||||
|
|
||||||
|
def resume(self) -> None:
|
||||||
|
"""Resume execution from saved state"""
|
||||||
|
|
||||||
|
def suspend(self) -> GraphRuntimeState:
|
||||||
|
"""Suspend execution and return current state"""
|
||||||
|
|
||||||
|
def schedule(self) -> None:
|
||||||
|
"""Main scheduling loop - moves nodes between queues"""
|
||||||
|
|
||||||
|
def check_preconditions(self, node: BaseNode) -> bool:
|
||||||
|
"""Check if node's preconditions are satisfied"""
|
||||||
|
|
||||||
|
def execute_node(self, node: ExecutableNode) -> None:
|
||||||
|
"""Execute a single node"""
|
||||||
|
```
|
||||||
|
|
||||||
|
**Queue Management:**
|
||||||
|
- `ready_q`: Nodes whose preconditions are satisfied
|
||||||
|
- `running_q`: Currently executing nodes
|
||||||
|
- `completed_q`: Successfully completed nodes
|
||||||
|
- `failed_q`: Failed nodes for retry or error handling
|
||||||
|
|
||||||
|
### 3. GraphRuntimeState
|
||||||
|
|
||||||
|
Captures the complete execution state for suspend/resume functionality.
|
||||||
|
|
||||||
|
**Responsibilities:**
|
||||||
|
- Store queue states
|
||||||
|
- Track node execution status
|
||||||
|
- Maintain variable pool state
|
||||||
|
- Support serialization/deserialization
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class GraphRuntimeState:
|
||||||
|
# Queue states
|
||||||
|
ready_nodes: List[str]
|
||||||
|
running_nodes: List[str]
|
||||||
|
completed_nodes: List[str]
|
||||||
|
failed_nodes: List[str]
|
||||||
|
|
||||||
|
# Node execution states
|
||||||
|
node_states: Dict[str, NodeExecutionState]
|
||||||
|
|
||||||
|
# Variable pool
|
||||||
|
variables: Dict[str, Any]
|
||||||
|
|
||||||
|
# Execution metadata
|
||||||
|
start_time: datetime
|
||||||
|
last_checkpoint: datetime
|
||||||
|
execution_id: str
|
||||||
|
|
||||||
|
def to_dict(self) -> dict[str, Any]:
|
||||||
|
"""Serialize state to dictionary"""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_dict(cls, data: dict[str, Any]) -> 'GraphRuntimeState':
|
||||||
|
"""Deserialize state from dictionary"""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Edge
|
||||||
|
|
||||||
|
Represents connections between nodes in the graph.
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class Edge:
|
||||||
|
source_node_id: str
|
||||||
|
target_node_id: str
|
||||||
|
condition: Optional[Callable[[GraphEngine], bool]] = None
|
||||||
|
priority: int = 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Node Hierarchy
|
||||||
|
|
||||||
|
#### BaseNode
|
||||||
|
|
||||||
|
Abstract base class for all nodes.
|
||||||
|
|
||||||
|
```python
|
||||||
|
class BaseNode(ABC):
|
||||||
|
node_id: str
|
||||||
|
node_type: NodeType
|
||||||
|
data: dict[str, Any]
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def preconditions(self) -> List[Callable[[GraphEngine], bool]]:
|
||||||
|
"""Return list of precondition checks"""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
@abstractmethod
|
||||||
|
def from_dict(cls, data: dict[str, Any]) -> Self:
|
||||||
|
"""Factory method for node creation with validation"""
|
||||||
|
|
||||||
|
def get_dependencies(self) -> List[str]:
|
||||||
|
"""Return list of node IDs this node depends on"""
|
||||||
|
```
|
||||||
|
|
||||||
|
#### ExecutableNode
|
||||||
|
|
||||||
|
Nodes that perform actual work.
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ExecutableNode(BaseNode):
|
||||||
|
@abstractmethod
|
||||||
|
async def execute(self, context: ExecutionContext) -> NodeResult:
|
||||||
|
"""Execute the node's logic"""
|
||||||
|
|
||||||
|
def preconditions(self) -> List[Callable[[GraphEngine], bool]]:
|
||||||
|
"""Default: all upstream nodes must be completed"""
|
||||||
|
return [
|
||||||
|
lambda engine: all(
|
||||||
|
engine.is_node_completed(dep_id)
|
||||||
|
for dep_id in self.get_dependencies()
|
||||||
|
)
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Special Node Types
|
||||||
|
|
||||||
|
#### StartNode
|
||||||
|
- **Characteristics:**
|
||||||
|
- Always has satisfied preconditions
|
||||||
|
- No dependencies
|
||||||
|
- Triggers initial execution
|
||||||
|
- Only one per workflow
|
||||||
|
- Cannot be skipped
|
||||||
|
|
||||||
|
```python
|
||||||
|
class StartNode(ExecutableNode):
|
||||||
|
def preconditions(self) -> List[Callable[[GraphEngine], bool]]:
|
||||||
|
return [] # Always ready
|
||||||
|
|
||||||
|
async def execute(self, context: ExecutionContext) -> NodeResult:
|
||||||
|
return NodeResult(status="success", outputs={})
|
||||||
|
```
|
||||||
|
|
||||||
|
#### EndNode
|
||||||
|
- **Characteristics:**
|
||||||
|
- Marks workflow completion
|
||||||
|
- Can have multiple end nodes (different paths)
|
||||||
|
- Triggers cleanup operations
|
||||||
|
- Collects final outputs
|
||||||
|
|
||||||
|
```python
|
||||||
|
class EndNode(ExecutableNode):
|
||||||
|
def preconditions(self) -> List[Callable[[GraphEngine], bool]]:
|
||||||
|
# At least one path must reach this node
|
||||||
|
return [lambda engine: any(
|
||||||
|
engine.is_node_completed(dep_id)
|
||||||
|
for dep_id in self.get_dependencies()
|
||||||
|
)]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Execution Flow
|
||||||
|
|
||||||
|
### 1. Graph Creation Phase
|
||||||
|
```python
|
||||||
|
# Parse from dictionary
|
||||||
|
graph_data = {
|
||||||
|
"nodes": [...],
|
||||||
|
"edges": [...]
|
||||||
|
}
|
||||||
|
graph = Graph.from_dict(graph_data)
|
||||||
|
graph.validate()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Engine Initialization
|
||||||
|
```python
|
||||||
|
# Fresh start
|
||||||
|
runtime_state = GraphRuntimeState.create_new()
|
||||||
|
engine = GraphEngine(graph, runtime_state)
|
||||||
|
engine.run(start_node_id="start_node")
|
||||||
|
|
||||||
|
# Resume from suspended state
|
||||||
|
saved_state = GraphRuntimeState.from_dict(saved_state_dict)
|
||||||
|
engine = GraphEngine(graph, saved_state)
|
||||||
|
engine.resume()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Scheduling Loop
|
||||||
|
```
|
||||||
|
while not all_nodes_processed():
|
||||||
|
# Check completed nodes and update downstream
|
||||||
|
for node_id in completed_q:
|
||||||
|
for edge in graph.get_edges_from(node_id):
|
||||||
|
target_node = graph.get_node(edge.target_node_id)
|
||||||
|
if check_preconditions(target_node):
|
||||||
|
ready_q.add(target_node)
|
||||||
|
|
||||||
|
# Execute ready nodes
|
||||||
|
while ready_q and has_available_workers():
|
||||||
|
node = ready_q.pop()
|
||||||
|
running_q.add(node)
|
||||||
|
submit_to_worker(node)
|
||||||
|
|
||||||
|
# Handle timeouts and failures
|
||||||
|
check_timeouts()
|
||||||
|
process_failed_nodes()
|
||||||
|
|
||||||
|
# Checkpoint state periodically
|
||||||
|
if should_checkpoint():
|
||||||
|
runtime_state.update_from_engine(engine)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Node Execution
|
||||||
|
```
|
||||||
|
1. Worker picks up node from queue
|
||||||
|
2. Validates preconditions (double-check)
|
||||||
|
3. Execute node logic
|
||||||
|
4. Update variable pool in runtime state
|
||||||
|
5. Emit events
|
||||||
|
6. Move to completed_q or failed_q
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Suspend/Resume Operations
|
||||||
|
```python
|
||||||
|
# Suspend
|
||||||
|
current_state = engine.suspend()
|
||||||
|
state_dict = current_state.to_dict()
|
||||||
|
# Save state_dict to database/storage
|
||||||
|
|
||||||
|
# Resume
|
||||||
|
saved_state = GraphRuntimeState.from_dict(state_dict)
|
||||||
|
engine = GraphEngine(graph, saved_state)
|
||||||
|
engine.resume()
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implementation Plan
|
||||||
|
|
||||||
|
### Phase 1: Core Infrastructure
|
||||||
|
1. Implement Graph class with `from_dict` parsing
|
||||||
|
2. Implement base node classes (BaseNode, ExecutableNode)
|
||||||
|
3. Create GraphRuntimeState for state management
|
||||||
|
4. Build precondition framework
|
||||||
|
5. Implement GraphEngine with queue management
|
||||||
|
|
||||||
|
### Phase 2: Node Migration
|
||||||
|
1. Convert existing nodes to ExecutableNode
|
||||||
|
2. Implement `from_dict` methods for each node type
|
||||||
|
3. Add precondition logic to each node type
|
||||||
|
4. Update node execution to async
|
||||||
|
|
||||||
|
### Phase 3: State Management
|
||||||
|
1. Implement suspend/resume functionality
|
||||||
|
2. Add checkpoint mechanisms
|
||||||
|
3. Create state persistence layer
|
||||||
|
4. Add recovery mechanisms
|
||||||
|
|
||||||
|
### Phase 4: Advanced Features
|
||||||
|
1. Priority-based scheduling
|
||||||
|
2. Resource-aware execution
|
||||||
|
3. Distributed execution support
|
||||||
|
4. Advanced retry mechanisms
|
||||||
|
5. Performance optimizations
|
||||||
|
|
||||||
|
## Benefits
|
||||||
|
|
||||||
|
1. **Scalability**: Easy to distribute across workers
|
||||||
|
2. **Resource Management**: Better control over concurrent executions
|
||||||
|
3. **Observability**: Clear queue states for monitoring
|
||||||
|
4. **Fault Tolerance**: Failed nodes can be retried independently
|
||||||
|
5. **Flexibility**: Easy to add new scheduling strategies
|
||||||
|
|
||||||
|
## Migration Strategy
|
||||||
|
|
||||||
|
1. **Compatibility Layer**: Maintain backward compatibility during transition
|
||||||
|
2. **Feature Flag**: Toggle between old and new engines
|
||||||
|
3. **Gradual Rollout**: Test with simple workflows first
|
||||||
|
4. **Performance Monitoring**: Compare metrics between implementations
|
||||||
|
5. **Rollback Plan**: Easy switch back to old engine if needed
|
||||||
|
|
||||||
|
## Key Design Decisions
|
||||||
|
|
||||||
|
1. **Graph and GraphEngine Separation**: Graph is a pure data structure, GraphEngine handles execution
|
||||||
|
2. **GraphRuntimeState**: Enables suspend/resume functionality with complete state capture
|
||||||
|
3. **No ContainerNode Initially**: Focus on core execution model first
|
||||||
|
4. **Queue-Based Scheduling**: Better resource management and scalability than thread-based approach
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. **Queue Backend**: In-memory, Redis, or database-backed?
|
||||||
|
2. **Worker Pool**: Thread-based, process-based, or distributed?
|
||||||
|
3. **State Persistence**: How frequently to checkpoint?
|
||||||
|
4. **Event System**: Keep current or migrate to message queue?
|
||||||
|
5. **Priority Algorithm**: Simple numeric or complex scoring?
|
||||||
|
6. **Graph Parsing**: Should we reuse existing node parsing logic or create new?
|
||||||
|
|
||||||
|
## Additional Components
|
||||||
|
|
||||||
|
### 7. ExecutionContext
|
||||||
|
|
||||||
|
Provides runtime context for node execution.
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class ExecutionContext:
|
||||||
|
graph_engine: GraphEngine
|
||||||
|
runtime_state: GraphRuntimeState
|
||||||
|
variable_pool: VariablePool
|
||||||
|
user_id: str
|
||||||
|
workflow_id: str
|
||||||
|
execution_id: str
|
||||||
|
|
||||||
|
def get_variable(self, key: str) -> Any:
|
||||||
|
"""Get variable from pool"""
|
||||||
|
return self.variable_pool.get(key)
|
||||||
|
|
||||||
|
def set_variable(self, key: str, value: Any) -> None:
|
||||||
|
"""Set variable in pool"""
|
||||||
|
self.variable_pool.set(key, value)
|
||||||
|
|
||||||
|
def emit_event(self, event: BaseEvent) -> None:
|
||||||
|
"""Emit event to engine"""
|
||||||
|
self.graph_engine.handle_event(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8. NodeExecutionState
|
||||||
|
|
||||||
|
Tracks individual node execution state.
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class NodeExecutionState:
|
||||||
|
node_id: str
|
||||||
|
status: Literal["pending", "running", "completed", "failed", "skipped"]
|
||||||
|
start_time: Optional[datetime] = None
|
||||||
|
end_time: Optional[datetime] = None
|
||||||
|
error: Optional[str] = None
|
||||||
|
outputs: Optional[dict[str, Any]] = None
|
||||||
|
retry_count: int = 0
|
||||||
|
|
||||||
|
def to_dict(self) -> dict[str, Any]:
|
||||||
|
"""Serialize to dictionary"""
|
||||||
|
return {
|
||||||
|
"node_id": self.node_id,
|
||||||
|
"status": self.status,
|
||||||
|
"start_time": self.start_time.isoformat() if self.start_time else None,
|
||||||
|
"end_time": self.end_time.isoformat() if self.end_time else None,
|
||||||
|
"error": self.error,
|
||||||
|
"outputs": self.outputs,
|
||||||
|
"retry_count": self.retry_count
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9. Event System Integration
|
||||||
|
|
||||||
|
```python
|
||||||
|
class GraphEngine:
|
||||||
|
def handle_event(self, event: BaseEvent) -> None:
|
||||||
|
"""Handle events from nodes"""
|
||||||
|
if isinstance(event, NodeRunStartedEvent):
|
||||||
|
self._on_node_started(event)
|
||||||
|
elif isinstance(event, NodeRunSucceededEvent):
|
||||||
|
self._on_node_succeeded(event)
|
||||||
|
elif isinstance(event, NodeRunFailedEvent):
|
||||||
|
self._on_node_failed(event)
|
||||||
|
|
||||||
|
# Propagate to external listeners
|
||||||
|
self._event_emitter.emit(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10. VariablePool
|
||||||
|
|
||||||
|
Manages shared variables between nodes.
|
||||||
|
|
||||||
|
```python
|
||||||
|
class VariablePool:
|
||||||
|
def __init__(self, initial_vars: Optional[dict[str, Any]] = None):
|
||||||
|
self._variables: dict[str, Any] = initial_vars or {}
|
||||||
|
self._locks: dict[str, threading.Lock] = defaultdict(threading.Lock)
|
||||||
|
|
||||||
|
def get(self, key: str, default: Any = None) -> Any:
|
||||||
|
"""Thread-safe variable retrieval"""
|
||||||
|
with self._locks[key]:
|
||||||
|
return self._variables.get(key, default)
|
||||||
|
|
||||||
|
def set(self, key: str, value: Any) -> None:
|
||||||
|
"""Thread-safe variable setting"""
|
||||||
|
with self._locks[key]:
|
||||||
|
self._variables[key] = value
|
||||||
|
|
||||||
|
def to_dict(self) -> dict[str, Any]:
|
||||||
|
"""Serialize for state persistence"""
|
||||||
|
return self._variables.copy()
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example: Graph.from_dict Implementation
|
||||||
|
|
||||||
|
Based on the existing workflow structure, here's how Graph.from_dict would parse the data:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@classmethod
|
||||||
|
def from_dict(cls, data: dict[str, Any]) -> 'Graph':
|
||||||
|
"""
|
||||||
|
Parse workflow data dictionary to create a Graph instance.
|
||||||
|
|
||||||
|
Expected format:
|
||||||
|
{
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"id": "node_123",
|
||||||
|
"data": {
|
||||||
|
"type": "llm",
|
||||||
|
"title": "LLM Node",
|
||||||
|
...node specific config...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"edges": [
|
||||||
|
{
|
||||||
|
"id": "edge_123",
|
||||||
|
"source": "node_1",
|
||||||
|
"target": "node_2",
|
||||||
|
"sourceHandle": "source",
|
||||||
|
"targetHandle": "target"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
nodes = {}
|
||||||
|
edges = []
|
||||||
|
|
||||||
|
# Parse nodes
|
||||||
|
for node_data in data.get("nodes", []):
|
||||||
|
node_id = node_data["id"]
|
||||||
|
node_type = node_data["data"]["type"]
|
||||||
|
|
||||||
|
# Get the appropriate node class based on type
|
||||||
|
node_class = NODE_TYPE_MAPPING.get(node_type)
|
||||||
|
if not node_class:
|
||||||
|
raise ValueError(f"Unknown node type: {node_type}")
|
||||||
|
|
||||||
|
# Create node instance
|
||||||
|
node = node_class.from_dict(node_data["data"])
|
||||||
|
node.node_id = node_id
|
||||||
|
nodes[node_id] = node
|
||||||
|
|
||||||
|
# Parse edges
|
||||||
|
for edge_data in data.get("edges", []):
|
||||||
|
edge = Edge(
|
||||||
|
source_node_id=edge_data["source"],
|
||||||
|
target_node_id=edge_data["target"],
|
||||||
|
# Additional edge properties can be added here
|
||||||
|
)
|
||||||
|
edges.append(edge)
|
||||||
|
|
||||||
|
graph = cls(nodes=nodes, edges=edges)
|
||||||
|
graph.validate()
|
||||||
|
return graph
|
||||||
|
```
|
||||||
|
|
||||||
|
## Detailed Implementation Example
|
||||||
|
|
||||||
|
### Queue-Based Scheduling Algorithm
|
||||||
|
|
||||||
|
```python
|
||||||
|
class GraphEngine:
|
||||||
|
def __init__(self, graph: Graph, runtime_state: GraphRuntimeState):
|
||||||
|
self.graph = graph
|
||||||
|
self.runtime_state = runtime_state
|
||||||
|
|
||||||
|
# Initialize queues from runtime state
|
||||||
|
self.ready_q = deque(runtime_state.ready_nodes)
|
||||||
|
self.running_q = set(runtime_state.running_nodes)
|
||||||
|
self.completed_q = set(runtime_state.completed_nodes)
|
||||||
|
self.failed_q = set(runtime_state.failed_nodes)
|
||||||
|
|
||||||
|
# Worker pool
|
||||||
|
self.executor = ThreadPoolExecutor(max_workers=10)
|
||||||
|
self.futures: Dict[str, Future] = {}
|
||||||
|
|
||||||
|
def schedule(self) -> None:
|
||||||
|
"""Main scheduling loop"""
|
||||||
|
while self._has_pending_nodes():
|
||||||
|
# Move nodes from completed to ready if preconditions met
|
||||||
|
self._update_ready_queue()
|
||||||
|
|
||||||
|
# Submit ready nodes to workers
|
||||||
|
while self.ready_q and self._has_available_workers():
|
||||||
|
node_id = self.ready_q.popleft()
|
||||||
|
self._submit_node(node_id)
|
||||||
|
|
||||||
|
# Wait for at least one node to complete
|
||||||
|
if self.futures:
|
||||||
|
done, _ = concurrent.futures.wait(
|
||||||
|
self.futures.values(),
|
||||||
|
timeout=0.1,
|
||||||
|
return_when=concurrent.futures.FIRST_COMPLETED
|
||||||
|
)
|
||||||
|
|
||||||
|
for future in done:
|
||||||
|
node_id = self._get_node_id_from_future(future)
|
||||||
|
self._handle_node_completion(node_id, future)
|
||||||
|
|
||||||
|
# Checkpoint periodically
|
||||||
|
if self._should_checkpoint():
|
||||||
|
self._checkpoint()
|
||||||
|
|
||||||
|
def _update_ready_queue(self) -> None:
|
||||||
|
"""Check all nodes and move to ready queue if preconditions met"""
|
||||||
|
for node_id in self.graph.nodes:
|
||||||
|
if (node_id not in self.ready_q and
|
||||||
|
node_id not in self.running_q and
|
||||||
|
node_id not in self.completed_q and
|
||||||
|
node_id not in self.failed_q):
|
||||||
|
|
||||||
|
node = self.graph.get_node(node_id)
|
||||||
|
if self._check_preconditions(node):
|
||||||
|
self.ready_q.append(node_id)
|
||||||
|
|
||||||
|
def _check_preconditions(self, node: BaseNode) -> bool:
|
||||||
|
"""Check if all preconditions are satisfied"""
|
||||||
|
for condition in node.preconditions():
|
||||||
|
if not condition(self):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _submit_node(self, node_id: str) -> None:
|
||||||
|
"""Submit node for execution"""
|
||||||
|
self.running_q.add(node_id)
|
||||||
|
self.runtime_state.node_states[node_id].status = "running"
|
||||||
|
self.runtime_state.node_states[node_id].start_time = datetime.now()
|
||||||
|
|
||||||
|
future = self.executor.submit(self._execute_node, node_id)
|
||||||
|
self.futures[node_id] = future
|
||||||
|
```
|
||||||
|
|
||||||
|
### Node Result Handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class NodeResult:
|
||||||
|
status: Literal["success", "failed", "skipped"]
|
||||||
|
outputs: Optional[dict[str, Any]] = None
|
||||||
|
error: Optional[Exception] = None
|
||||||
|
|
||||||
|
class GraphEngine:
|
||||||
|
def _execute_node(self, node_id: str) -> NodeResult:
|
||||||
|
"""Execute a single node"""
|
||||||
|
try:
|
||||||
|
node = self.graph.get_node(node_id)
|
||||||
|
context = self._create_execution_context(node_id)
|
||||||
|
|
||||||
|
# Emit start event
|
||||||
|
context.emit_event(NodeRunStartedEvent(
|
||||||
|
node_id=node_id,
|
||||||
|
node_type=node.node_type,
|
||||||
|
timestamp=datetime.now()
|
||||||
|
))
|
||||||
|
|
||||||
|
# Execute node
|
||||||
|
if isinstance(node, ExecutableNode):
|
||||||
|
result = asyncio.run(node.execute(context))
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Node {node_id} is not executable")
|
||||||
|
|
||||||
|
# Emit success event
|
||||||
|
context.emit_event(NodeRunSucceededEvent(
|
||||||
|
node_id=node_id,
|
||||||
|
outputs=result.outputs,
|
||||||
|
timestamp=datetime.now()
|
||||||
|
))
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Emit failure event
|
||||||
|
context.emit_event(NodeRunFailedEvent(
|
||||||
|
node_id=node_id,
|
||||||
|
error=str(e),
|
||||||
|
timestamp=datetime.now()
|
||||||
|
))
|
||||||
|
|
||||||
|
return NodeResult(status="failed", error=e)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
1. Review and approve specification
|
||||||
|
2. Create detailed API documentation
|
||||||
|
3. Set up development environment
|
||||||
|
4. Begin Phase 1 implementation
|
||||||
|
5. Create comprehensive test suite
|
||||||
Loading…
Reference in New Issue