feat: Create a APIWorkflowRunRepository
Signed-off-by: -LAN- <laipz8200@outlook.com>pull/21458/head
parent
b2b4049279
commit
b450acf94b
@ -0,0 +1,180 @@
|
||||
"""
|
||||
API WorkflowRun Repository Protocol
|
||||
|
||||
This module defines the protocol for service-layer WorkflowRun operations.
|
||||
The repository provides an abstraction layer for WorkflowRun database operations
|
||||
used by service classes, separating service-layer concerns from core domain logic.
|
||||
|
||||
Key Features:
|
||||
- Paginated workflow run queries with filtering
|
||||
- Bulk deletion operations with OSS backup support
|
||||
- Multi-tenant data isolation
|
||||
- Expired record cleanup with data retention
|
||||
- Service-layer specific query patterns
|
||||
|
||||
Usage:
|
||||
This protocol should be used by service classes that need to perform
|
||||
WorkflowRun database operations. It provides a clean interface that
|
||||
hides implementation details and supports dependency injection.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from repositories.dify_api_repository_factory import DifyAPIRepositoryFactory
|
||||
|
||||
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
|
||||
repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
|
||||
|
||||
# Get paginated workflow runs
|
||||
runs = repo.get_paginated_workflow_runs(
|
||||
tenant_id="tenant-123",
|
||||
app_id="app-456",
|
||||
triggered_from="debugging",
|
||||
limit=20
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Optional, Protocol
|
||||
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from models.workflow import WorkflowRun
|
||||
|
||||
|
||||
class APIWorkflowRunRepository(Protocol):
|
||||
"""
|
||||
Protocol for service-layer WorkflowRun repository operations.
|
||||
|
||||
This protocol defines the interface for WorkflowRun database operations
|
||||
that are specific to service-layer needs, including pagination, filtering,
|
||||
and bulk operations with data backup support.
|
||||
"""
|
||||
|
||||
def get_paginated_workflow_runs(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
limit: int = 20,
|
||||
last_id: Optional[str] = None,
|
||||
) -> InfiniteScrollPagination:
|
||||
"""
|
||||
Get paginated workflow runs with filtering.
|
||||
|
||||
Retrieves workflow runs for a specific app and trigger source with
|
||||
cursor-based pagination support. Used primarily for debugging and
|
||||
workflow run listing in the UI.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source (e.g., "debugging", "app-run")
|
||||
limit: Maximum number of records to return (default: 20)
|
||||
last_id: Cursor for pagination - ID of the last record from previous page
|
||||
|
||||
Returns:
|
||||
InfiniteScrollPagination object containing:
|
||||
- data: List of WorkflowRun objects
|
||||
- limit: Applied limit
|
||||
- has_more: Boolean indicating if more records exist
|
||||
|
||||
Raises:
|
||||
ValueError: If last_id is provided but the corresponding record doesn't exist
|
||||
"""
|
||||
...
|
||||
|
||||
def get_workflow_run_by_id(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
run_id: str,
|
||||
) -> Optional[WorkflowRun]:
|
||||
"""
|
||||
Get a specific workflow run by ID.
|
||||
|
||||
Retrieves a single workflow run with tenant and app isolation.
|
||||
Used for workflow run detail views and execution tracking.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
run_id: Workflow run identifier
|
||||
|
||||
Returns:
|
||||
WorkflowRun object if found, None otherwise
|
||||
"""
|
||||
...
|
||||
|
||||
def get_expired_runs_batch(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> Sequence[WorkflowRun]:
|
||||
"""
|
||||
Get a batch of expired workflow runs for cleanup.
|
||||
|
||||
Retrieves workflow runs created before the specified date for
|
||||
cleanup operations. Used by scheduled tasks to remove old data
|
||||
while maintaining data retention policies.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
before_date: Only return runs created before this date
|
||||
batch_size: Maximum number of records to return
|
||||
|
||||
Returns:
|
||||
Sequence of WorkflowRun objects to be processed for cleanup
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_runs_by_ids(
|
||||
self,
|
||||
run_ids: Sequence[str],
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow runs by their IDs.
|
||||
|
||||
Performs bulk deletion of workflow runs by ID. This method should
|
||||
be used after backing up the data to OSS storage for retention.
|
||||
|
||||
Args:
|
||||
run_ids: Sequence of workflow run IDs to delete
|
||||
|
||||
Returns:
|
||||
Number of records actually deleted
|
||||
|
||||
Note:
|
||||
This method performs hard deletion. Ensure data is backed up
|
||||
to OSS storage before calling this method for compliance with
|
||||
data retention policies.
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_runs_by_app(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all workflow runs for a specific app.
|
||||
|
||||
Performs bulk deletion of all workflow runs associated with an app.
|
||||
Used during app cleanup operations. Processes records in batches
|
||||
to avoid memory issues and long-running transactions.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
batch_size: Number of records to process in each batch
|
||||
|
||||
Returns:
|
||||
Total number of records deleted across all batches
|
||||
|
||||
Note:
|
||||
This method performs hard deletion without backup. Use with caution
|
||||
and ensure proper data retention policies are followed.
|
||||
"""
|
||||
...
|
||||
@ -0,0 +1,218 @@
|
||||
"""
|
||||
SQLAlchemy API WorkflowRun Repository Implementation
|
||||
|
||||
This module provides the SQLAlchemy-based implementation of the APIWorkflowRunRepository
|
||||
protocol. It handles service-layer WorkflowRun database operations using SQLAlchemy 2.0
|
||||
style queries with proper session management and multi-tenant data isolation.
|
||||
|
||||
Key Features:
|
||||
- SQLAlchemy 2.0 style queries for modern database operations
|
||||
- Cursor-based pagination for efficient large dataset handling
|
||||
- Bulk operations with batch processing for performance
|
||||
- Multi-tenant data isolation and security
|
||||
- Proper session management with dependency injection
|
||||
|
||||
Implementation Notes:
|
||||
- Uses sessionmaker for consistent session management
|
||||
- Implements cursor-based pagination using created_at timestamps
|
||||
- Provides efficient bulk deletion with batch processing
|
||||
- Maintains data consistency with proper transaction handling
|
||||
"""
|
||||
|
||||
import logging
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Optional, cast
|
||||
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from models.workflow import WorkflowRun
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DifyAPISQLAlchemyWorkflowRunRepository:
|
||||
"""
|
||||
SQLAlchemy implementation of APIWorkflowRunRepository.
|
||||
|
||||
Provides service-layer WorkflowRun database operations using SQLAlchemy 2.0
|
||||
style queries. Supports dependency injection through sessionmaker and
|
||||
maintains proper multi-tenant data isolation.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker instance for database connections
|
||||
"""
|
||||
|
||||
def __init__(self, session_maker: sessionmaker) -> None:
|
||||
"""
|
||||
Initialize the repository with a sessionmaker.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker for database connections
|
||||
"""
|
||||
self._session_maker = session_maker
|
||||
|
||||
def get_paginated_workflow_runs(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
limit: int = 20,
|
||||
last_id: Optional[str] = None,
|
||||
) -> InfiniteScrollPagination:
|
||||
"""
|
||||
Get paginated workflow runs with filtering.
|
||||
|
||||
Implements cursor-based pagination using created_at timestamps for
|
||||
efficient handling of large datasets. Filters by tenant, app, and
|
||||
trigger source for proper data isolation.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
# Build base query with filters
|
||||
base_stmt = select(WorkflowRun).where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
WorkflowRun.triggered_from == triggered_from,
|
||||
)
|
||||
|
||||
if last_id:
|
||||
# Get the last workflow run for cursor-based pagination
|
||||
last_run_stmt = base_stmt.where(WorkflowRun.id == last_id)
|
||||
last_workflow_run = session.scalar(last_run_stmt)
|
||||
|
||||
if not last_workflow_run:
|
||||
raise ValueError("Last workflow run not exists")
|
||||
|
||||
# Get records created before the last run's timestamp
|
||||
workflow_runs = session.scalars(
|
||||
base_stmt.where(
|
||||
WorkflowRun.created_at < last_workflow_run.created_at,
|
||||
WorkflowRun.id != last_workflow_run.id,
|
||||
)
|
||||
.order_by(WorkflowRun.created_at.desc())
|
||||
.limit(limit)
|
||||
).all()
|
||||
else:
|
||||
# First page - get most recent records
|
||||
workflow_runs = session.scalars(base_stmt.order_by(WorkflowRun.created_at.desc()).limit(limit)).all()
|
||||
|
||||
# Check if there are more records for pagination
|
||||
has_more = False
|
||||
if len(workflow_runs) == limit:
|
||||
current_page_last_run = workflow_runs[-1]
|
||||
remaining_count = session.scalar(
|
||||
select(WorkflowRun.id)
|
||||
.where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
WorkflowRun.triggered_from == triggered_from,
|
||||
WorkflowRun.created_at < current_page_last_run.created_at,
|
||||
WorkflowRun.id != current_page_last_run.id,
|
||||
)
|
||||
.limit(1)
|
||||
)
|
||||
has_more = remaining_count is not None
|
||||
|
||||
return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)
|
||||
|
||||
def get_workflow_run_by_id(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
run_id: str,
|
||||
) -> Optional[WorkflowRun]:
|
||||
"""
|
||||
Get a specific workflow run by ID with tenant and app isolation.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
stmt = select(WorkflowRun).where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
WorkflowRun.id == run_id,
|
||||
)
|
||||
return cast(Optional[WorkflowRun], session.scalar(stmt))
|
||||
|
||||
def get_expired_runs_batch(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> Sequence[WorkflowRun]:
|
||||
"""
|
||||
Get a batch of expired workflow runs for cleanup operations.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
stmt = (
|
||||
select(WorkflowRun)
|
||||
.where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.created_at < before_date,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
return cast(Sequence[WorkflowRun], session.scalars(stmt).all())
|
||||
|
||||
def delete_runs_by_ids(
|
||||
self,
|
||||
run_ids: Sequence[str],
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow runs by their IDs using bulk deletion.
|
||||
"""
|
||||
if not run_ids:
|
||||
return 0
|
||||
|
||||
with self._session_maker() as session:
|
||||
stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
|
||||
result = session.execute(stmt)
|
||||
session.commit()
|
||||
|
||||
deleted_count = cast(int, result.rowcount)
|
||||
logger.info(f"Deleted {deleted_count} workflow runs by IDs")
|
||||
return deleted_count
|
||||
|
||||
def delete_runs_by_app(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all workflow runs for a specific app in batches.
|
||||
"""
|
||||
total_deleted = 0
|
||||
|
||||
while True:
|
||||
with self._session_maker() as session:
|
||||
# Get a batch of run IDs to delete
|
||||
stmt = (
|
||||
select(WorkflowRun.id)
|
||||
.where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
run_ids = session.scalars(stmt).all()
|
||||
|
||||
if not run_ids:
|
||||
break
|
||||
|
||||
# Delete the batch
|
||||
delete_stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
|
||||
result = session.execute(delete_stmt)
|
||||
session.commit()
|
||||
|
||||
batch_deleted = result.rowcount
|
||||
total_deleted += batch_deleted
|
||||
|
||||
logger.info(f"Deleted batch of {batch_deleted} workflow runs for app {app_id}")
|
||||
|
||||
# If we deleted fewer records than the batch size, we're done
|
||||
if batch_deleted < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f"Total deleted {total_deleted} workflow runs for app {app_id}")
|
||||
return total_deleted
|
||||
Loading…
Reference in New Issue