diff --git a/api/controllers/web/passport.py b/api/controllers/web/passport.py index 4625c1f43d..e30998c803 100644 --- a/api/controllers/web/passport.py +++ b/api/controllers/web/passport.py @@ -19,6 +19,8 @@ class PassportResource(Resource): def get(self): system_features = FeatureService.get_system_features() app_code = request.headers.get("X-App-Code") + user_id = request.args.get("user_id") + if app_code is None: raise Unauthorized("X-App-Code header is missing.") @@ -36,16 +38,33 @@ class PassportResource(Resource): if not app_model or app_model.status != "normal" or not app_model.enable_site: raise NotFound() - end_user = EndUser( - tenant_id=app_model.tenant_id, - app_id=app_model.id, - type="browser", - is_anonymous=True, - session_id=generate_session_id(), - ) - - db.session.add(end_user) - db.session.commit() + if user_id: + end_user = ( + db.session.query(EndUser).filter(EndUser.app_id == app_model.id, EndUser.session_id == user_id).first() + ) + + if end_user: + pass + else: + end_user = EndUser( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + type="browser", + is_anonymous=True, + session_id=user_id, + ) + db.session.add(end_user) + db.session.commit() + else: + end_user = EndUser( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + type="browser", + is_anonymous=True, + session_id=generate_session_id(), + ) + db.session.add(end_user) + db.session.commit() payload = { "iss": site.app_id, diff --git a/api/core/agent/output_parser/cot_output_parser.py b/api/core/agent/output_parser/cot_output_parser.py index 61fa774ea5..7c8f09e6b9 100644 --- a/api/core/agent/output_parser/cot_output_parser.py +++ b/api/core/agent/output_parser/cot_output_parser.py @@ -12,39 +12,45 @@ class CotAgentOutputParser: def handle_react_stream_output( cls, llm_response: Generator[LLMResultChunk, None, None], usage_dict: dict ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]: - def parse_action(json_str): - try: - action = json.loads(json_str, strict=False) - action_name = None - action_input = None - - # cohere always returns a list - if isinstance(action, list) and len(action) == 1: - action = action[0] - - for key, value in action.items(): - if "input" in key.lower(): - action_input = value - else: - action_name = value - - if action_name is not None and action_input is not None: - return AgentScratchpadUnit.Action( - action_name=action_name, - action_input=action_input, - ) + def parse_action(action) -> Union[str, AgentScratchpadUnit.Action]: + action_name = None + action_input = None + if isinstance(action, str): + try: + action = json.loads(action, strict=False) + except json.JSONDecodeError: + return action or "" + + # cohere always returns a list + if isinstance(action, list) and len(action) == 1: + action = action[0] + + for key, value in action.items(): + if "input" in key.lower(): + action_input = value else: - return json_str or "" + action_name = value + + if action_name is not None and action_input is not None: + return AgentScratchpadUnit.Action( + action_name=action_name, + action_input=action_input, + ) + else: + return json.dumps(action) + + def extra_json_from_code_block(code_block) -> list[Union[list, dict]]: + blocks = re.findall(r"```[json]*\s*([\[{].*[]}])\s*```", code_block, re.DOTALL | re.IGNORECASE) + if not blocks: + return [] + try: + json_blocks = [] + for block in blocks: + json_text = re.sub(r"^[a-zA-Z]+\n", "", block.strip(), flags=re.MULTILINE) + json_blocks.append(json.loads(json_text, strict=False)) + return json_blocks except: - return json_str or "" - - def extra_json_from_code_block(code_block) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]: - code_blocks = re.findall(r"```(.*?)```", code_block, re.DOTALL) - if not code_blocks: - return - for block in code_blocks: - json_text = re.sub(r"^[a-zA-Z]+\n", "", block.strip(), flags=re.MULTILINE) - yield parse_action(json_text) + return [] code_block_cache = "" code_block_delimiter_count = 0 @@ -78,7 +84,7 @@ class CotAgentOutputParser: delta = response_content[index : index + steps] yield_delta = False - if delta == "`": + if not in_json and delta == "`": last_character = delta code_block_cache += delta code_block_delimiter_count += 1 @@ -159,8 +165,14 @@ class CotAgentOutputParser: if code_block_delimiter_count == 3: if in_code_block: last_character = delta - yield from extra_json_from_code_block(code_block_cache) - code_block_cache = "" + action_json_list = extra_json_from_code_block(code_block_cache) + if action_json_list: + for action_json in action_json_list: + yield parse_action(action_json) + code_block_cache = "" + else: + index += steps + continue in_code_block = not in_code_block code_block_delimiter_count = 0 diff --git a/api/core/ops/entities/config_entity.py b/api/core/ops/entities/config_entity.py index 9fa796595b..874b2800b2 100644 --- a/api/core/ops/entities/config_entity.py +++ b/api/core/ops/entities/config_entity.py @@ -88,26 +88,6 @@ class OpikConfig(BaseTracingConfig): return v -class WeaveConfig(BaseTracingConfig): - """ - Model class for Weave tracing config. - """ - - api_key: str - entity: str | None = None - project: str - endpoint: str = "https://trace.wandb.ai" - - @field_validator("endpoint") - @classmethod - def set_value(cls, v, info: ValidationInfo): - if v is None or v == "": - v = "https://trace.wandb.ai" - if not v.startswith("https://"): - raise ValueError("endpoint must start with https://") - - return v - class WeaveConfig(BaseTracingConfig): """ diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index b023408b06..7a70f45630 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -19,7 +19,6 @@ from core.ops.entities.config_entity import ( LangfuseConfig, LangSmithConfig, OpikConfig, - WeaveConfig, TracingProviderEnum, WeaveConfig, ) @@ -36,7 +35,6 @@ from core.ops.entities.trace_entity import ( ) from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace -from core.ops.weave_trace.weave_trace import WeaveDataTrace from core.ops.opik_trace.opik_trace import OpikDataTrace from core.ops.utils import get_message_data from core.ops.weave_trace.weave_trace import WeaveDataTrace @@ -48,8 +46,6 @@ from tasks.ops_trace_task import process_trace_tasks def build_opik_trace_instance(config: OpikConfig): - from core.ops.opik_trace.opik_trace import OpikDataTrace - return OpikDataTrace(config) diff --git a/api/factories/file_factory.py b/api/factories/file_factory.py index 796113e100..60de7a11b5 100644 --- a/api/factories/file_factory.py +++ b/api/factories/file_factory.py @@ -134,8 +134,9 @@ def _build_from_local_file( if row is None: raise ValueError("Invalid upload file") - file_type = FileType(mapping.get("type", "custom")) - file_type = _standardize_file_type(file_type, extension="." + row.extension, mime_type=row.mime_type) + file_type = _standardize_file_type(extension="." + row.extension, mime_type=row.mime_type) + if file_type.value != mapping.get("type", "custom"): + raise ValueError("Detected file type does not match the specified type. Please verify the file.") return File( id=mapping.get("id"), @@ -173,10 +174,9 @@ def _build_from_remote_url( if upload_file is None: raise ValueError("Invalid upload file") - file_type = FileType(mapping.get("type", "custom")) - file_type = _standardize_file_type( - file_type, extension="." + upload_file.extension, mime_type=upload_file.mime_type - ) + file_type = _standardize_file_type(extension="." + upload_file.extension, mime_type=upload_file.mime_type) + if file_type.value != mapping.get("type", "custom"): + raise ValueError("Detected file type does not match the specified type. Please verify the file.") return File( id=mapping.get("id"), @@ -198,8 +198,9 @@ def _build_from_remote_url( mime_type, filename, file_size = _get_remote_file_info(url) extension = mimetypes.guess_extension(mime_type) or ("." + filename.split(".")[-1] if "." in filename else ".bin") - file_type = FileType(mapping.get("type", "custom")) - file_type = _standardize_file_type(file_type, extension=extension, mime_type=mime_type) + file_type = _standardize_file_type(extension=extension, mime_type=mime_type) + if file_type.value != mapping.get("type", "custom"): + raise ValueError("Detected file type does not match the specified type. Please verify the file.") return File( id=mapping.get("id"), @@ -250,8 +251,10 @@ def _build_from_tool_file( raise ValueError(f"ToolFile {mapping.get('tool_file_id')} not found") extension = "." + tool_file.file_key.split(".")[-1] if "." in tool_file.file_key else ".bin" - file_type = FileType(mapping.get("type", "custom")) - file_type = _standardize_file_type(file_type, extension=extension, mime_type=tool_file.mimetype) + + file_type = _standardize_file_type(extension=extension, mime_type=tool_file.mimetype) + if file_type.value != mapping.get("type", "custom"): + raise ValueError("Detected file type does not match the specified type. Please verify the file.") return File( id=mapping.get("id"), @@ -302,12 +305,10 @@ def _is_file_valid_with_config( return True -def _standardize_file_type(file_type: FileType, /, *, extension: str = "", mime_type: str = "") -> FileType: +def _standardize_file_type(*, extension: str = "", mime_type: str = "") -> FileType: """ - If custom type, try to guess the file type by extension and mime_type. + Infer the possible actual type of the file based on the extension and mime_type """ - if file_type != FileType.CUSTOM: - return FileType(file_type) guessed_type = None if extension: guessed_type = _get_file_type_by_extension(extension) diff --git a/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py b/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py new file mode 100644 index 0000000000..4a613e35b0 --- /dev/null +++ b/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py @@ -0,0 +1,70 @@ +import json +from collections.abc import Generator + +from core.agent.entities import AgentScratchpadUnit +from core.agent.output_parser.cot_output_parser import CotAgentOutputParser +from core.model_runtime.entities.llm_entities import AssistantPromptMessage, LLMResultChunk, LLMResultChunkDelta + + +def mock_llm_response(text) -> Generator[LLMResultChunk, None, None]: + for i in range(len(text)): + yield LLMResultChunk( + model="model", + prompt_messages=[], + delta=LLMResultChunkDelta(index=0, message=AssistantPromptMessage(content=text[i], tool_calls=[])), + ) + + +def test_cot_output_parser(): + test_cases = [ + { + "input": 'Through: abc\nAction: ```{"action": "Final Answer", "action_input": "```echarts\n {}\n```"}```', + "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, + "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', + }, + # code block with json + { + "input": 'Through: abc\nAction: ```json\n{"action": "Final Answer", "action_input": "```echarts\n {' + '}\n```"}```', + "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, + "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', + }, + # code block with JSON + { + "input": 'Through: abc\nAction: ```JSON\n{"action": "Final Answer", "action_input": "```echarts\n {' + '}\n```"}```', + "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, + "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', + }, + # list + { + "input": 'Through: abc\nAction: ```[{"action": "Final Answer", "action_input": "```echarts\n {}\n```"}]```', + "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, + "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', + }, + # no code block + { + "input": 'Through: abc\nAction: {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}', + "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, + "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', + }, + # no code block and json + {"input": "Through: abc\nAction: efg", "action": {}, "output": "Through: abc\n efg"}, + ] + + parser = CotAgentOutputParser() + usage_dict = {} + for test_case in test_cases: + # mock llm_response as a generator by text + llm_response: Generator[LLMResultChunk, None, None] = mock_llm_response(test_case["input"]) + results = parser.handle_react_stream_output(llm_response, usage_dict) + output = "" + for result in results: + if isinstance(result, str): + output += result + elif isinstance(result, AgentScratchpadUnit.Action): + if test_case["action"]: + assert result.to_dict() == test_case["action"] + output += json.dumps(result.to_dict()) + if test_case["output"]: + assert output == test_case["output"] diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx index c002b2dc21..221559ff28 100644 --- a/web/app/activate/page.tsx +++ b/web/app/activate/page.tsx @@ -1,25 +1,12 @@ import React from 'react' import Header from '../signin/_header' -import style from '../signin/page.module.css' import ActivateForm from './activateForm' import cn from '@/utils/classnames' const Activate = () => { return ( -