From 511d5731d97db01dd16770ec42f9fcffce0a985b Mon Sep 17 00:00:00 2001 From: Dongyu Li <544104925@qq.com> Date: Wed, 28 May 2025 15:17:28 +0800 Subject: [PATCH] feat(kb api):add get chunk detail by chunk id api --- .../service_api/dataset/segment.py | 23 ++++++ .../datasets/template/template.en.mdx | 70 +++++++++++++++++++ .../datasets/template/template.ja.mdx | 70 ++++++++++++++++++- .../datasets/template/template.zh.mdx | 69 ++++++++++++++++++ 4 files changed, 231 insertions(+), 1 deletion(-) diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index ea4be4e511..9d8f69ac42 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -208,6 +208,28 @@ class DatasetSegmentApi(DatasetApiResource): ) return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200 + def get(self, tenant_id, dataset_id, document_id, segment_id): + # check dataset + dataset_id = str(dataset_id) + tenant_id = str(tenant_id) + dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() + if not dataset: + raise NotFound("Dataset not found.") + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound("Document not found.") + # check segment + segment_id = str(segment_id) + segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) + if not segment: + raise NotFound("Segment not found.") + + return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 + class ChildChunkApi(DatasetApiResource): """Resource for child chunks.""" @@ -389,6 +411,7 @@ class DatasetChildChunkApi(DatasetApiResource): return {"data": marshal(child_chunk, child_chunk_fields)}, 200 
+ api.add_resource(SegmentApi, "/datasets//documents//segments") api.add_resource( DatasetSegmentApi, "/datasets//documents//segments/" diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index 3393c636cd..806657c507 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -1298,6 +1298,76 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
+ + + + Get details of a specific document segment in the specified knowledge base + + ### Path + + + Knowledge Base ID + + + Document ID + + + Segment ID + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' + ``` + + + ```json {{ title: 'Response' }} + { + "data": { + "id": "chunk_id", + "position": 2, + "document_id": "document_id", + "content": "Segment content text", + "sign_content": "Signature content text", + "answer": "Answer content (if in Q&A mode)", + "word_count": 470, + "tokens": 382, + "keywords": ["keyword1", "keyword2"], + "index_node_id": "index_node_id", + "index_node_hash": "index_node_hash", + "hit_count": 0, + "enabled": true, + "status": "completed", + "created_by": "creator_id", + "created_at": 1695312007, + "updated_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312014, + "error": null, + "child_chunks": [] + }, + "doc_form": "text_model" + } + ``` + + + + +
+ + + + 指定されたナレッジベース内の特定のドキュメントセグメントの詳細を表示します + + ### パス + + + ナレッジベースID + + + ドキュメントID + + + セグメントID + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' + ``` + + + ```json {{ title: 'Response' }} + { + "data": { + "id": "セグメントID", + "position": 2, + "document_id": "ドキュメントID", + "content": "セグメント内容テキスト", + "sign_content": "署名内容テキスト", + "answer": "回答内容(Q&Aモードの場合)", + "word_count": 470, + "tokens": 382, + "keywords": ["キーワード1", "キーワード2"], + "index_node_id": "インデックスノードID", + "index_node_hash": "インデックスノードハッシュ", + "hit_count": 0, + "enabled": true, + "status": "completed", + "created_by": "作成者ID", + "created_at": 1695312007, + "updated_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312014, + "error": null, + "child_chunks": [] + }, + "doc_form": "text_model" + } + ``` + + + + +
+ + + + + 查看指定知识库中特定文档的分段详情 + + ### Path + + + 知识库 ID + + + 文档 ID + + + 分段 ID + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' + ``` + + + ```json {{ title: 'Response' }} + { + "data": { + "id": "分段唯一ID", + "position": 2, + "document_id": "所属文档ID", + "content": "分段内容文本", + "sign_content": "签名内容文本", + "answer": "答案内容(如果有)", + "word_count": 470, + "tokens": 382, + "keywords": ["关键词1", "关键词2"], + "index_node_id": "索引节点ID", + "index_node_hash": "索引节点哈希值", + "hit_count": 0, + "enabled": true, + "status": "completed", + "created_by": "创建者ID", + "created_at": 1695312007, + "updated_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312014, + "error": null, + "child_chunks": [] + }, + "doc_form": "text_model" + } + ``` + + + + +
+ +