From 511d5731d97db01dd16770ec42f9fcffce0a985b Mon Sep 17 00:00:00 2001
From: Dongyu Li <544104925@qq.com>
Date: Wed, 28 May 2025 15:17:28 +0800
Subject: [PATCH] feat(kb api):add get chunk detail by chunk id api

---
 .../service_api/dataset/segment.py            | 23 ++++++
 .../datasets/template/template.en.mdx         | 70 +++++++++++++++++++
 .../datasets/template/template.ja.mdx         | 70 ++++++++++++++++++-
 .../datasets/template/template.zh.mdx         | 69 ++++++++++++++++++
 4 files changed, 231 insertions(+), 1 deletion(-)
diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py
index ea4be4e511..9d8f69ac42 100644
--- a/api/controllers/service_api/dataset/segment.py
+++ b/api/controllers/service_api/dataset/segment.py
@@ -208,6 +208,28 @@ class DatasetSegmentApi(DatasetApiResource):
         )
         return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200
 
+    def get(self, tenant_id, dataset_id, document_id, segment_id):
+        # Resolve the dataset by id within the given tenant; raise NotFound when no row matches.
+        dataset_id = str(dataset_id)
+        tenant_id = str(tenant_id)
+        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
+        if not dataset:
+            raise NotFound("Dataset not found.")
+        # Run DatasetService's model-setting check on the dataset before going any further.
+        DatasetService.check_dataset_model_setting(dataset)
+        # Load the document by (dataset_id, document_id); raise NotFound when the lookup is empty.
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset_id, document_id)
+        if not document:
+            raise NotFound("Document not found.")
+        # Load the segment under current_user's tenant. NOTE(review): document_id is not matched here - confirm.
+        segment_id = str(segment_id)
+        segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id)
+        if not segment:
+            raise NotFound("Segment not found.")
+
+        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
+
 
 class ChildChunkApi(DatasetApiResource):
     """Resource for child chunks."""
@@ -389,6 +411,7 @@ class DatasetChildChunkApi(DatasetApiResource):
         return {"data": marshal(child_chunk, child_chunk_fields)}, 200
 
 
+
 api.add_resource(SegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments")
 api.add_resource(
     DatasetSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>"
diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx
index 3393c636cd..806657c507 100644
--- a/web/app/(commonLayout)/datasets/template/template.en.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.en.mdx
@@ -1298,6 +1298,76 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
 
 <hr className='ml-0 mr-0' />
 
+<Heading
+  url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
+  method='GET'
+  title='Get Chunk Details in a Document'
+  name='#view_document_chunk'
+/>
+<Row>
+  <Col>
+    Get details of a specific document segment in the specified knowledge base
+
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        Knowledge Base ID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        Document ID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        Segment ID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
+    --header 'Authorization: Bearer {api_key}'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "data": {
+        "id": "chunk_id",
+        "position": 2,
+        "document_id": "document_id",
+        "content": "Segment content text",
+        "sign_content": "Signature content text",
+        "answer": "Answer content (if in Q&A mode)",
+        "word_count": 470,
+        "tokens": 382,
+        "keywords": ["keyword1", "keyword2"],
+        "index_node_id": "index_node_id",
+        "index_node_hash": "index_node_hash",
+        "hit_count": 0,
+        "enabled": true,
+        "status": "completed",
+        "created_by": "creator_id",
+        "created_at": 1695312007,
+        "updated_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "child_chunks": []
+      },
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+<hr className='ml-0 mr-0' />
+
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
   method='DELETE'
diff --git a/web/app/(commonLayout)/datasets/template/template.ja.mdx b/web/app/(commonLayout)/datasets/template/template.ja.mdx
index defd48816d..bffc91316c 100644
--- a/web/app/(commonLayout)/datasets/template/template.ja.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.ja.mdx
@@ -1057,6 +1057,75 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
 
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
+  method='GET'
+  title='ドキュメントセグメントの詳細を表示'
+  name='#view_document_segment'
+/>
+<Row>
+  <Col>
+    指定されたナレッジベース内の特定のドキュメントセグメントの詳細を表示します
+
+    ### パス
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        ナレッジベースID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        ドキュメントID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        セグメントID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="リクエスト"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
+    --header 'Authorization: Bearer {api_key}'
+    ```
+    </CodeGroup>
+    <CodeGroup title="レスポンス">
+    ```json {{ title: 'Response' }}
+    {
+      "data": {
+        "id": "セグメントID",
+        "position": 2,
+        "document_id": "ドキュメントID",
+        "content": "セグメント内容テキスト",
+        "sign_content": "署名内容テキスト",
+        "answer": "回答内容(Q&Aモードの場合)",
+        "word_count": 470,
+        "tokens": 382,
+        "keywords": ["キーワード1", "キーワード2"],
+        "index_node_id": "インデックスノードID",
+        "index_node_hash": "インデックスノードハッシュ",
+        "hit_count": 0,
+        "enabled": true,
+        "status": "completed",
+        "created_by": "作成者ID",
+        "created_at": 1695312007,
+        "updated_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "child_chunks": []
+      },
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+<hr className='ml-0 mr-0' />
+
+<Heading url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
   method='DELETE'
   title='ドキュメント内のチャンクを削除'
   name='#delete_segment'
@@ -1100,7 +1169,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
 <hr className='ml-0 mr-0' />
 
-<Heading
-  url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
+<Heading url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
   method='POST'
   title='ドキュメント内のチャンクを更新'
   name='#update_segment'
diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx
index 04b5837651..d9ae6ab7bc 100644
--- a/web/app/(commonLayout)/datasets/template/template.zh.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx
@@ -1351,6 +1351,75 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
 
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
+  method='GET'
+  title='查看文档分段详情'
+  name='#view_document_segment'
+/>
+<Row>
+  <Col>
+    查看指定知识库中特定文档的分段详情
+
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        知识库 ID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        文档 ID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        分段 ID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
+    --header 'Authorization: Bearer {api_key}'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "data": {
+        "id": "分段唯一ID",
+        "position": 2,
+        "document_id": "所属文档ID",
+        "content": "分段内容文本",
+        "sign_content": "签名内容文本",
+        "answer": "答案内容(如果有)",
+        "word_count": 470,
+        "tokens": 382,
+        "keywords": ["关键词1", "关键词2"],
+        "index_node_id": "索引节点ID",
+        "index_node_hash": "索引节点哈希值",
+        "hit_count": 0,
+        "enabled": true,
+        "status": "completed",
+        "created_by": "创建者ID",
+        "created_at": 1695312007,
+        "updated_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "child_chunks": []
+      },
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+<hr className='ml-0 mr-0' />
+
+<Heading url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
   method='POST'
   title='更新文档分段'
   name='#update_segment'