feat(kb api):add get chunk detail by chunk id api

pull/20426/head
Dongyu Li 12 months ago
parent 4c46f04d77
commit 511d5731d9

@ -208,6 +208,28 @@ class DatasetSegmentApi(DatasetApiResource):
) )
return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200 return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200
def get(self, tenant_id, dataset_id, document_id, segment_id):
    """Return the details of one segment (chunk) of a document.

    Args:
        tenant_id: Tenant the request is scoped to; used for both the
            dataset lookup and the segment lookup.
        dataset_id: ID of the dataset (knowledge base) from the URL.
        document_id: ID of the document from the URL.
        segment_id: ID of the segment from the URL.

    Returns:
        A ``(payload, 200)`` tuple where ``payload["data"]`` is the segment
        marshalled with ``segment_fields`` and ``payload["doc_form"]`` is
        the document's ``doc_form``.

    Raises:
        NotFound: If the dataset, the document, or the segment cannot be
            found, or if the segment does not belong to the given document.
    """
    # Route converters may hand over UUID objects; normalise once up front.
    tenant_id = str(tenant_id)
    dataset_id = str(dataset_id)
    document_id = str(document_id)
    segment_id = str(segment_id)

    # check dataset
    dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
    if not dataset:
        raise NotFound("Dataset not found.")
    # check user's model setting
    DatasetService.check_dataset_model_setting(dataset)
    # check document
    document = DocumentService.get_document(dataset_id, document_id)
    if not document:
        raise NotFound("Document not found.")
    # check segment: scope the lookup to the same tenant as the dataset check
    segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=tenant_id)
    # A segment that exists in the tenant but under a different document must
    # not be reachable through this document's URL.
    if not segment or str(segment.document_id) != document_id:
        raise NotFound("Segment not found.")
    return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
class ChildChunkApi(DatasetApiResource): class ChildChunkApi(DatasetApiResource):
"""Resource for child chunks.""" """Resource for child chunks."""
@ -389,6 +411,7 @@ class DatasetChildChunkApi(DatasetApiResource):
return {"data": marshal(child_chunk, child_chunk_fields)}, 200 return {"data": marshal(child_chunk, child_chunk_fields)}, 200
api.add_resource(SegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments") api.add_resource(SegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments")
api.add_resource( api.add_resource(
DatasetSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>" DatasetSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>"

@ -1298,6 +1298,76 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<hr className='ml-0 mr-0' /> <hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='GET'
title='Get Chunk Details in a Document'
name='#view_document_chunk'
/>
<Row>
<Col>
Get details of a specific document segment in the specified knowledge base
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Knowledge Base ID
</Property>
<Property name='document_id' type='string' key='document_id'>
Document ID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
Segment ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"data": {
"id": "chunk_id",
"position": 2,
"document_id": "document_id",
"content": "Segment content text",
"sign_content": "Signature content text",
"answer": "Answer content (if in Q&A mode)",
"word_count": 470,
"tokens": 382,
"keywords": ["keyword1", "keyword2"],
"index_node_id": "index_node_id",
"index_node_hash": "index_node_hash",
"hit_count": 0,
"enabled": true,
"status": "completed",
"created_by": "creator_id",
"created_at": 1695312007,
"updated_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"child_chunks": []
},
"doc_form": "text_model"
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE' method='DELETE'

@ -1057,6 +1057,75 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='GET'
title='ドキュメントセグメントの詳細を表示'
name='#view_document_segment'
/>
<Row>
<Col>
指定されたナレッジベース内の特定のドキュメントセグメントの詳細を表示します
### パス
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
ナレッジベースID
</Property>
<Property name='document_id' type='string' key='document_id'>
ドキュメントID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
セグメントID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="リクエスト"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}'
```
</CodeGroup>
<CodeGroup title="レスポンス">
```json {{ title: 'Response' }}
{
"data": {
"id": "セグメントID",
"position": 2,
"document_id": "ドキュメントID",
"content": "セグメント内容テキスト",
"sign_content": "署名内容テキスト",
"answer": "回答内容(Q&Aモードの場合)",
"word_count": 470,
"tokens": 382,
"keywords": ["キーワード1", "キーワード2"],
"index_node_id": "インデックスノードID",
"index_node_hash": "インデックスノードハッシュ",
"hit_count": 0,
"enabled": true,
"status": "completed",
"created_by": "作成者ID",
"created_at": 1695312007,
"updated_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"child_chunks": []
},
"doc_form": "text_model"
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='DELETE' method='DELETE'
title='ドキュメント内のチャンクを削除' title='ドキュメント内のチャンクを削除'
name='#delete_segment' name='#delete_segment'
@ -1100,7 +1169,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<hr className='ml-0 mr-0' /> <hr className='ml-0 mr-0' />
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST' method='POST'
title='ドキュメント内のチャンクを更新' title='ドキュメント内のチャンクを更新'
name='#update_segment' name='#update_segment'

@ -1351,6 +1351,75 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<Heading <Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='GET'
title='查看文档分段详情'
name='#view_document_segment'
/>
<Row>
<Col>
查看指定知识库中特定文档的分段详情
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
知识库 ID
</Property>
<Property name='document_id' type='string' key='document_id'>
文档 ID
</Property>
<Property name='segment_id' type='string' key='segment_id'>
分段 ID
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
--header 'Authorization: Bearer {api_key}'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"data": {
"id": "分段唯一ID",
"position": 2,
"document_id": "所属文档ID",
"content": "分段内容文本",
"sign_content": "签名内容文本",
"answer": "答案内容(如果有)",
"word_count": 470,
"tokens": 382,
"keywords": ["关键词1", "关键词2"],
"index_node_id": "索引节点ID",
"index_node_hash": "索引节点哈希值",
"hit_count": 0,
"enabled": true,
"status": "completed",
"created_by": "创建者ID",
"created_at": 1695312007,
"updated_at": 1695312007,
"indexing_at": 1695312007,
"completed_at": 1695312007,
"error": null,
"child_chunks": []
},
"doc_form": "text_model"
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
method='POST' method='POST'
title='更新文档分段' title='更新文档分段'
name='#update_segment' name='#update_segment'

Loading…
Cancel
Save