pull/13742/merge
cpwan 1 year ago committed by GitHub
commit a402e96dab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -115,6 +115,7 @@ class DatasetDocumentStore:
tokens=tokens,
enabled=False,
created_by=self._user_id,
page_number=doc.metadata.get("page", 0),
)
if doc.metadata.get("answer"):
segment_document.answer = doc.metadata.pop("answer", "")

@ -265,6 +265,7 @@ class DatasetRetrieval:
source["word_count"] = segment.word_count
source["segment_position"] = segment.position
source["index_node_hash"] = segment.index_node_hash
source["page_number"] = segment.page_number
if segment.answer:
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
else:

@ -139,6 +139,7 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
source["word_count"] = segment.word_count
source["segment_position"] = segment.position
source["index_node_hash"] = segment.index_node_hash
source["page_number"] = segment.page_number
if segment.answer:
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
else:

@ -214,6 +214,7 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
source["word_count"] = segment.word_count
source["segment_position"] = segment.position
source["index_node_hash"] = segment.index_node_hash
source["page_number"] = segment.page_number
if segment.answer:
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
else:

@ -302,6 +302,7 @@ class KnowledgeRetrievalNode(LLMNode):
"segment_word_count": segment.word_count,
"segment_position": segment.position,
"segment_index_node_hash": segment.index_node_hash,
"segment_page_number": segment.page_number,
"doc_metadata": document.doc_metadata,
},
"title": document.name,

@ -0,0 +1,33 @@
"""add page column to segment
Revision ID: 8c83edec42e8
Revises: d28f2004b072
Create Date: 2025-05-15 03:08:23.196234
"""
from alembic import op
import models as models
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it revises (see "Revises" in the module docstring).
revision = '8c83edec42e8'
down_revision = 'd28f2004b072'
# This migration declares no branch labels and no dependencies on other revisions.
branch_labels = None
depends_on = None
def upgrade():
    """Add a nullable integer ``page_number`` column to ``document_segments``."""
    # Build the column definition up front so the batch block only applies it.
    page_number_column = sa.Column('page_number', sa.Integer(), nullable=True)

    with op.batch_alter_table('document_segments', schema=None) as segments_table:
        segments_table.add_column(page_number_column)
def downgrade():
    """Drop the ``page_number`` column from ``document_segments``.

    Reverses ``upgrade``; any values stored in the column are discarded
    along with it.
    """
    table_name = 'document_segments'
    column_name = 'page_number'

    with op.batch_alter_table(table_name, schema=None) as segments_table:
        segments_table.drop_column(column_name)

@ -657,6 +657,7 @@ class DocumentSegment(Base):
dataset_id = db.Column(StringUUID, nullable=False)
document_id = db.Column(StringUUID, nullable=False)
position: Mapped[int]
page_number = db.Column(db.Integer, nullable=True)
content = db.Column(db.Text, nullable=False)
answer = db.Column(db.Text, nullable=True)
word_count = db.Column(db.Integer, nullable=False)

Loading…
Cancel
Save