From b0e4026d8bae56215b9f383f708a4b8fe82da250 Mon Sep 17 00:00:00 2001 From: Aurelius Huang Date: Mon, 30 Sep 2024 15:51:41 +0800 Subject: [PATCH 1/2] fix: Compatible with special characters in pg full-text search. --- api/core/rag/datasource/vdb/pgvector/pgvector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index d90707ebcd..25a10a1e48 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -166,7 +166,7 @@ class PGVector(BaseVector): with self._get_cursor() as cur: cur.execute( - f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), to_tsquery(%s)) AS score + f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score FROM {self.table_name} WHERE to_tsvector(text) @@ plainto_tsquery(%s) ORDER BY score DESC From 863365326f9f0dc5155616b2ad7b27e42ade015f Mon Sep 17 00:00:00 2001 From: Aurelius Huang Date: Fri, 18 Jul 2025 22:01:20 +0800 Subject: [PATCH 2/2] feat(notion): Notion Database extracts Rows content `in row order` and appends `Row Page URL` --- api/core/rag/extractor/notion_extractor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index eca955ddd1..032b30c34d 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -1,5 +1,6 @@ import json import logging +import operator from typing import Any, Optional, cast import requests @@ -130,13 +131,15 @@ class NotionExtractor(BaseExtractor): data[property_name] = value row_dict = {k: v for k, v in data.items() if v} row_content = "" - for key, value in row_dict.items(): + for key, value in sorted(row_dict.items(), key=operator.itemgetter(0)): if isinstance(value, dict): value_dict = {k: v for k, v in value.items() if v} value_content = "".join(f"{k}:{v} " for k, v in value_dict.items()) row_content = row_content + f"{key}:{value_content}\n" else: row_content = row_content + f"{key}:{value}\n" + if "url" in result: + row_content = row_content + f"Row Page URL:{result.get('url', '')}\n" database_content.append(row_content) has_more = response_data.get("has_more", False)