You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

60 lines
1.7 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from util.use_pgvector import connect_to_db, insert_vectors, search_similar_vectors, setup_vector_extension
import json
from util.use_opanai import generation_vector
def txt_to_json_objects(file_path):
    """Read a text file and parse each non-blank line as a JSON document.

    Every line is stripped of leading/trailing whitespace. Lines that are
    empty after stripping are skipped. A line that is not valid JSON is
    reported on stdout and skipped, so a single malformed line does not
    abort the whole load.

    Args:
        file_path: Path of the text file to read; it is opened as UTF-8.

    Returns:
        A list with the decoded JSON value of each parsable line, in file
        order. The list is empty when no line could be parsed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    json_objects = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank (or whitespace-only) line: nothing to parse.
                continue
            try:
                # Each remaining line is expected to be one complete JSON document.
                json_objects.append(json.loads(line))
            except json.JSONDecodeError:
                # Best-effort load: report the offending line and keep going.
                print(f"无法解析行: {line}")
    return json_objects
async def main(file_path="规建域识别可能涉及的表名及说明1.txt"):
    """Embed every record of a JSON-lines file and hand the rows to ``insert_vectors``.

    Steps, in order: open a database connection, call
    ``setup_vector_extension`` on it, load the records with
    ``txt_to_json_objects``, obtain one embedding per record from
    ``generation_vector`` (awaited one record at a time), and pass the
    collected ``(table name, JSON text, embedding)`` tuples to
    ``insert_vectors`` in a single call.

    Args:
        file_path: Text file with one JSON object per line. Each object
            must have a ``"表名"`` key, which is used as the first element
            of the stored tuple. Defaults to the file the script
            originally hard-coded.

    Raises:
        KeyError: If a record has no ``"表名"`` key.
        Any exception raised by the database or embedding helpers is
        propagated; the connection is closed in either case.
    """
    conn = connect_to_db()
    try:
        setup_vector_extension(conn)

        json_objects = txt_to_json_objects(file_path)

        sample_data = []
        for json_obj in json_objects:
            # NOTE(review): json.dumps defaults to ensure_ascii=True, so non-ASCII
            # text is embedded and stored as \uXXXX escapes. Confirm that is
            # intended before switching to ensure_ascii=False, since doing so
            # changes the stored content and the text sent for embedding.
            content = json.dumps(json_obj)
            emb = await generation_vector(content)
            sample_data.append((json_obj["表名"], content, emb))

        # Insert all rows in one call, after every embedding has been computed.
        insert_vectors(conn, sample_data)

        # To query afterwards, search_similar_vectors(conn, query_vector) is
        # imported at the top of this file.
    finally:
        # Release the connection even if embedding or insertion fails.
        conn.close()
# Run the async entry point with asyncio when this file is executed as a script.
import asyncio

if __name__ == "__main__":
    asyncio.run(main())