You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gcgj-dify-1.7.0/api/configs/ext/dataset_config.yml

43 lines
1.0 KiB
YAML

# Dataset creation settings: indexing, chunking rules and retrieval options.
# NOTE(review): the nesting below was reconstructed from the key names because
# the lines had lost their indentation; confirm against the consumer's schema.

# Disabled data-source stanza, kept for reference.
# data_source:
#   type: upload_file
#   info_list:
#     data_source_type: upload_file
#     file_info_list:
#       file_ids:
#         - none

indexing_technique: high_quality

process_rule:
  rules:
    # Both text clean-up rules are switched on.
    pre_processing_rules:
      - id: remove_extra_spaces
        enabled: true
      - id: remove_urls_emails
        enabled: true
    segmentation:
      # Quoted on purpose: a plain scalar starting with '&' would be read as
      # a YAML anchor.
      separator: '&&&&&'
      max_tokens: 500
      chunk_overlap: 50
  mode: custom

doc_form: text_model
doc_language: Chinese

retrieval_model:
  search_method: hybrid_search
  reranking_enable: true
  reranking_mode: weighted_score
  reranking_model:
    reranking_provider_name: langgenius/huggingface_tei/huggingface_tei
    reranking_model_name: bge-reranker-large
  # Vector and keyword weights add up to 1.0 (0.7 + 0.3).
  weights:
    weight_type: customized
    vector_setting:
      vector_weight: 0.7
      # Left empty here; presumably resolved from the embedding_model keys at
      # the bottom of this file -- TODO confirm.
      embedding_provider_name: ''
      embedding_model_name: ''
    keyword_setting:
      keyword_weight: 0.3
  top_k: 10
  # Threshold filtering is off, so the score_threshold value is unused.
  score_threshold_enabled: false
  score_threshold: 0

# Quoted so the ':' in the model tag cannot be misread by any parser.
embedding_model: 'bge-m3:latest'
embedding_model_provider: langgenius/ollama/ollama