---
# Dataset document settings: indexing technique, chunking rules, retrieval
# model, and embedding model.
#
# NOTE(review): this file had been collapsed onto a single line beginning
# with '#', which turned the entire document into one comment. The nesting
# below is reconstructed from key order and the Dify dataset API schema;
# confirm it against the consumer before relying on it.

# Disabled data-source stanza, kept commented out as in the original.
# data_source:
#   type: upload_file
#   info_list:
#     data_source_type: upload_file
#     file_info_list:
#       file_ids:
#         - none

indexing_technique: high_quality

process_rule:
  rules:
    pre_processing_rules:
      - id: remove_extra_spaces
        enabled: true
      - id: remove_urls_emails
        enabled: true
    segmentation:
      # Must stay quoted: a plain scalar starting with '&' is parsed as an
      # anchor, not as a string.
      separator: '&&&&&'
      max_tokens: 500
      chunk_overlap: 50
  # NOTE(review): 'mode' placed under process_rule (sibling of 'rules'),
  # matching the Dify process_rule schema; confirm.
  mode: custom

doc_form: text_model
doc_language: Chinese

retrieval_model:
  search_method: hybrid_search
  reranking_enable: true
  reranking_mode: weighted_score
  reranking_model:
    reranking_provider_name: langgenius/huggingface_tei/huggingface_tei
    reranking_model_name: bge-reranker-large
  weights:
    weight_type: customized
    # vector_weight (0.7) and keyword_weight (0.3) sum to 1.0.
    vector_setting:
      vector_weight: 0.7
      # Left as empty strings in the original; presumably resolved from the
      # top-level embedding_model settings; verify against the consumer.
      embedding_provider_name: ''
      embedding_model_name: ''
    keyword_setting:
      keyword_weight: 0.3
  top_k: 10
  score_threshold_enabled: false
  score_threshold: 0

# Quoting preserved from the original; the value contains a ':'.
embedding_model: 'bge-m3:latest'
embedding_model_provider: langgenius/ollama/ollama