# toycorp-bm25s / config.yaml
# Last commit: "Update FlexRAG retriever" by zhuocheng (4cbdc2d, verified)
# Size as published: 912 bytes
# Retriever configuration (BM25S index, FlexRAG retriever).
# Nesting is significant: several key names (tokenizer_config, lang,
# max_tokens, ...) recur at different levels, so flattening this file
# produces duplicate keys and changes its meaning.

# General retriever settings.
log_interval: 1000
top_k: 10
batch_size: 32

# Text-processing pipeline applied to queries.
# processor_type is an empty list, so presumably no processor is enabled and
# the per-processor sections below only carry defaults -- TODO confirm
# against the consumer.
query_preprocess_pipeline:
  processor_type: []

  # Length filter bounds; null leaves a bound unset.
  length_filter_config:
    max_tokens: null
    min_tokens: null
    max_chars: null
    min_chars: null
    max_bytes: null
    min_bytes: null
    # Tokenizer belonging to the length filter (presumably used for the
    # *_tokens bounds -- verify).
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      lang: null

  # Token normalization options.
  # NOTE(review): field names look like sacremoses MosesPunctNormalizer
  # arguments -- confirm.
  token_normalize_config:
    lang: en
    penn: true
    norm_quote_commas: true
    norm_numbers: true
    pre_replace_unicode_punct: false
    post_remove_control_chars: false
    perl_parity: false

  # Truncation limits; null leaves a limit unset.
  truncate_config:
    max_chars: null
    max_bytes: null
    max_tokens: null
    # Tokenizer belonging to the truncator (presumably used for max_tokens
    # -- verify).
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      lang: null

# Index location; null here, so it is presumably supplied at load time.
database_path: null

# Scoring settings.
# NOTE(review): key names match the bm25s library's BM25 options (variant,
# IDF override, backend, k1/b/delta) -- confirm against the consumer.
method: lucene
idf_method: null
backend: auto
k1: 1.5
b: 0.75
delta: 0.5

# Top-level language setting; distinct from the nested `lang` keys above.
lang: english

# Document fields included in the index.
indexed_fields:
  - text