QueryGym
QueryGym Leaderboard
Reproducible benchmarks for LLM query reformulation.
← Methods

csqe

csqe
All results produced by QueryGym · fully reproducible!

12 model × retriever configurations for this method across BEIR, MS MARCO DL, and DL-HARD.
Click any row or the + button to expand. Tabs switch dataset context. The three steps (reformulate → retrieve → evaluate) update accordingly.

Retriever
Model
Datasets
Metric
12 / 12 configs
best in column
Model | Retriever | ArguAna | DBPedia | FiQA | SciFact | COVID | News | BRIGHT — AOPS | BRIGHT — Biology | BRIGHT — Earth Science | BRIGHT — Economics | BRIGHT — LeetCode | BRIGHT — Pony | BRIGHT — Psychology | BRIGHT — Robotics | BRIGHT — Stack Overflow | BRIGHT — Sustainable Living | BRIGHT — TheoremQA Questions | BRIGHT — TheoremQA Theorems | DL-HARD | DL 2019 | DL 2020
nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@1k nDCG@10 R@1k nDCG@10 R@1k
Qwen2.5-72B-Instruct BGE-base-en-v1.5 0.6229 0.9886 0.4024 0.4897 0.3796 0.7461 0.7484 0.9667 0.7793 0.1410 0.4626 0.4812 0.3757 0.8531 0.7179 0.8944 0.6687 0.8722
method: csqe · llm: Qwen2.5-72B-Instruct · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for ArguAna; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for DBPedia; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for FiQA; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for SciFact; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-COVID; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-NEWS; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for DL-HARD; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
# NOTE(review): the qrels argument below is a machine-local absolute path and its name looks like a queries file,
# not relevance judgments — replace it with the DL-HARD passage qrels before running. TODO confirm the intended file.
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2019; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2020; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-72B-Instruct BM25 0.3864 0.3556 0.4639 0.2132 0.7141 0.6716 0.1491 0.3861 0.4892 0.2848 0.6998 0.6391 0.8608 0.5606 0.8603
method: csqe · llm: Qwen2.5-72B-Instruct · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10
# Score run.txt for ArguAna (nDCG@10 only); trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for DBPedia; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10
# Score run.txt for FiQA (nDCG@10 only); trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10
# Score run.txt for SciFact (nDCG@10 only); trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-COVID; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-NEWS; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for DL-HARD; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
# NOTE(review): the qrels argument below is a machine-local absolute path and its name looks like a queries file,
# not relevance judgments — replace it with the DL-HARD passage qrels before running. TODO confirm the intended file.
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2019; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2020; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-72B-Instruct SPLADE++ 0.5118 0.9787 0.3686 0.5021 0.3075 0.6521 0.6966 0.9433 0.6118 0.1082 0.3871 0.4548 0.2857 0.8246 0.6189 0.9070 0.5736 0.9052
method: csqe · llm: Qwen2.5-72B-Instruct · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for ArguAna; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for DBPedia; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for FiQA; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for SciFact; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-COVID; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-NEWS; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for DL-HARD; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
# NOTE(review): the qrels argument below is a machine-local absolute path and its name looks like a queries file,
# not relevance judgments — replace it with the DL-HARD passage qrels before running. TODO confirm the intended file.
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2019; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2020; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-7B-Instruct BGE-base-en-v1.5 0.6231 0.9893 0.3826 0.4879 0.3939 0.7437 0.7415 0.9727 0.7862 0.1449 0.4360 0.5126 0.3671 0.8348 0.7127 0.8803 0.6885 0.8850
method: csqe · llm: Qwen2.5-7B-Instruct · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for ArguAna; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for DBPedia; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for FiQA; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for SciFact; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-COVID; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for TREC-NEWS; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for DL-HARD; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
# NOTE(review): the qrels argument below is a machine-local absolute path and its name looks like a queries file,
# not relevance judgments — replace it with the DL-HARD passage qrels before running. TODO confirm the intended file.
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2019; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
# Score run.txt for TREC DL 2020; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-7B-Instruct BM25 0.4008 0.9403 0.3767 0.5078 0.2200 0.5466 0.7183 0.9543 0.6757 0.1600 0.4504 0.5795 0.3322 0.7913 0.6873 0.8921 0.6083 0.8596
method: csqe · llm: Qwen2.5-7B-Instruct · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
# Score run.txt for ArguAna; trec_eval spells the cutoff measure "ndcg_cut.<k>" ("ndcg.cut.10" is not a valid measure name).
python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"mode":"zs","num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-7B-Instruct SPLADE++ 0.5100 0.9801 0.3661 0.4830 0.3035 0.6521 0.6765 0.9527 0.6096 0.1024 0.4079 0.4866 0.3025 0.8057 0.6523 0.9089 0.6164 0.9039
method: csqe · llm: Qwen2.5-7B-Instruct · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1 BGE-base-en-v1.5 0.6218 0.9915 0.4242 0.5229 0.4067 0.7384 0.7553 0.9633 0.7879 0.1431 0.4631 0.5075 0.4144 0.8640 0.7551 0.9009 0.7139 0.8968
method: csqe · llm: gpt-4.1 · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1 BM25 0.3977 0.9445 0.3899 0.5136 0.2473 0.5835 0.7206 0.9487 0.6994 0.1638 0.4790 0.5909 0.3658 0.7873 0.6899 0.9035 0.6548 0.8871
method: csqe · llm: gpt-4.1 · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1 SPLADE++ 0.3801 0.9829 0.3962 0.5232 0.3294 0.6748 0.7065 0.9593 0.6811 0.1116 0.4502 0.5018 0.3690 0.8341 0.6936 0.9193 0.6796 0.9397
method: csqe · llm: gpt-4.1 · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  "${DLHARD_QRELS:?path to the DL-HARD passage qrels file}" run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1-nano BGE-base-en-v1.5 0.6210 0.9886 0.4147 0.5123 0.4112 0.7489 0.7583 0.9600 0.8174 0.1442 0.4351 0.4753 0.3516 0.8371 0.7304 0.8749 0.6873 0.8535
method: csqe · llm: gpt-4.1-nano · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  "${DLHARD_QRELS:?path to the DL-HARD passage qrels file}" run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1-nano BM25 0.3964 0.9381 0.3647 0.4939 0.2401 0.5553 0.7099 0.9587 0.6171 0.1543 0.4271 0.5221 0.2436 0.7327 0.5410 0.8221 0.5142 0.8586
method: csqe · llm: gpt-4.1-nano · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  "${DLHARD_QRELS:?path to the DL-HARD passage qrels file}" run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1-nano SPLADE++ 0.3792 0.9801 0.3805 0.5235 0.3256 0.6702 0.7055 0.9533 0.6313 0.1132 0.4193 0.4601 0.2789 0.7872 0.6134 0.8900 0.5883 0.9119
method: csqe · llm: gpt-4.1-nano · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  "${DLHARD_QRELS:?path to the DL-HARD passage qrels file}" run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method csqe \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt