QueryGym
QueryGym Leaderboard
Reproducible benchmarks for LLM query reformulation.
← Methods

lamer

lamer
All results produced by QueryGym · fully reproducible!

12 model × retriever configurations for this method across BEIR, MS MARCO DL, and DL-HARD.
Click any row or the + button to expand. Tabs switch dataset context. The three steps (reformulate → retrieve → evaluate) update accordingly.

Retriever
Model
Datasets
Metric
12 / 12 configs
best in column
Model | Retriever | ArguAna | DBPedia | FiQA | SciFact | COVID | News | BRIGHT — AOPS | BRIGHT — Biology | BRIGHT — Earth Science | BRIGHT — Economics | BRIGHT — LeetCode | BRIGHT — Pony | BRIGHT — Psychology | BRIGHT — Robotics | BRIGHT — Stack Overflow | BRIGHT — Sustainable Living | BRIGHT — TheoremQA Questions | BRIGHT — TheoremQA Theorems | DL-HARD | DL 2019 | DL 2020
nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@100 nDCG@10 R@1k nDCG@10 R@1k nDCG@10 R@1k
Qwen2.5-72B-Instruct BGE-base-en-v1.5 0.6210 0.9893 0.4139 0.5001 0.4096 0.7483 0.7524 0.9800 0.7941 0.1401 0.4512 0.4936 0.4055 0.8453 0.7219 0.8859 0.7276 0.9045
method: lamer · llm: Qwen2.5-72B-Instruct · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-72B-Instruct BM25 0.4111 0.4010 0.5217 0.2395 0.7251 0.7240 0.1667 0.4677 0.6105 0.3635 0.7820 0.6651 0.8666 0.6711 0.8920
method: lamer · llm: Qwen2.5-72B-Instruct · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-72B-Instruct SPLADE++ 0.5161 0.9815 0.3697 0.4883 0.3041 0.6516 0.7046 0.9600 0.6543 0.1057 0.4161 0.4850 0.3648 0.8156 0.6651 0.8956 0.6483 0.9195
method: lamer · llm: Qwen2.5-72B-Instruct · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model Qwen/Qwen2.5-72B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-7B-Instruct BGE-base-en-v1.5 0.6195 0.9908 0.3900 0.4838 0.3981 0.7318 0.7466 0.9733 0.7843 0.1360 0.4517 0.4753 0.3788 0.8315 0.7113 0.8668 0.6825 0.8940
method: lamer · llm: Qwen2.5-7B-Instruct · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-7B-Instruct BM25 0.4063 0.9388 0.3896 0.5139 0.2337 0.5558 0.7140 0.9593 0.6955 0.1704 0.4424 0.5960 0.3570 0.7633 0.6602 0.8553 0.6322 0.8933
method: lamer · llm: Qwen2.5-7B-Instruct · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
Qwen2.5-7B-Instruct SPLADE++ 0.5148 0.9794 0.3499 0.4799 0.2944 0.6487 0.6651 0.9560 0.6339 0.1002 0.3967 0.4728 0.3280 0.7917 0.6465 0.8654 0.6076 0.9213
method: lamer · llm: Qwen2.5-7B-Instruct · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model Qwen/Qwen2.5-7B-Instruct \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1 BGE-base-en-v1.5 0.6204 0.9893 0.4018 0.4998 0.4080 0.7410 0.7572 0.9733 0.7796 0.1373 0.4367 0.4591 0.4120 0.8557 0.7032 0.8888 0.7148 0.9026
method: lamer · llm: gpt-4.1 · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1 BM25 0.4119 0.9452 0.3989 0.5159 0.2616 0.5901 0.7253 0.9487 0.7020 0.1661 0.4799 0.5960 0.3555 0.8065 0.6368 0.8566 0.6530 0.9002
method: lamer · llm: gpt-4.1 · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1 SPLADE++ 0.3836 0.9829 0.3559 0.4904 0.3292 0.6724 0.7182 0.9577 0.6312 0.1081 0.4520 0.4770 0.3673 0.8246 0.6836 0.9065 0.6390 0.9378
method: lamer · llm: gpt-4.1 · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model openai/gpt-4.1 \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1-nano BGE-base-en-v1.5 0.6254 0.9900 0.3827 0.4804 0.4009 0.7310 0.7507 0.9593 0.8007 0.1340 0.4060 0.4264 0.3759 0.8352 0.7265 0.8894 0.7135 0.8846
method: lamer · llm: gpt-4.1-nano · retriever: BGE-base-en-v1.5
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BGE-base-en-v1.5 (dense)
python -m pyserini.search.faiss \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.bge-base-en-v1.5 \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder BAAI/bge-base-en-v1.5 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1-nano BM25 0.4037 0.9388 0.3440 0.4807 0.2360 0.5449 0.7220 0.9393 0.6721 0.1748 0.4328 0.5575 0.3398 0.7697 0.6731 0.8548 0.6560 0.8865
method: lamer · llm: gpt-4.1-nano · retriever: BM25
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.flat \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · BM25 (lexical)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --bm25 --k1 0.9 --b 0.4 \
  --output run.txt \
  --hits 1000
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt
gpt-4.1-nano SPLADE++ 0.3800 0.9780 0.3316 0.4680 0.3014 0.6543 0.7207 0.9443 0.6285 0.1143 0.4012 0.4661 0.3459 0.7969 0.6916 0.8975 0.6254 0.9244
method: lamer · llm: gpt-4.1-nano · retriever: SPLADE++
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-arguana \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-arguana.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-arguana-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-dbpedia-entity \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-dbpedia-entity.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-dbpedia-entity-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-fiqa \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-fiqa.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-fiqa-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-scifact \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-scifact.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-scifact-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-covid \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-covid.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-covid-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset beir-v1.0.0-trec-news \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index beir-v1.0.0-trec-news.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@100
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.100 \
  beir-v1.0.0-trec-news-test run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.dlhard \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  /mnt/data/son/Thesis/t5/data/dlhard/neutral_queries.tsv run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2019 \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl19-passage run.txt
1 reformulate querygym → reformulated_queries.tsv
python examples/querygym_pyserini/pipeline.py \
    --dataset msmarco-v1-passage.trecdl2020 \
    --method lamer \
    --model openai/gpt-4.1-nano \
    --steps reformulate \
    --temperature 1 \
    --max-tokens 128 \
    --method-params '{"num_examples":4,"train_split":"train"}' \
    --output-dir outputs/reproduce
2 retrieve pyserini · SPLADE++ (learned_sparse)
python -m pyserini.search.lucene \
  --threads 16 --batch-size 128 \
  --index msmarco-v1-passage.splade-pp-ed \
  --topics outputs/reproduce/queries/reformulated_queries.tsv \
  --encoder naver/splade-cocondenser-ensembledistil \
  --output run.txt \
  --hits 1000 --impact
3 evaluate trec_eval · nDCG@10 + R@1k
python -m pyserini.eval.trec_eval -c -m ndcg.cut.10 -m recall.1000 \
  dl20-passage run.txt