Skip to content

Fix instruction retrieval #1072

Merged
orionw merged 10 commits into
embeddings-benchmark:main from
Samoed:fix_instruction_retrival
Jul 19, 2024
Merged

Fix instruction retrieval #1072
orionw merged 10 commits into
embeddings-benchmark:main from
Samoed:fix_instruction_retrival

Conversation

@Samoed

@Samoed Samoed commented Jul 10, 2024

Copy link
Copy Markdown
Member

Checklist

  • Run tests locally to make sure nothing is broken using make test.
  • Run the formatter to format the code using make lint.

Result of running the task, as written to Core17InstructionRetrieval.json:

{
  "dataset_revision": "e39ff896cf3efbbdeeb950e6bd7c79f266995b07",
  "evaluation_time": 24.111242532730103,
  "kg_co2_emissions": null,
  "mteb_version": "1.12.57",
  "scores": {
    "test": [
      {
        "hf_subset": "default",
        "individual_changed_map_at_1": 0.0068,
        "individual_changed_map_at_10": 0.03596,
        "individual_changed_map_at_100": 0.06657,
        "individual_changed_map_at_1000": 0.12378,
        "individual_changed_map_at_20": 0.04172,
        "individual_changed_map_at_3": 0.01873,
        "individual_changed_map_at_5": 0.02736,
        "individual_changed_mrr_at_1": 0.25,
        "individual_changed_mrr_at_10": 0.42825396825396816,
        "individual_changed_mrr_at_100": 0.43742916006982496,
        "individual_changed_mrr_at_1000": 0.43742916006982496,
        "individual_changed_mrr_at_20": 0.43575396825396817,
        "individual_changed_mrr_at_3": 0.3916666666666667,
        "individual_changed_mrr_at_5": 0.40166666666666667,
        "individual_changed_naucs_at_1000_diff1": 0.18180116558220433,
        "individual_changed_naucs_at_1000_max": -0.006367217597249062,
        "individual_changed_naucs_at_1000_std": -0.2552568223838718,
        "individual_changed_naucs_at_100_diff1": 0.18180116558220433,
        "individual_changed_naucs_at_100_max": -0.006367217597249062,
        "individual_changed_naucs_at_100_std": -0.2552568223838718,
        "individual_changed_naucs_at_10_diff1": 0.17946233138402323,
        "individual_changed_naucs_at_10_max": -0.002408481438124014,
        "individual_changed_naucs_at_10_std": -0.26665099704773765,
        "individual_changed_naucs_at_1_diff1": 0.09106289076866483,
        "individual_changed_naucs_at_1_max": -0.01044501655020222,
        "individual_changed_naucs_at_1_std": -0.025891872011769124,
        "individual_changed_naucs_at_20_diff1": 0.1850094213714369,
        "individual_changed_naucs_at_20_max": -0.006223321132769148,
        "individual_changed_naucs_at_20_std": -0.25812005104283436,
        "individual_changed_naucs_at_3_diff1": 0.1944447740297338,
        "individual_changed_naucs_at_3_max": -0.0059206701391770825,
        "individual_changed_naucs_at_3_std": -0.25864667006798675,
        "individual_changed_naucs_at_5_diff1": 0.20187474658291793,
        "individual_changed_naucs_at_5_max": -0.017027228690772077,
        "individual_changed_naucs_at_5_std": -0.2562193650237642,
        "individual_changed_ndcg_at_1": 0.2,
        "individual_changed_ndcg_at_10": 0.18404,
        "individual_changed_ndcg_at_100": 0.19755,
        "individual_changed_ndcg_at_1000": 0.51787,
        "individual_changed_ndcg_at_20": 0.16513,
        "individual_changed_ndcg_at_3": 0.19874,
        "individual_changed_ndcg_at_5": 0.17855,
        "individual_changed_precision_at_1": 0.25,
        "individual_changed_precision_at_10": 0.22,
        "individual_changed_precision_at_100": 0.1055,
        "individual_changed_precision_at_1000": 0.0545,
        "individual_changed_precision_at_20": 0.165,
        "individual_changed_precision_at_3": 0.25,
        "individual_changed_precision_at_5": 0.22,
        "individual_changed_recall_at_1": 0.0068,
        "individual_changed_recall_at_10": 0.0547,
        "individual_changed_recall_at_100": 0.21482,
        "individual_changed_recall_at_1000": 1.0,
        "individual_changed_recall_at_20": 0.07715,
        "individual_changed_recall_at_3": 0.02189,
        "individual_changed_recall_at_5": 0.03313,
        "individual_original_map_at_1": 0.0024,
        "individual_original_map_at_10": 0.01247,
        "individual_original_map_at_100": 0.02979,
        "individual_original_map_at_1000": 0.06491,
        "individual_original_map_at_20": 0.01682,
        "individual_original_map_at_3": 0.00828,
        "individual_original_map_at_5": 0.00911,
        "individual_original_mrr_at_1": 0.1,
        "individual_original_mrr_at_10": 0.2049206349206349,
        "individual_original_mrr_at_100": 0.21969562686559535,
        "individual_original_mrr_at_1000": 0.22023731248888928,
        "individual_original_mrr_at_20": 0.2147123015873016,
        "individual_original_mrr_at_3": 0.15833333333333333,
        "individual_original_mrr_at_5": 0.17833333333333334,
        "individual_original_naucs_at_1000_diff1": 0.1956179430385096,
        "individual_original_naucs_at_1000_max": 0.3631162380582865,
        "individual_original_naucs_at_1000_std": -0.33772224370667286,
        "individual_original_naucs_at_100_diff1": 0.1949088022123342,
        "individual_original_naucs_at_100_max": 0.361846795447621,
        "individual_original_naucs_at_100_std": -0.3375473116005373,
        "individual_original_naucs_at_10_diff1": 0.19370837022534854,
        "individual_original_naucs_at_10_max": 0.3874446149976966,
        "individual_original_naucs_at_10_std": -0.3444738084179253,
        "individual_original_naucs_at_1_diff1": 0.29246042393345845,
        "individual_original_naucs_at_1_max": 0.22806546820499052,
        "individual_original_naucs_at_1_std": -0.1381808425006709,
        "individual_original_naucs_at_20_diff1": 0.1829247099169734,
        "individual_original_naucs_at_20_max": 0.359594932697118,
        "individual_original_naucs_at_20_std": -0.3293361901308791,
        "individual_original_naucs_at_3_diff1": 0.18519328098464846,
        "individual_original_naucs_at_3_max": 0.4222252114404855,
        "individual_original_naucs_at_3_std": -0.3266019874564928,
        "individual_original_naucs_at_5_diff1": 0.23491488189720652,
        "individual_original_naucs_at_5_max": 0.3639756330485743,
        "individual_original_naucs_at_5_std": -0.3457851627559816,
        "individual_original_ndcg_at_1": 0.075,
        "individual_original_ndcg_at_10": 0.08397,
        "individual_original_ndcg_at_100": 0.12348,
        "individual_original_ndcg_at_1000": 0.4038,
        "individual_original_ndcg_at_20": 0.08608,
        "individual_original_ndcg_at_3": 0.08588,
        "individual_original_ndcg_at_5": 0.07727,
        "individual_original_precision_at_1": 0.1,
        "individual_original_precision_at_10": 0.105,
        "individual_original_precision_at_100": 0.058,
        "individual_original_precision_at_1000": 0.0327,
        "individual_original_precision_at_20": 0.0925,
        "individual_original_precision_at_3": 0.11667,
        "individual_original_precision_at_5": 0.1,
        "individual_original_recall_at_1": 0.0024,
        "individual_original_recall_at_10": 0.03134,
        "individual_original_recall_at_100": 0.17071,
        "individual_original_recall_at_1000": 1.0,
        "individual_original_recall_at_20": 0.05663,
        "individual_original_recall_at_3": 0.01431,
        "individual_original_recall_at_5": 0.01759,
        "languages": [
          "eng-Latn"
        ],
        "main_score": 0.0,
        "p-MRR": 0.0
      }
    ]
  },
  "task_name": "Core17InstructionRetrieval"
}

Previously, when I attempted to run this task, I encountered the error `DenseRetrievalExactSearch.__init__() got multiple values for argument 'model'`. In addition, the old output format caused the task to fail to execute correctly. This PR also follows up on #1037.

Loading
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants