Skip to content

Commit

Permalink
updated file version | added args to exact_match
Browse files Browse the repository at this point in the history
  • Loading branch information
RawthiL committed Dec 4, 2024
1 parent 97dccd3 commit 578dbdb
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
4 changes: 3 additions & 1 deletion lm_eval/tasks/mmlu/generative/_default_template_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
+ignore_punctuation: true
+ignore_case: true
filter_list:
- name: get_response
filter:
Expand All @@ -27,6 +29,6 @@ filter_list:
regex_pattern: "^(.*?)\\s*$"
- function: take_first
metadata:
-version: 2.0
+version: 3.0
dataset_kwargs:
trust_remote_code: true
22 changes: 16 additions & 6 deletions lm_eval/tasks/mmlu/generative/_mmlu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,43 @@ task:
- mmlu_stem_generative
aggregate_metric_list:
- metric: exact_match
-weight_by_size: True
+weight_by_size: true
+ignore_punctuation: true
+ignore_case: true
filter_list: get_response
- group: other
task:
- mmlu_other_generative
aggregate_metric_list:
- metric: exact_match
-weight_by_size: True
+weight_by_size: true
+ignore_punctuation: true
+ignore_case: true
filter_list: get_response
- group: social sciences
task:
- mmlu_social_sciences_generative
aggregate_metric_list:
- metric: exact_match
-weight_by_size: True
+weight_by_size: true
+ignore_punctuation: true
+ignore_case: true
filter_list: get_response
- group: humanities
task:
- mmlu_humanities_generative
aggregate_metric_list:
- metric: exact_match
-weight_by_size: True
+weight_by_size: true
+ignore_punctuation: true
+ignore_case: true
filter_list: get_response
aggregate_metric_list:
- aggregation: mean
metric: exact_match
-weight_by_size: True
+weight_by_size: true
+ignore_punctuation: true
+ignore_case: true
filter_list: get_response
metadata:
-version: 2
+version: 3

0 comments on commit 578dbdb

Please sign in to comment.