LLaMA-Factory/evaluation/cmmlu/cmmlu.py

# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import datasets
import pandas as pd


_CITATION = """\
@article{li2023cmmlu,
  title={CMMLU: Measuring massive multitask language understanding in Chinese},
  author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},
  journal={arXiv preprint arXiv:2306.09212},
  year={2023}
}
"""

_DESCRIPTION = """\
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.
"""

_HOMEPAGE = "https://github.com/haonan-li/CMMLU"

_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

_URL = "cmmlu.zip"

task_list = [
     'agronomy',
     'anatomy',
     'ancient_chinese',
     'arts',
     'astronomy',
     'business_ethics',
     'chinese_civil_service_exam',
     'chinese_driving_rule',
     'chinese_food_culture',
     'chinese_foreign_policy',
     'chinese_history',
     'chinese_literature',
     'chinese_teacher_qualification',
     'clinical_knowledge',
     'college_actuarial_science',
     'college_education',
     'college_engineering_hydrology',
     'college_law',
     'college_mathematics',
     'college_medical_statistics',
     'college_medicine',
     'computer_science',
     'computer_security',
     'conceptual_physics',
     'construction_project_management',
     'economics',
     'education',
     'electrical_engineering',
     'elementary_chinese',
     'elementary_commonsense',
     'elementary_information_and_technology',
     'elementary_mathematics',
     'ethnology',
     'food_science',
     'genetics',
     'global_facts',
     'high_school_biology',
     'high_school_chemistry',
     'high_school_geography',
     'high_school_mathematics',
     'high_school_physics',
     'high_school_politics',
     'human_sexuality',
     'international_law',
     'journalism',
     'jurisprudence',
     'legal_and_moral_basis',
     'logical',
     'machine_learning',
     'management',
     'marketing',
     'marxist_theory',
     'modern_chinese',
     'nutrition',
     'philosophy',
     'professional_accounting',
     'professional_law',
     'professional_medicine',
     'professional_psychology',
     'public_relations',
     'security_study',
     'sociology',
     'sports_science',
     'traditional_chinese_medicine',
     'virology',
     'world_history',
     'world_religions',
]


class CMMLUConfig(datasets.BuilderConfig):
    def __init__(self, **kwargs):
        super().__init__(version=datasets.Version("1.0.1"), **kwargs)


class CMMLU(datasets.GeneratorBasedBuilder):
    BUILDER_CONFIGS = [
        CMMLUConfig(
            name=task_name,
        )
        for task_name in task_list
    ]

    def _info(self):
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, f"test/{task_name}.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
        for i, instance in enumerate(df.to_dict(orient="records")):
            question = instance.pop("Question", "")
            answer = instance.pop("Answer", "")
            instance["question"] = question
            instance["answer"] = answer
            yield i, instance
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`import os`

			`import datasets`
			`import pandas as pd`


			`_CITATION = """\`
			`@article{li2023cmmlu,`
			`title={CMMLU: Measuring massive multitask language understanding in Chinese},`
			`author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},`
			`journal={arXiv preprint arXiv:2306.09212},`
			`year={2023}`
			`}`
			`"""`

			`_DESCRIPTION = """\`
			`CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.`
			`"""`

			`_HOMEPAGE = "https://github.com/haonan-li/CMMLU"`

			`_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"`

			`_URL = "cmmlu.zip"`

			`task_list = [`
			`'agronomy',`
			`'anatomy',`
			`'ancient_chinese',`
			`'arts',`
			`'astronomy',`
			`'business_ethics',`
			`'chinese_civil_service_exam',`
			`'chinese_driving_rule',`
			`'chinese_food_culture',`
			`'chinese_foreign_policy',`
			`'chinese_history',`
			`'chinese_literature',`
			`'chinese_teacher_qualification',`
			`'clinical_knowledge',`
			`'college_actuarial_science',`
			`'college_education',`
			`'college_engineering_hydrology',`
			`'college_law',`
			`'college_mathematics',`
			`'college_medical_statistics',`
			`'college_medicine',`
			`'computer_science',`
			`'computer_security',`
			`'conceptual_physics',`
			`'construction_project_management',`
			`'economics',`
			`'education',`
			`'electrical_engineering',`
			`'elementary_chinese',`
			`'elementary_commonsense',`
			`'elementary_information_and_technology',`
			`'elementary_mathematics',`
			`'ethnology',`
			`'food_science',`
			`'genetics',`
			`'global_facts',`
			`'high_school_biology',`
			`'high_school_chemistry',`
			`'high_school_geography',`
			`'high_school_mathematics',`
			`'high_school_physics',`
			`'high_school_politics',`
			`'human_sexuality',`
			`'international_law',`
			`'journalism',`
			`'jurisprudence',`
			`'legal_and_moral_basis',`
			`'logical',`
			`'machine_learning',`
			`'management',`
			`'marketing',`
			`'marxist_theory',`
			`'modern_chinese',`
			`'nutrition',`
			`'philosophy',`
			`'professional_accounting',`
			`'professional_law',`
			`'professional_medicine',`
			`'professional_psychology',`
			`'public_relations',`
			`'security_study',`
			`'sociology',`
			`'sports_science',`
			`'traditional_chinese_medicine',`
			`'virology',`
			`'world_history',`
			`'world_religions',`
			`]`


			`class CMMLUConfig(datasets.BuilderConfig):`
			`def __init__(self, **kwargs):`
			`super().__init__(version=datasets.Version("1.0.1"), **kwargs)`


			`class CMMLU(datasets.GeneratorBasedBuilder):`
			`BUILDER_CONFIGS = [`
			`CMMLUConfig(`
			`name=task_name,`
			`)`
			`for task_name in task_list`
			`]`

			`def _info(self):`
			`features = datasets.Features(`
			`{`
			`"question": datasets.Value("string"),`
			`"A": datasets.Value("string"),`
			`"B": datasets.Value("string"),`
			`"C": datasets.Value("string"),`
			`"D": datasets.Value("string"),`
			`"answer": datasets.Value("string"),`
			`}`
			`)`
			`return datasets.DatasetInfo(`
			`description=_DESCRIPTION,`
			`features=features,`
			`homepage=_HOMEPAGE,`
			`license=_LICENSE,`
			`citation=_CITATION,`
			`)`

			`def _split_generators(self, dl_manager):`
			`data_dir = dl_manager.download_and_extract(_URL)`
			`task_name = self.config.name`
			`return [`
			`datasets.SplitGenerator(`
			`name=datasets.Split.TEST,`
			`gen_kwargs={`
			`"filepath": os.path.join(data_dir, f"test/{task_name}.csv"),`
			`},`
			`),`
			`datasets.SplitGenerator(`
			`name=datasets.Split.TRAIN,`
			`gen_kwargs={`
			`"filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),`
			`},`
			`),`
			`]`

			`def _generate_examples(self, filepath):`
			`df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")`
			`for i, instance in enumerate(df.to_dict(orient="records")):`
fix #1068 #1074 2023-09-28 06:39:16 +00:00			`question = instance.pop("Question", "")`
			`answer = instance.pop("Answer", "")`
			`instance["question"] = question`
			`instance["answer"] = answer`
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`yield i, instance`