LLaMA-Factory/evaluation/cmmlu/cmmlu.py

# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import datasets
import pandas as pd


# BibTeX entry for the CMMLU paper; passed as `citation` to DatasetInfo in `_info`.
_CITATION = """\
@article{li2023cmmlu,
  title={CMMLU: Measuring massive multitask language understanding in Chinese},
  author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},
  journal={arXiv preprint arXiv:2306.09212},
  year={2023}
}
"""

# Human-readable summary; passed as `description` to DatasetInfo in `_info`.
_DESCRIPTION = """\
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.
"""

# Upstream project page; passed as `homepage` to DatasetInfo in `_info`.
_HOMEPAGE = "https://github.com/haonan-li/CMMLU"

# License name; passed as `license` to DatasetInfo in `_info`.
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Archive handed to `dl_manager.download_and_extract` in `_split_generators`.
# NOTE(review): this is a bare file name rather than a full URL — presumably it
# is resolved relative to this script's location; confirm against the loader.
_URL = "cmmlu.zip"

# Subject identifiers of the benchmark. Each entry becomes the name of one
# builder config (see `CMMLU.BUILDER_CONFIGS`) and is also used as the CSV file
# stem when locating `test/<name>.csv` and `dev/<name>.csv` in the archive.
task_list = [
    "agronomy",
    "anatomy",
    "ancient_chinese",
    "arts",
    "astronomy",
    "business_ethics",
    "chinese_civil_service_exam",
    "chinese_driving_rule",
    "chinese_food_culture",
    "chinese_foreign_policy",
    "chinese_history",
    "chinese_literature",
    "chinese_teacher_qualification",
    "clinical_knowledge",
    "college_actuarial_science",
    "college_education",
    "college_engineering_hydrology",
    "college_law",
    "college_mathematics",
    "college_medical_statistics",
    "college_medicine",
    "computer_science",
    "computer_security",
    "conceptual_physics",
    "construction_project_management",
    "economics",
    "education",
    "electrical_engineering",
    "elementary_chinese",
    "elementary_commonsense",
    "elementary_information_and_technology",
    "elementary_mathematics",
    "ethnology",
    "food_science",
    "genetics",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_geography",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_politics",
    "human_sexuality",
    "international_law",
    "journalism",
    "jurisprudence",
    "legal_and_moral_basis",
    "logical",
    "machine_learning",
    "management",
    "marketing",
    "marxist_theory",
    "modern_chinese",
    "nutrition",
    "philosophy",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_study",
    "sociology",
    "sports_science",
    "traditional_chinese_medicine",
    "virology",
    "world_history",
    "world_religions",
]


class CMMLUConfig(datasets.BuilderConfig):
    """Builder configuration for one CMMLU subject, always at version 1.0.1."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``datasets.BuilderConfig`` with a fixed version.

        Args:
            **kwargs: Keyword arguments passed through unchanged to the parent
                constructor (e.g. ``name``). ``version`` must not be supplied,
                since it is already set here and a duplicate keyword raises
                ``TypeError``.
        """
        pinned_version = datasets.Version("1.0.1")
        super().__init__(version=pinned_version, **kwargs)


class CMMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for CMMLU with one configuration per entry of ``task_list``.

    Every configuration exposes two splits read from the extracted archive:
    ``test`` (from ``test/<subject>.csv``) and ``train`` (from
    ``dev/<subject>.csv``).
    """

    BUILDER_CONFIGS = [
        CMMLUConfig(
            name=task_name,
        )
        for task_name in task_list
    ]

    def _info(self):
        """Return the dataset metadata and the all-string feature schema."""
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download/extract the archive and describe the two splits.

        Args:
            dl_manager: Download manager supplied by ``datasets``; its
                ``download_and_extract`` result is used as the data directory.

        Returns:
            A list with the TEST split (``test/<config name>.csv``) followed by
            the TRAIN split (``dev/<config name>.csv``).
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, f"test/{task_name}.csv"),
                },
            ),
            datasets.SplitGenerator(
                # The archive's "dev" folder is exposed as the TRAIN split.
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(row_position, example)`` pairs from one subject CSV.

        The CSV is read with its first row as the header and its first column
        as the (discarded) index. The ``Question`` and ``Answer`` columns are
        renamed to ``question`` and ``answer``; if either column is absent the
        field defaults to an empty string. All other columns are passed through
        under their original names.

        Args:
            filepath: Path of the CSV file to read.

        Yields:
            Tuples of the zero-based row position and the example dict.
        """
        # Every declared feature is a string, so read all columns as text.
        # pandas' default inference would otherwise turn numeric-looking
        # option/answer columns into ints or floats (losing the original text,
        # e.g. "2" in a float column becomes 2.0) and turn NA-like tokens such
        # as "NA" or "null", as well as empty cells, into NaN.
        df = pd.read_csv(
            filepath,
            header=0,
            index_col=0,
            encoding="utf-8",
            dtype=str,
            keep_default_na=False,
        )
        for i, instance in enumerate(df.to_dict(orient="records")):
            instance["question"] = instance.pop("Question", "")
            instance["answer"] = instance.pop("Answer", "")
            yield i, instance
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
add license 2024-06-15 09:54:33 +00:00
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`import os`

			`import datasets`
			`import pandas as pd`


			`_CITATION = """\`
			`@article{li2023cmmlu,`
			`title={CMMLU: Measuring massive multitask language understanding in Chinese},`
			`author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},`
			`journal={arXiv preprint arXiv:2306.09212},`
			`year={2023}`
			`}`
			`"""`

			`_DESCRIPTION = """\`
			`CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.`
			`"""`

			`_HOMEPAGE = "https://github.com/haonan-li/CMMLU"`

			`_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"`

			`_URL = "cmmlu.zip"`

			`task_list = [`
fix eval scripts 2024-05-04 16:53:07 +00:00			`"agronomy",`
			`"anatomy",`
			`"ancient_chinese",`
			`"arts",`
			`"astronomy",`
			`"business_ethics",`
			`"chinese_civil_service_exam",`
			`"chinese_driving_rule",`
			`"chinese_food_culture",`
			`"chinese_foreign_policy",`
			`"chinese_history",`
			`"chinese_literature",`
			`"chinese_teacher_qualification",`
			`"clinical_knowledge",`
			`"college_actuarial_science",`
			`"college_education",`
			`"college_engineering_hydrology",`
			`"college_law",`
			`"college_mathematics",`
			`"college_medical_statistics",`
			`"college_medicine",`
			`"computer_science",`
			`"computer_security",`
			`"conceptual_physics",`
			`"construction_project_management",`
			`"economics",`
			`"education",`
			`"electrical_engineering",`
			`"elementary_chinese",`
			`"elementary_commonsense",`
			`"elementary_information_and_technology",`
			`"elementary_mathematics",`
			`"ethnology",`
			`"food_science",`
			`"genetics",`
			`"global_facts",`
			`"high_school_biology",`
			`"high_school_chemistry",`
			`"high_school_geography",`
			`"high_school_mathematics",`
			`"high_school_physics",`
			`"high_school_politics",`
			`"human_sexuality",`
			`"international_law",`
			`"journalism",`
			`"jurisprudence",`
			`"legal_and_moral_basis",`
			`"logical",`
			`"machine_learning",`
			`"management",`
			`"marketing",`
			`"marxist_theory",`
			`"modern_chinese",`
			`"nutrition",`
			`"philosophy",`
			`"professional_accounting",`
			`"professional_law",`
			`"professional_medicine",`
			`"professional_psychology",`
			`"public_relations",`
			`"security_study",`
			`"sociology",`
			`"sports_science",`
			`"traditional_chinese_medicine",`
			`"virology",`
			`"world_history",`
			`"world_religions",`
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`]`


			`class CMMLUConfig(datasets.BuilderConfig):`
			`def __init__(self, **kwargs):`
			`super().__init__(version=datasets.Version("1.0.1"), **kwargs)`


			`class CMMLU(datasets.GeneratorBasedBuilder):`
			`BUILDER_CONFIGS = [`
			`CMMLUConfig(`
			`name=task_name,`
			`)`
			`for task_name in task_list`
			`]`

			`def _info(self):`
			`features = datasets.Features(`
			`{`
			`"question": datasets.Value("string"),`
			`"A": datasets.Value("string"),`
			`"B": datasets.Value("string"),`
			`"C": datasets.Value("string"),`
			`"D": datasets.Value("string"),`
			`"answer": datasets.Value("string"),`
			`}`
			`)`
			`return datasets.DatasetInfo(`
			`description=_DESCRIPTION,`
			`features=features,`
			`homepage=_HOMEPAGE,`
			`license=_LICENSE,`
			`citation=_CITATION,`
			`)`

			`def _split_generators(self, dl_manager):`
			`data_dir = dl_manager.download_and_extract(_URL)`
			`task_name = self.config.name`
			`return [`
			`datasets.SplitGenerator(`
			`name=datasets.Split.TEST,`
			`gen_kwargs={`
			`"filepath": os.path.join(data_dir, f"test/{task_name}.csv"),`
			`},`
			`),`
			`datasets.SplitGenerator(`
			`name=datasets.Split.TRAIN,`
			`gen_kwargs={`
			`"filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),`
			`},`
			`),`
			`]`

			`def _generate_examples(self, filepath):`
			`df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")`
			`for i, instance in enumerate(df.to_dict(orient="records")):`
fix #1068 #1074 2023-09-28 06:39:16 +00:00			`question = instance.pop("Question", "")`
			`answer = instance.pop("Answer", "")`
			`instance["question"] = question`
			`instance["answer"] = answer`
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`yield i, instance`