LLaMA-Factory/evaluation/ceval/ceval.py

# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import datasets
import pandas as pd


# BibTeX entry for the C-Eval paper; passed as `citation` to `DatasetInfo`.
_CITATION = """\
@article{huang2023ceval,
  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, 
  author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
  journal={arXiv preprint arXiv:2305.08322},
  year={2023}
}
"""

# Short human-readable summary; passed as `description` to `DatasetInfo`.
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""

# Project homepage reported in the dataset metadata.
_HOMEPAGE = "https://cevalbenchmark.com"

# License name reported in the dataset metadata.
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Archive handed to `dl_manager.download_and_extract`. It is a bare file name,
# so it is presumably resolved relative to this script -- TODO confirm.
_URL = "ceval.zip"

# Names of the 52 C-Eval subjects. Each name becomes one builder configuration
# and is also the file-name prefix of that subject's per-split CSV files
# (e.g. "<name>_test.csv").
task_list = [
    "computer_network",
    "operating_system",
    "computer_architecture",
    "college_programming",
    "college_physics",
    "college_chemistry",
    "advanced_mathematics",
    "probability_and_statistics",
    "discrete_mathematics",
    "electrical_engineer",
    "metrology_engineer",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_chemistry",
    "high_school_biology",
    "middle_school_mathematics",
    "middle_school_biology",
    "middle_school_physics",
    "middle_school_chemistry",
    "veterinary_medicine",
    "college_economics",
    "business_administration",
    "marxism",
    "mao_zedong_thought",
    "education_science",
    "teacher_qualification",
    "high_school_politics",
    "high_school_geography",
    "middle_school_politics",
    "middle_school_geography",
    "modern_chinese_history",
    "ideological_and_moral_cultivation",
    "logic",
    "law",
    "chinese_language_and_literature",
    "art_studies",
    "professional_tour_guide",
    "legal_professional",
    "high_school_chinese",
    "high_school_history",
    "middle_school_history",
    "civil_servant",
    "sports_science",
    "plant_protection",
    "basic_medicine",
    "clinical_medicine",
    "urban_and_rural_planner",
    "accountant",
    "fire_engineer",
    "environmental_impact_assessment_engineer",
    "tax_accountant",
    "physician",
]


class CevalConfig(datasets.BuilderConfig):
    """Builder configuration for a single C-Eval subject, pinned to version 1.0.0."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``BuilderConfig`` together with the fixed version."""
        pinned_version = datasets.Version("1.0.0")
        super().__init__(version=pinned_version, **kwargs)


class Ceval(datasets.GeneratorBasedBuilder):
    """Dataset builder that exposes one configuration per entry of ``task_list``."""

    BUILDER_CONFIGS = [CevalConfig(name=subject) for subject in task_list]

    def _info(self):
        """Return the dataset metadata and the per-example feature schema."""
        # `id` is the only integer column; every other field is a string.
        text_columns = ("question", "A", "B", "C", "D", "answer", "explanation")
        schema = {"id": datasets.Value("int32")}
        for column in text_columns:
            schema[column] = datasets.Value("string")

        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(schema),
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then describe the three splits.

        Each split reads ``<root>/<folder>/<config name>_<folder>.csv``.
        """
        root = dl_manager.download_and_extract(_URL)
        subject = self.config.name

        # (split, folder) pairs; note the "dev" files are served as the TRAIN split.
        layout = (
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "val"),
            (datasets.Split.TRAIN, "dev"),
        )

        generators = []
        for split, folder in layout:
            csv_path = os.path.join(root, folder, f"{subject}_{folder}.csv")
            generators.append(
                datasets.SplitGenerator(
                    name=split,
                    gen_kwargs={"filepath": csv_path},
                )
            )
        return generators

    def _generate_examples(self, filepath):
        """Yield ``(row_index, row_dict)`` for every row of the CSV at ``filepath``.

        Rows read from a CSV that has no ``answer`` or ``explanation`` column
        get an empty string for the missing key.
        """
        rows = pd.read_csv(filepath, encoding="utf-8").to_dict(orient="records")
        for index, row in enumerate(rows):
            row.setdefault("answer", "")
            row.setdefault("explanation", "")
            yield index, row
add MMLU and C-Eval script 2023-09-22 16:34:17 +00:00			`# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`import os`

			`import datasets`
			`import pandas as pd`


			`_CITATION = """\`
			`@article{huang2023ceval,`
			`title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},`
			`author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},`
			`journal={arXiv preprint arXiv:2305.08322},`
			`year={2023}`
			`}`
			`"""`

			`_DESCRIPTION = """\`
			`C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.`
			`"""`

			`_HOMEPAGE = "https://cevalbenchmark.com"`

			`_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"`

			`_URL = "ceval.zip"`

			`task_list = [`
			`"computer_network",`
			`"operating_system",`
			`"computer_architecture",`
			`"college_programming",`
			`"college_physics",`
			`"college_chemistry",`
			`"advanced_mathematics",`
			`"probability_and_statistics",`
			`"discrete_mathematics",`
			`"electrical_engineer",`
			`"metrology_engineer",`
			`"high_school_mathematics",`
			`"high_school_physics",`
			`"high_school_chemistry",`
			`"high_school_biology",`
			`"middle_school_mathematics",`
			`"middle_school_biology",`
			`"middle_school_physics",`
			`"middle_school_chemistry",`
			`"veterinary_medicine",`
			`"college_economics",`
			`"business_administration",`
			`"marxism",`
			`"mao_zedong_thought",`
			`"education_science",`
			`"teacher_qualification",`
			`"high_school_politics",`
			`"high_school_geography",`
			`"middle_school_politics",`
			`"middle_school_geography",`
			`"modern_chinese_history",`
			`"ideological_and_moral_cultivation",`
			`"logic",`
			`"law",`
			`"chinese_language_and_literature",`
			`"art_studies",`
			`"professional_tour_guide",`
			`"legal_professional",`
			`"high_school_chinese",`
			`"high_school_history",`
			`"middle_school_history",`
			`"civil_servant",`
			`"sports_science",`
			`"plant_protection",`
			`"basic_medicine",`
			`"clinical_medicine",`
			`"urban_and_rural_planner",`
			`"accountant",`
			`"fire_engineer",`
			`"environmental_impact_assessment_engineer",`
			`"tax_accountant",`
			`"physician",`
			`]`


add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`class CevalConfig(datasets.BuilderConfig):`
add MMLU and C-Eval script 2023-09-22 16:34:17 +00:00			`def __init__(self, **kwargs):`
			`super().__init__(version=datasets.Version("1.0.0"), **kwargs)`


add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`class Ceval(datasets.GeneratorBasedBuilder):`
add MMLU and C-Eval script 2023-09-22 16:34:17 +00:00			`BUILDER_CONFIGS = [`
add CMMLU, update eval script 2023-09-23 13:10:17 +00:00			`CevalConfig(`
add MMLU and C-Eval script 2023-09-22 16:34:17 +00:00			`name=task_name,`
			`)`
			`for task_name in task_list`
			`]`

			`def _info(self):`
			`features = datasets.Features(`
			`{`
			`"id": datasets.Value("int32"),`
			`"question": datasets.Value("string"),`
			`"A": datasets.Value("string"),`
			`"B": datasets.Value("string"),`
			`"C": datasets.Value("string"),`
			`"D": datasets.Value("string"),`
			`"answer": datasets.Value("string"),`
			`"explanation": datasets.Value("string"),`
			`}`
			`)`
			`return datasets.DatasetInfo(`
			`description=_DESCRIPTION,`
			`features=features,`
			`homepage=_HOMEPAGE,`
			`license=_LICENSE,`
			`citation=_CITATION,`
			`)`

			`def _split_generators(self, dl_manager):`
			`data_dir = dl_manager.download_and_extract(_URL)`
			`task_name = self.config.name`
			`return [`
			`datasets.SplitGenerator(`
			`name=datasets.Split.TEST,`
			`gen_kwargs={`
			`"filepath": os.path.join(`
			`data_dir, "test", f"{task_name}_test.csv"`
			`),`
			`},`
			`),`
			`datasets.SplitGenerator(`
			`name=datasets.Split.VALIDATION,`
			`gen_kwargs={`
			`"filepath": os.path.join(`
			`data_dir, "val", f"{task_name}_val.csv"`
			`),`
			`},`
			`),`
			`datasets.SplitGenerator(`
			`name=datasets.Split.TRAIN,`
			`gen_kwargs={`
			`"filepath": os.path.join(`
			`data_dir, "dev", f"{task_name}_dev.csv"`
			`),`
			`},`
			`),`
			`]`

			`def _generate_examples(self, filepath):`
			`df = pd.read_csv(filepath, encoding="utf-8")`
			`for i, instance in enumerate(df.to_dict(orient="records")):`
			`if "answer" not in instance.keys():`
			`instance["answer"] = ""`
			`if "explanation" not in instance.keys():`
			`instance["explanation"] = ""`
			`yield i, instance`