This commit is contained in:
hoshi-hiyouga 2024-09-04 19:10:30 +08:00 committed by GitHub
parent ebddce5d4a
commit 44d6947e55
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 2 additions and 1 deletion

View File

@@ -223,13 +223,14 @@ def get_dataset(
dataset_module: Dict[str, "Dataset"] = {}
if "train" in dataset_dict:
dataset_module["train_dataset"] = dataset_dict["train"]
if "validation" in dataset_dict:
dataset_module["eval_dataset"] = dataset_dict["validation"]
if data_args.streaming:
dataset_module = {k: v.to_iterable_dataset() for k, v in dataset_module.items()}
return dataset_module
return dataset_module, template
if data_args.streaming:
raise ValueError("Turn off `streaming` when saving dataset to disk.")