Update args for MsDataset.load
This commit is contained in:
parent
fe4acc66b0
commit
09533e95ed
|
@@ -129,7 +129,6 @@
|
|||
},
|
||||
"firefly": {
|
||||
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
||||
"ms_hub_url": "AI-ModelScope/firefly-train-1.1M",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"response": "target"
|
||||
|
|
|
@@ -58,6 +58,11 @@ def get_dataset(
|
|||
dataset = MsDataset.load(
|
||||
dataset_name=data_path,
|
||||
subset_name=data_name,
|
||||
split=data_args.split,
|
||||
data_files=data_files,
|
||||
cache_dir=model_args.cache_dir,
|
||||
token=model_args.ms_hub_token,
|
||||
streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
|
||||
).to_hf_dataset()
|
||||
else:
|
||||
dataset = load_dataset(
|
||||
|
|
|
@@ -59,6 +59,10 @@ class ModelArguments:
|
|||
default=None,
|
||||
metadata={"help": "Auth token to log in with Hugging Face Hub."}
|
||||
)
|
||||
ms_hub_token: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Auth token to log in with ModelScope Hub."}
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
self.compute_dtype = None
|
||||
|
|
Loading…
Reference in New Issue