2 files changed: 9 additions and 9 deletions.
Columns: original file line number, diff line number, diff line change.
@@ -5394,8 +5394,10 @@ def shards_with_embedded_external_files(shards: Iterator[Dataset]) -> Iterator[D
5394
5394
shard_path_in_repo = f"{ data_dir } /{ split } -{ index :05d} -of-{ num_shards :05d} .parquet"
5395
5395
buffer = BytesIO ()
5396
5396
shard .to_parquet (buffer )
5397
- uploaded_size += buffer .tell ()
5398
- shard_addition = CommitOperationAdd (path_in_repo = shard_path_in_repo , path_or_fileobj = buffer )
5397
+ parquet_content = buffer .getvalue ()
5398
+ uploaded_size += len (parquet_content )
5399
+ del buffer
5400
+ shard_addition = CommitOperationAdd (path_in_repo = shard_path_in_repo , path_or_fileobj = parquet_content )
5399
5401
api .preupload_lfs_files (
5400
5402
repo_id = repo_id ,
5401
5403
additions = [shard_addition ],
@@ -5705,10 +5707,11 @@ def push_to_hub(
5705
5707
with open (dataset_infos_path , encoding = "utf-8" ) as f :
5706
5708
dataset_infos : dict = json .load (f )
5707
5709
dataset_infos [config_name ] = asdict (info_to_dump )
5708
- buffer = BytesIO ()
5709
- buffer .write (json .dumps (dataset_infos , indent = 4 ).encode ("utf-8" ))
5710
5710
additions .append (
5711
- CommitOperationAdd (path_in_repo = config .DATASETDICT_INFOS_FILENAME , path_or_fileobj = buffer )
5711
+ CommitOperationAdd (
5712
+ path_in_repo = config .DATASETDICT_INFOS_FILENAME ,
5713
+ path_or_fileobj = json .dumps (dataset_infos , indent = 4 ).encode ("utf-8" ),
5714
+ )
5712
5715
)
5713
5716
# push to README
5714
5717
DatasetInfosDict ({config_name : info_to_dump }).to_dataset_card_data (dataset_card_data )
Columns: original file line number, diff line number, diff line change.
7
7
import re
8
8
from collections .abc import Sequence
9
9
from functools import partial
10
- from io import BytesIO
11
10
from pathlib import Path
12
11
from typing import Callable , Optional , Union
13
12
@@ -1853,12 +1852,10 @@ def push_to_hub(
1853
1852
with open (dataset_infos_path , encoding = "utf-8" ) as f :
1854
1853
dataset_infos : dict = json .load (f )
1855
1854
dataset_infos [config_name ] = asdict (info_to_dump )
1856
- buffer = BytesIO ()
1857
- buffer .write (json .dumps (dataset_infos , indent = 4 ).encode ("utf-8" ))
1858
1855
additions .append (
1859
1856
CommitOperationAdd (
1860
1857
path_in_repo = config .DATASETDICT_INFOS_FILENAME ,
1861
- path_or_fileobj = buffer ,
1858
+ path_or_fileobj = json . dumps ( dataset_infos , indent = 4 ). encode ( "utf-8" ) ,
1862
1859
)
1863
1860
)
1864
1861
# push to README
You can’t perform that action at this time.
0 commit comments