Skip to content

Commit 337945d

Browse files
committed
Merge branch 'dev'
2 parents 9352fd6 + 2360dff commit 337945d

File tree

100 files changed

+219
-215
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

100 files changed

+219
-215
lines changed

bin/chunkfile.py

Lines changed: 21 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def process_byte_based_chunk(
147147
return actual, b"", total_read, False
148148
return None, prev_overlap, total_read - len(chunk), False
149149
current = actual[:chunk_size]
150-
next_overlap = actual[chunk_size - overlap : chunk_size]
150+
next_overlap = actual[chunk_size - overlap:chunk_size]
151151
return current, next_overlap, total_read, False
152152

153153

@@ -214,17 +214,14 @@ def split_file(
214214
total = file_size + (num_chunks - 1) * overlap
215215
chunk_size = total // num_chunks
216216

217-
with open(input_file, "rb", encoding="utf-8") as file:
218-
part_num = 1
219-
prev_overlap = b""
220-
prev_lines: List[bytes] = []
221-
total_read = 0
222-
223-
while True:
224-
is_first = part_num == 1
217+
if num_lines:
218+
# For line-based chunking, use text mode with UTF-8 encoding
219+
with open(input_file, "r", encoding="utf-8") as file:
220+
part_num = 1
221+
prev_lines: List[bytes] = []
225222

226-
if num_lines:
227-
# Line-based chunking
223+
while True:
224+
is_first = part_num == 1
228225
lines, prev_lines, should_stop = process_line_based_chunk(
229226
file, num_lines, overlap, is_first, prev_lines
230227
)
@@ -233,8 +230,16 @@ def split_file(
233230
write_chunk(lines, input_file, part_num, is_lines=True)
234231
if len(lines) < (num_lines - (0 if is_first else overlap)):
235232
break
236-
else:
237-
# Byte-based chunking
233+
part_num += 1
234+
else:
235+
# For byte-based chunking, use binary mode without encoding
236+
with open(input_file, "rb") as file:
237+
part_num = 1
238+
prev_overlap = b""
239+
total_read = 0
240+
241+
while True:
242+
is_first = part_num == 1
238243
assert chunk_size is not None # for type checker
239244
result = process_byte_based_chunk(
240245
file,
@@ -253,10 +258,9 @@ def split_file(
253258
write_chunk(current, input_file, part_num)
254259
if total_read >= file_size and not prev_overlap:
255260
break
256-
257-
part_num += 1
258-
if num_chunks and part_num > num_chunks:
259-
break
261+
part_num += 1
262+
if num_chunks and part_num > num_chunks:
263+
break
260264

261265
# Print summary
262266
print(f"Total chunks created: {part_num - 1}")

docs/shdoc/README.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/config_lib_sh.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/errno_lib_sh.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/functions/__set_venv_vars.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/functions/__venv_conda_check.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/functions/_deprecated.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/functions/_source_check.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/functions/benv.md

Lines changed: 1 addition & 1 deletion

docs/shdoc/bin/shinclude/functions/cact.md

Lines changed: 1 addition & 1 deletion

0 commit comments

Comments
 (0)