working model
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#!/Users/shelbybark/code/cmg_split_zip_into_chunks/venv/bin/python3
|
||||
#!/usr/bin/python3
|
||||
import zipfile
|
||||
import os
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
import io
|
||||
from io import BytesIO
|
||||
|
||||
def get_zip_path():
|
||||
while True:
|
||||
@@ -11,62 +11,39 @@ def get_zip_path():
|
||||
else:
|
||||
print(f"The file at {zip_path} does not exist. Please try again.")
|
||||
|
||||
def split_and_zip(file_path):
|
||||
def split_zip(input_zip_path, output_folder, max_group_size=4.5 * 1024 * 1024 * 1024):
|
||||
# Ensure the output folder exists
|
||||
base = os.path.basename(input_zip_path)
|
||||
file_name = os.path.splitext(base)[0]
|
||||
if not os.path.exists(output_folder):
|
||||
os.makedirs(output_folder)
|
||||
|
||||
with ZipFile(file_path, 'r') as zip_ref:
|
||||
total_size = 0
|
||||
for file in zip_ref.namelist():
|
||||
total_size += zip_ref.getinfo(file).file_size
|
||||
with zipfile.ZipFile(input_zip_path, 'r') as source_zip:
|
||||
members = source_zip.namelist()
|
||||
group_size = 0
|
||||
group_number = 1
|
||||
|
||||
# Define the maximum group size in bytes
|
||||
max_group_size = 5 * 1024 ** 3
|
||||
for member in members:
|
||||
# Read the file content from the zip archive
|
||||
with BytesIO(source_zip.read(member)) as file_content:
|
||||
# Check if adding this file would exceed the maximum group size
|
||||
if (group_size + len(file_content.getvalue())) > max_group_size:
|
||||
group_number += 1
|
||||
group_size = 0
|
||||
|
||||
# Calculate the number of groups
|
||||
num_groups = -(-total_size // max_group_size) # Ceiling division
|
||||
output_zip_path = os.path.join(output_folder, f'{file_name}-group_{group_number}.zip')
|
||||
|
||||
# print(f"Total size: { round(total_size / 1024.0 / 1024.0 / 1024.0, 6)} GB")
|
||||
print(f"Max group size: { max_group_size } GB")
|
||||
print(f"Total size: { total_size } GB")
|
||||
print(f"Number of groups: {num_groups}")
|
||||
with zipfile.ZipFile(output_zip_path, 'a') as target_zip:
|
||||
target_zip.writestr(member, file_content.getvalue())
|
||||
|
||||
# Create a temporary directory for storing intermediate files
|
||||
tmp_dir = 'temp'
|
||||
os.makedirs(tmp_dir, exist_ok=True)
|
||||
# Update the size of the current group
|
||||
group_size += len(file_content.getvalue())
|
||||
|
||||
# Iterate over each group
|
||||
for i in range(num_groups):
|
||||
start_index = i * max_group_size
|
||||
end_index = min((i + 1) * max_group_size, total_size)
|
||||
|
||||
# Extract the files for this group from the zipped file
|
||||
with ZipFile(file_path, 'r') as zip_file:
|
||||
group_files = [(zip_file.infolist()[j].filename,
|
||||
io.BytesIO(),
|
||||
zip_file.open(zip_file.infolist()[j].filename, 'r'))
|
||||
for j in range(start_index, end_index)]
|
||||
print(f'Successfully split {input_zip_path} into {group_number} groups.')
|
||||
|
||||
# Write each file to a new zip file
|
||||
with open(os.path.join(tmp_dir, f'group_{i}.zip'), 'wb') as group_zip:
|
||||
group_zip.write(b'PK' + b'\x01\x0a' * 20)
|
||||
for filename, buffer, file in group_files:
|
||||
group_zip.write(f'{filename}\x00')
|
||||
group_zip.writestr(filename, buffer.read())
|
||||
del buffer
|
||||
# Example usage
|
||||
# input_zip_path = 'path/to/your/large.zip'
|
||||
input_zip_path = get_zip_path()
|
||||
output_folder = 'output'
|
||||
|
||||
# Clean up
|
||||
os.remove(file_path)
|
||||
|
||||
# Zip the intermediate files into final zip files
|
||||
for i in range(num_groups):
|
||||
with open(os.path.join(tmp_dir, f'group_{i}.zip'), 'rb') as group_zip:
|
||||
with ZipFile(f'output_group_{i}.zip', 'w', compression=ZIP_DEFLATED) as output_zip:
|
||||
for file_info in group_zip.infolist():
|
||||
if file_info.filename.startswith('group_'):
|
||||
output_zip.writestr(file_info.filename[len('group_'):], group_zip.open(file_info.filename, 'r').read())
|
||||
|
||||
# Clean up
|
||||
os.rmdir(tmp_dir)
|
||||
|
||||
zip_file = get_zip_path()
|
||||
# split_and_zip('input.zip')
|
||||
split_and_zip(zip_file)
|
||||
split_zip(input_zip_path, output_folder)
|
||||
|
||||
Reference in New Issue
Block a user