initial commit
This commit is contained in:
77
split_zip_into_under_5GB_chunks.py
Normal file
77
split_zip_into_under_5GB_chunks.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import os
|
||||
import zipfile
|
||||
from shutil import copy2
|
||||
|
||||
def _write_group_zip(output_zip_path, group_files, base_dir):
    """Write *group_files* into a new deflated zip at *output_zip_path*.

    Each member is stored under its path relative to *base_dir*, so the
    directory layout of the extracted archive is preserved inside the group.
    """
    with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as group_zip:
        for file_path in group_files:
            group_zip.write(file_path, os.path.relpath(file_path, start=base_dir))


def split_zip_into_groups(zip_path, max_group_size_gb):
    """Split the zip at *zip_path* into several smaller zip files.

    The archive is extracted to ``<zip_path without extension>_extracted``,
    its files are packed greedily (in ``os.walk`` order) into
    ``group_1.zip``, ``group_2.zip``, ... inside
    ``<zip_path without extension>_split``, and the extraction directory is
    removed afterwards, even if an error occurs part-way through.

    The size limit is applied to the *uncompressed* on-disk size of the
    extracted files. A single file that is larger than the limit is placed in
    a group of its own, which will therefore exceed the limit. Only files are
    carried over; empty directories in the source archive are not reproduced.

    Args:
        zip_path: Path to the zip archive to split.
        max_group_size_gb: Maximum total uncompressed size of the files in
            one group, in GiB (multiples of 1024**3 bytes). May be fractional.

    Raises:
        zipfile.BadZipFile: If *zip_path* is not a valid zip archive.
    """
    # Imported locally because the module only does `from shutil import copy2`,
    # which leaves the name `shutil` itself undefined at module level.
    import shutil

    base_path = os.path.splitext(zip_path)[0]
    temp_dir = base_path + '_extracted'
    output_dir = base_path + '_split'
    max_group_size_bytes = max_group_size_gb * 1024**3

    os.makedirs(temp_dir, exist_ok=True)
    try:
        # Extract all the contents into the temporary directory
        with zipfile.ZipFile(zip_path, 'r') as source_zip:
            source_zip.extractall(temp_dir)

        # Every regular file below the extraction directory
        file_list = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(temp_dir)
            for name in files
        ]

        os.makedirs(output_dir, exist_ok=True)

        group_count = 1
        current_group_size = 0
        current_group_files = []

        for file_path in file_list:
            file_size = os.path.getsize(file_path)

            # Flush the current group before it would overflow. The emptiness
            # check avoids writing an empty zip when a single file is larger
            # than the limit on its own.
            if current_group_files and current_group_size + file_size > max_group_size_bytes:
                _write_group_zip(
                    os.path.join(output_dir, f'group_{group_count}.zip'),
                    current_group_files,
                    temp_dir,
                )
                group_count += 1
                current_group_size = 0
                current_group_files = []

            current_group_size += file_size
            current_group_files.append(file_path)

        # If there are any remaining files in the last group, save them too
        if current_group_files:
            _write_group_zip(
                os.path.join(output_dir, f'group_{group_count}.zip'),
                current_group_files,
                temp_dir,
            )
    finally:
        # Clean up the temporary directory, on success and on failure alike
        shutil.rmtree(temp_dir)
|
||||
|
||||
def get_zip_path():
    """Ask on stdin for a path until it names an existing file, then return it.

    Each rejected answer is reported back to the user before prompting again.
    """
    candidate = input("Enter the path to the zipped file: ")
    while not os.path.isfile(candidate):
        print(f"The file at {candidate} does not exist. Please try again.")
        candidate = input("Enter the path to the zipped file: ")
    return candidate
|
||||
|
||||
# Script entry: prompt for an archive, then split it into groups of at most 5 GB.
# NOTE(review): these statements run at import time as well; consider moving
# them under an `if __name__ == "__main__":` guard if this module is ever imported.
max_group_size_gb = 5
zip_path = get_zip_path()
split_zip_into_groups(zip_path, max_group_size_gb=max_group_size_gb)
|
||||
Reference in New Issue
Block a user