From 4998854f490172fd1d78b76c066b351cad9dc2c9 Mon Sep 17 00:00:00 2001
From: Steven Crawford
Date: Wed, 11 Dec 2024 10:52:59 -0600
Subject: [PATCH] initial commit

---
 split_zip_into_under_5GB_chunks.py | 72 ++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 split_zip_into_under_5GB_chunks.py

diff --git a/split_zip_into_under_5GB_chunks.py b/split_zip_into_under_5GB_chunks.py
new file mode 100644
index 0000000..ab7f507
--- /dev/null
+++ b/split_zip_into_under_5GB_chunks.py
@@ -0,0 +1,72 @@
+import os
+import shutil
+import zipfile
+
+def split_zip_into_groups(zip_path, max_group_size_gb):
+    # Extract the contents of the existing zip into a temporary directory
+    temp_dir = os.path.splitext(zip_path)[0] + '_extracted'
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+        zip_ref.extractall(temp_dir)
+
+    # Get a list of all files in the temporary directory
+    file_list = [os.path.join(root, f) for root, dirs, files in os.walk(temp_dir) for f in files]
+
+    # Calculate the maximum size in bytes for each group
+    max_group_size_bytes = max_group_size_gb * 1024**3
+
+    # Create a new directory to hold the split zip files
+    output_dir = os.path.splitext(zip_path)[0] + '_split'
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    group_count = 1
+    current_group_size = 0
+    current_group_files = []
+
+    for file_path in file_list:
+        # Get the uncompressed size of the current file in bytes
+        file_size = os.path.getsize(file_path)
+
+        if current_group_files and current_group_size + file_size > max_group_size_bytes:
+            # Adding the current file would exceed the group size, so save the
+            # current group and start a new one
+            output_zip_path = os.path.join(output_dir, f'group_{group_count}.zip')
+
+            with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_out:
+                for group_file in current_group_files:
+                    # Add each extracted file under its path relative to temp_dir
+                    zip_out.write(group_file, os.path.relpath(group_file, start=temp_dir))
+
+            group_count += 1
+            current_group_size = 0
+            current_group_files = []
+
+        current_group_size += file_size
+        current_group_files.append(file_path)
+
+    # If there are any remaining files in the last group, save them too
+    if current_group_files:
+        output_zip_path = os.path.join(output_dir, f'group_{group_count}.zip')
+        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_out:
+            for group_file in current_group_files:
+                zip_out.write(group_file, os.path.relpath(group_file, start=temp_dir))
+
+    # Clean up the temporary directory
+    shutil.rmtree(temp_dir)
+
+def get_zip_path():
+    while True:
+        zip_path = input("Enter the path to the zipped file: ")
+        if os.path.isfile(zip_path):
+            return zip_path
+        else:
+            print(f"The file at {zip_path} does not exist. Please try again.")
+
+if __name__ == '__main__':
+    # Example usage: prompt for a zip and split it into groups under 5 GB
+    zip_path = get_zip_path()
+    max_group_size_gb = 5
+    split_zip_into_groups(zip_path, max_group_size_gb)
\ No newline at end of file
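
For reference, a minimal non-interactive usage sketch, assuming the module is importable from the working directory; the archive path below is hypothetical and should be replaced with a real zip file.

    # Hypothetical path; substitute your own archive.
    from split_zip_into_under_5GB_chunks import split_zip_into_groups

    split_zip_into_groups('/path/to/archive.zip', max_group_size_gb=5)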