Welcome to HBH! If you have tried to register and didn't get a verification email, please use the following link to resend the verification email.
Delete Duplicate Images (linux) - Python Code Bank
Delete Duplicate Images (linux)
Before running the script, change the directory path to the path of your own folder:
# Placeholder path of the folder to scan for duplicate images.
# NOTE: the `if __name__ == "__main__":` block at the end of the script assigns
# `directory` again, and that later value is the one passed to
# delete_duplicates() -- update the path there as well.
directory = '/home/user/Documents/myFolder/' # Replace with your actual folder path
import os
import hashlib
# Function to compute hash of a file
def get_file_hash(file_path, chunk_size=4096):
    """Return the MD5 hex digest of a file's contents, or None if unreadable.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so large
    files are never loaded into memory in one piece.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The hexadecimal MD5 digest as a string, or None when the file cannot
        be opened or read (the error is printed to stdout).
    """
    hash_md5 = hashlib.md5()
    try:
        with open(file_path, "rb") as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f.read(chunk_size), b""):
                hash_md5.update(chunk)
    except OSError as e:
        # Only I/O failures (missing file, permissions, ...) are treated as
        # "skip this file"; programming errors are allowed to surface.
        print(f"Error reading {file_path}: {e}")
        return None
    return hash_md5.hexdigest()
# Function to find and delete duplicate images
def delete_duplicates(directory):
    """Delete duplicate image files found under ``directory``.

    The directory tree is walked recursively. Files are selected by their
    extension and compared by the digest returned by ``get_file_hash``: the
    first file seen with a given digest is kept, and every later file with the
    same digest is permanently removed with ``os.remove``. Progress messages
    and a final count are printed to stdout.

    Args:
        directory: Path of the folder to scan.

    Returns:
        None. If ``directory`` is not an existing directory, a message is
        printed and the function returns without scanning.
    """
    if not os.path.isdir(directory):
        print(f"The provided path '{directory}' is not a valid directory.")
        return

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')
    hashes = {}  # digest -> path of the first file seen with that content
    duplicate_count = 0

    # Walk through the directory and its subdirectories
    for root, dirs, files in os.walk(directory):
        # Sorting in place fixes the traversal order, so which copy survives
        # no longer depends on the arbitrary order the OS lists entries in.
        dirs.sort()
        for filename in sorted(files):
            # Only files with a known image extension are considered.
            if not filename.lower().endswith(image_extensions):
                continue

            file_path = os.path.join(root, filename)

            # A symlink hashes the same as its target. Treating it as a copy
            # could delete the real file and leave the link dangling.
            if os.path.islink(file_path):
                print(f"Skipping symlink: {file_path}")
                continue

            print(f"Processing: {file_path}")  # Debugging: see what is being processed

            file_hash = get_file_hash(file_path)
            if file_hash is None:
                # Unreadable file; get_file_hash already reported the error.
                continue

            if file_hash not in hashes:
                # First time this content is seen: remember it and keep the file.
                hashes[file_hash] = file_path
                continue

            print(f"Duplicate found: {file_path}")
            try:
                os.remove(file_path)  # Permanently deletes the duplicate
            except OSError as e:
                print(f"Error deleting {file_path}: {e}")
            else:
                print(f"Deleted: {file_path}")
                duplicate_count += 1

    print(f"Deleted {duplicate_count} duplicate images.")
# Example usage
if __name__ == "__main__":
    # Runs only when the file is executed as a script, not when imported.
    # WARNING: delete_duplicates() permanently removes files with os.remove,
    # so point this at the right folder before running.
    directory = '/home/user/Documents/myFolder/'  # Replace with your actual folder path
    delete_duplicates(directory)
Comments
Sorry but there are no comments to display