@chants: Thank you for your feedback.
Version 2 with the following changelog:
-> Added a 4th optional argument, MAX_FILE_SIZE_IN_MB: the MD5 won't be computed if the file size in MB is larger than the specified MAX_FILE_SIZE_IN_MB.
-> If the 4th argument is not specified, the MD5 of every file will be computed.
Code:
import os
import sys
import hashlib
import shutil
# Dry-run switch: when True, matching files are only reported
# ("Should replace ...") and nothing is copied over them.
DEBUG_MODE=False
def get_filesize_mb(file_path):
    """Return the size of the file at *file_path* in megabytes.

    One megabyte is taken as 1024 * 1024 (1048576) bytes. Any error raised
    by ``os.path.getsize`` (e.g. for a missing file) propagates to the caller.
    """
    bytes_per_mb = 1024 * 1024
    return os.path.getsize(file_path) / bytes_per_mb
def md5sum(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is opened in binary mode and hashed chunk by chunk, so large
    files are never loaded into memory in one piece.

    Returns:
        The lowercase hex digest string, or the sentinel string ``"ERROR"``
        when the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    try:
        with open(fname, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                hash_md5.update(chunk)
    except (OSError, ValueError):
        # OSError: missing file, permission denied, directory, read failure.
        # ValueError: malformed path (e.g. embedded null byte).
        # Anything else (KeyboardInterrupt, programming errors) is deliberately
        # allowed to propagate instead of being silently swallowed.
        return "ERROR"
    return hash_md5.hexdigest()
# --- Command-line handling ---------------------------------------------------
# Three positional arguments are mandatory; the fourth (size limit) is optional.
if len(sys.argv) < 4:
    print("Usage: python replace.py FOLDER MD5_SUM_TO_FIND FILE_PATH_TO_REPLACE_BY [MAX_FILE_SIZE_IN_MB]")
    sys.exit(-1)
g_path = sys.argv[1]
# The computed digests are lowercase hex, so normalise the user-supplied value.
md5_to_find = sys.argv[2].lower()
to_replace_by = sys.argv[3]
# None means "no size limit": every file found is hashed.
max_file_size = None
if len(sys.argv) >= 5:
    try:
        # float() accepts everything int() did, plus fractional limits ("0.5").
        max_file_size = float(sys.argv[4])
    except ValueError:
        print("Error: MAX_FILE_SIZE_IN_MB must be a number, got [%s]" % sys.argv[4])
        sys.exit(-1)
    print("Maximum file size to check is %s MB" % sys.argv[4])
if not os.path.isfile(to_replace_by):
    print("Error: the path of the file to replace by does not exists")
    sys.exit(-1)

# --- Directory walk ----------------------------------------------------------
# Every file under g_path whose MD5 equals md5_to_find is overwritten with a
# copy of to_replace_by (or only reported when DEBUG_MODE is enabled).
if __name__ == '__main__':
    for root, directories, filenames in os.walk(g_path):
        for filename in filenames:
            full_path = os.path.join(root, filename)

            # The size filter only applies when a limit was given. Compare
            # against None explicitly so that a limit of 0 is still honoured.
            if max_file_size is not None:
                try:
                    fsize = get_filesize_mb(full_path)
                except OSError:
                    # E.g. a broken symlink or a file removed during the walk:
                    # skip it rather than aborting the whole run.
                    print("Skipping file [%s]: unable to read its size" % full_path)
                    continue
                if fsize > max_file_size:
                    # The current file size is too big, skipping it
                    print("Skipping file [%s] with size %sMB" % (full_path, str(fsize)))
                    continue

            # Hash every file that passed the filter, whether or not a limit
            # was specified (previously files were only hashed when no limit
            # was given, so the optional argument disabled all replacements).
            checksum = md5sum(full_path)
            if checksum != md5_to_find:
                continue

            if DEBUG_MODE:
                # Dry run: report the match without touching the file.
                print("Should replace [%s] by [%s]" % (full_path, to_replace_by))
            else:
                print("Replacing [%s] by [%s]" % (full_path, to_replace_by))
                shutil.copy(to_replace_by, full_path)