指定多个文件夹,计算其中文件的MD5并找出重复的文件

#找重复的图片MD5
import glob
import hashlib
from collections import Counter 

# Glob patterns of the folders to scan. To include another folder, add one
# more pattern here instead of copying the hashing loop.
IMAGE_PATTERNS = [
    "H:/Python/众包采集/zd/zd/images/附件1/*.jpg",
    "H:/Python/众包采集/zd/zd/images/附件2/*.jpg",
]


def file_md5(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in pieces of *chunk_size* bytes so that a large file
    is never held in memory all at once.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as inputfile:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: inputfile.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def collect_md5s(patterns):
    """Hash every file matched by each glob pattern in *patterns*.

    Prints "<filename> <md5>" for each file as it is processed and returns
    the digests as a list, in the order the files were visited.
    """
    digests = []
    for pattern in patterns:
        for filename in glob.glob(pattern):
            # Hash once and reuse the value for both the list and the output.
            md5 = file_md5(filename)
            digests.append(md5)
            print(filename, md5)
    return digests


def duplicate_counts(counts):
    """Return the entries of the mapping *counts* whose count is above 1."""
    return {key: value for key, value in counts.items() if value > 1}


md5List = collect_md5s(IMAGE_PATTERNS)
print(len(md5List))
res = dict(Counter(md5List))
print(duplicate_counts(res))  # show each duplicated digest and how many times it occurs
最后修改:2023 年 12 月 05 日
如果觉得我的文章对你有用,请随意赞赏