指定多个文件夹进行文件MD5获取和去重
#找重复的图片MD5
import glob
import hashlib
from collections import Counter
# Glob patterns of the image folders scanned for duplicate files.
# Add further patterns here to include more folders in the comparison.
IMAGE_PATTERNS = [
    "H:/Python/众包采集/zd/zd/images/附件1/*.jpg",
    "H:/Python/众包采集/zd/zd/images/附件2/*.jpg",
]


def file_md5(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and hashed in pieces of
    *chunk_size* bytes, so the whole file never has to be held in
    memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as inputfile:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: inputfile.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def find_duplicates(digests):
    """Return ``{digest: count}`` for every digest that occurs more than once.

    Keys appear in the order in which each digest was first seen.
    """
    return {key: value for key, value in Counter(digests).items() if value > 1}


# MD5 digest of every scanned file, in scan order (one entry per file).
md5List = []
for pattern in IMAGE_PATTERNS:
    for filename in glob.glob(pattern):
        file_digest = file_md5(filename)  # hash once, reuse for list and output
        md5List.append(file_digest)
        print(filename, file_digest)

# Total number of files that were hashed.
print(len(md5List))

# Occurrence count of every digest (duplicates have a count > 1).
res = dict(Counter(md5List))

# Show the duplicated digests together with how often each one occurs.
print(find_duplicates(md5List))
2 条评论
内容的丰富性和深度让人仿佛置身于知识的海洋,受益匪浅。
323