Archived
1
0
This repository has been archived on 2020-12-10. You can view files and clone it, but cannot push or open issues or pull requests.
old/duplicates.py

59 lines
1.7 KiB
Python
Raw Normal View History

2018-10-16 16:28:42 +00:00
#!/usr/bin/python3
from os import listdir, remove
from os.path import isfile, join
import hashlib, pymysql
# Snapshot the directory listings once at start-up; the functions in this
# script work from these lists rather than re-reading the directories.
files = [f for f in listdir('files') if isfile(join('files', f))]
thumbs = [t for t in listdir('files/thumbs') if isfile(join('files/thumbs', t))]

# Maps an MD5 hex digest to the list of file names that have that content.
newFiles = {}

# NOTE(review): these credentials are hard-coded in source control; they
# should be rotated and loaded from the environment or an untracked config.
# Keyword arguments are required here: PyMySQL 1.0+ made connect()
# keyword-only, so the former positional call raises TypeError there.
db = pymysql.connect(host='localhost',
                     user='kingofdog',
                     password='XrE#513*IOC&tA*B',
                     database='kingofdog')
cur = db.cursor()
def removeThumbs(originalImage):
    """Delete every thumbnail whose file name starts with the image's base name.

    The base name is the part of ``originalImage`` before its first dot.
    Candidates are taken from the module-level ``thumbs`` listing and removed
    from ``./files/thumbs/``; the name of each deleted thumbnail is printed.

    Args:
        originalImage: File name of the image whose thumbnails are removed.
    """
    originalName = originalImage.split('.')[0]
    # NOTE(review): prefix matching also selects thumbnails of other images
    # whose names merely begin with the same characters (e.g. "a" vs "ab");
    # confirm against the thumbnail naming scheme.
    for thumb in [name for name in thumbs if name.startswith(originalName)]:
        try:
            remove('./files/thumbs/' + thumb)
        except FileNotFoundError:
            # ``thumbs`` is never refreshed, so an entry may already have been
            # deleted by an earlier call; skip it instead of aborting the run.
            continue
        print(thumb)
def pushToDatabase(oldName, newFile):
    """Point the ``files`` rows named ``oldName`` at the surviving copy.

    Sets ``path`` to ``"files/" + newFile`` for every row of the ``files``
    table whose ``name`` equals ``oldName`` and commits. The update is
    best-effort: on failure the transaction is rolled back, the error is
    reported on stdout and the exception is not re-raised.

    Args:
        oldName: Value of the ``name`` column identifying the rows to update.
        newFile: File name (inside ``files/``) the rows should point to.
    """
    try:
        # Pass the values as query parameters so the driver escapes them;
        # file names containing quotes can no longer break or alter the SQL.
        cur.execute('UPDATE files SET path = %s WHERE name = %s;',
                    ('files/' + newFile, oldName))
        db.commit()
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit still propagate.
        db.rollback()
        print('-> Database update failed for "' + oldName + '": ' + str(err))
def removeDuplicates():
    """Delete files in ``./files/`` whose content duplicates an earlier file.

    Every name in the module-level ``files`` list is hashed with MD5 and
    recorded in ``newFiles`` under its digest. For each digest shared by more
    than one file, the first recorded file is kept; every other file is
    deleted, its thumbnails are removed via ``removeThumbs`` and its database
    row is repointed to the kept file via ``pushToDatabase``.
    """
    # Group file names by the MD5 digest of their content.
    for file in files:
        digest = hashlib.md5()
        with open('./files/' + file, 'rb') as imageFile:
            # Hash in chunks so large files are not loaded into memory whole.
            for chunk in iter(lambda: imageFile.read(65536), b''):
                digest.update(chunk)
        newFiles.setdefault(digest.hexdigest(), []).append(file)

    # NOTE(review): files are treated as identical on an MD5 match alone, and
    # each duplicate is deleted before its database row is updated.
    for entries in newFiles.values():
        keeper = entries[0]
        # A digest with a single file yields an empty slice, so nothing happens.
        for entry in entries[1:]:
            remove('./files/' + entry)
            removeThumbs(entry)
            pushToDatabase(entry.split('.')[0], keeper)
            print('Deleting: ' + entry)
            print('-> New Location: "files/' + keeper + '"')
            print('-> Updating location in database')
# Script entry: run the de-duplication and always release the connection.
print('Deleting duplicate files...')
try:
    removeDuplicates()
    print('Finished deleting all duplicate files and their thumbnails')
finally:
    # Close the database connection even when de-duplication aborts part-way.
    db.close()