First commit!

Danilo Reyes 2022-03-16 10:56:48 -06:00
commit f9cd1b09da
2 changed files with 83 additions and 0 deletions

README.md Normal file

@@ -0,0 +1,9 @@
# rmlint reduplicate
This is a simple script that uses the leftover rmlint.json file to copy deleted duplicate files back to their original locations.
The script is heavily commented, although the only thing you need to edit is the first variable, so that it contains the path of your rmlint.json.
I do encourage you to give the script a read, just to have a rough idea of what it's doing!
Feel free to open issues with suggestions or to ask for help.
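
As a rough sketch (the path below is only an example), the edit looks like this:

```python
# Inside rmlint_reduplicate.py, point this variable at your own report
rmlint_file = "/home/user/rmlint.json"  # example path, replace with yours
```

After that you can simply run `./rmlint_reduplicate.py` (or `python3 rmlint_reduplicate.py`) and it will copy the surviving originals back over the deleted duplicates.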

rmlint_reduplicate.py Executable file

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
import json
import os
from pathlib import Path
"""
This is a very simple python script, which only function is to "reduplicate" or rather restore the original
status of your files after wrongfully running rmlint.
Since rmlint deletes all files with a matching checksum BUT the original, it's possible to use the leftover rmlint.json file
to copy the original files back into their original destination.
There is only one value which you may technically need to edit, which is the path to your rmlint json down below this comment block
but, just in case give the document a read
"""
rmlint_file = "ENTER HERE THE PATH OF YOUR rmlint.json FILE!!!"
with open(rmlint_file, encoding="utf-8") as f:
    json_data = json.load(f)  # Load the rmlint report into a variable
def quote(line):
    """Wrap a path in quotation marks so the shell commands below don't trip on spaces."""
    return '"' + line + '"'
def find_value(key, value):
    """
    Return the indexes (within json_data) of every entry whose given key matches the given value.
    In practice it is fed a checksum, walks through the json data looking for files with that same
    checksum, and returns their indexes as a list.
    """
    list_index = []  # declare an empty list
    for i, dic in enumerate(json_data):
        if dic.get(key) == value:  # if checksum == checksum
            list_index.append(i)  # append the index to the list that will be returned
    return list_index
for dic in json_data:
    # This is where the magic happens: the loop goes through the json entry by entry (each one is a dictionary)
    if dic.get("type") == "duplicate_file" and dic.get("is_original"):
        # Only look at original files which belong to duplicate groups
        checksum = dic.get("checksum")
        twins = dic.get("twins")
        # This calls the function above and stores the list of matching indexes
        list_index = find_value("checksum", checksum)
        original_file = dic.get("path")
        missing_files = []
        # Check that the number of twins matches the number of indexes found; it should
        if twins == len(list_index):
            for i in list_index:
                # Now, list only the files that were deleted
                if not json_data[i].get("is_original"):
                    # And append them to missing_files
                    missing_files.append(json_data[i].get("path"))
"""
This is unnecesary, but just to make sure that it will ONLY attempt to copy if the original file still exists
in my case, I deleted a couple files, which when trying to copy them threw an error, this was a bit scary till I realize
those files I actually delted myself, this hopefully makes the experience more seamless for the user
"""
if Path(original_file).exists() == False:
print("ERROR!!! the original file is missing, perhaps you deleted it?")
print(original_file)
else:
# RESTORE THE FILES!!!!
print("Original file found:")
print(original_file)
print("Restoring to:")
for file in missing_files:
os.system("echo " + quote(file))
os.system("cp " + quote(str(original_file)) + " " + quote(str(file)))
print("______________________")