commit f9cd1b09da410dcafbc7c701e71baf42399c5c3b
Author: Danilo Reyes
Date:   Wed Mar 16 10:56:48 2022 -0600

    First commit!

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e5ea21a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# rmlint reduplicate
+
+This is a simple script that uses the leftover rmlint.json file to copy deleted duplicate files back to their original locations.
+
+The script is heavily commented, although the only thing you need to edit is the first variable, so that it contains the path of your rmlint.json.
+
+I do encourage you to give the script a read, just to vaguely know what it's doing!
+
+Feel free to open issues with suggestions or to ask for help.
diff --git a/rmlint_reduplicate.py b/rmlint_reduplicate.py
new file mode 100755
index 0000000..9ac2a57
--- /dev/null
+++ b/rmlint_reduplicate.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+This is a very simple Python script whose only function is to "reduplicate", or rather restore, the original
+state of your files after wrongfully running rmlint.
+
+Since rmlint deletes all files with a matching checksum BUT the original, it's possible to use the leftover
+rmlint.json file to copy the surviving original back to each deleted twin's location.
+
+There is only one value you may need to edit, which is the path to your rmlint.json right below this comment block,
+but just in case, give the document a read.
+"""
+import json
+import os
+import shlex
+from pathlib import Path
+
+rmlint_file = "ENTER HERE THE PATH OF YOUR rmlint.json FILE!!!"
+
+with open(rmlint_file, encoding="utf-8") as f:
+    json_data = json.load(f)  # Load the rmlint data into a variable
+
+
+def quote(line):
+    """Shell-quote a path so filenames with spaces or special characters don't break the cp command"""
+    return shlex.quote(line)
+
+
+def find_value(key, value):
+    """
+    Return a list with the index of every entry in the JSON data whose given key matches the given value.
+    In practice it is fed a checksum, and it returns the indexes of all the twin files sharing that checksum.
+    """
+    list_index = []  # declare empty list
+    for i, dic in enumerate(json_data):
+        if dic.get(key) == value:  # if checksum == checksum
+            list_index.append(i)  # append the index into the list to return
+    return list_index
+
+
+for dic in json_data:
+    # This is where the magic happens: go through the JSON one dictionary at a time
+    if dic.get("type") == "duplicate_file" and dic.get("is_original"):
+        # Looks for original files which belong to duplicate groups
+        checksum = dic.get("checksum")
+        twins = dic.get("twins")
+        # This calls the function and stores the list of indexes
+        list_index = find_value("checksum", checksum)
+        original_file = dic.get("path")
+        missing_files = []
+        # Checks that the count of twins matches the number of indexes found; they should match
+        if twins == len(list_index):
+            for i in list_index:
+                # Now, list only the files that were deleted
+                if not json_data[i].get("is_original"):
+                    # And append them to missing_files
+                    missing_files.append(json_data[i].get("path"))
+        """
+        This is unnecessary, but it makes sure the script will ONLY attempt to copy if the original file still exists.
+        In my case a couple of files threw an error when the script tried to copy them, which was a bit scary until I
+        realized those were files I had actually deleted myself. Hopefully this check makes the experience more
+        seamless for the user.
+        """
+        if not Path(original_file).exists():
+            print("ERROR!!! The original file is missing, perhaps you deleted it?")
+            print(original_file)
+        else:
+            # RESTORE THE FILES!!!!
+            print("Original file found:")
+            print(original_file)
+            print("Restoring to:")
+            for file in missing_files:
+                print(file)
+                os.system("cp " + quote(str(original_file)) + " " + quote(str(file)))
+            print("______________________")
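
For anyone curious what the script is actually walking through, here is a minimal, hypothetical sample of the entries it reads from rmlint.json, trimmed to only the fields the script touches. The paths and checksum are invented, and `twins` is taken here as the full group size including the original, which is the reading that makes the script's `twins == len(list_index)` check line up:

```python
# Hypothetical stand-in for json.load(f): one duplicate group of three files.
# Only the keys the script reads are shown; a real rmlint.json has more fields
# plus header/footer entries, which the "type" check filters out.
json_data = [
    {"type": "duplicate_file", "path": "/data/photos/img_001.jpg",
     "checksum": "d41d8cd98f00b204", "is_original": True, "twins": 3},
    {"type": "duplicate_file", "path": "/backup/img_001.jpg",
     "checksum": "d41d8cd98f00b204", "is_original": False, "twins": 3},
    {"type": "duplicate_file", "path": "/old/img_001.jpg",
     "checksum": "d41d8cd98f00b204", "is_original": False, "twins": 3},
]
```

With data like this, the script would find `/data/photos/img_001.jpg` on disk and copy it over the two deleted paths.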
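
The restore step shells out to `cp`, which works fine once the arguments are shell-quoted. As a sketch of an alternative, the same copy can be done natively with `shutil`, which avoids spawning a shell at all. This assumes the `original_file` and `missing_files` variables from the script above; the `mkdir` line is a defensive extra, in case a destination folder no longer exists:

```python
import shutil
from pathlib import Path

# Drop-in alternative to the os.system("cp ...") call: shutil.copy2 copies
# both the file contents and its metadata (permissions, mtime), no quoting needed.
for file in missing_files:
    target = Path(file)
    target.parent.mkdir(parents=True, exist_ok=True)  # recreate the folder if it's gone
    shutil.copy2(original_file, target)
    print("Restored:", target)
```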