#!/usr/bin/env python3
"""rmlint-reduplicate/rmlint_reduplicate.py

A very simple Python script whose only function is to "reduplicate", or rather
restore the original state of your files after mistakenly running rmlint.
Since rmlint deletes every file with a matching checksum EXCEPT the original,
the leftover rmlint.json file can be used to copy the original files back to
the locations of their deleted twins.

There is only one value you may need to edit: the path to your rmlint.json
file, just below this docstring. Still, just in case, give the whole script a read.
"""
import json
import os
from pathlib import Path
rmlint_file = "ENTER HERE THE PATH OF YOUR rmlint.json FILE!!!"
with open(rmlint_file, encoding="utf-8") as f:
    json_data = json.load(f)  # Load the rmlint report into a list of dicts


def quote(line):
    """Wrap a path in double quotes so paths with spaces survive the shell command"""
    return '"' + line + '"'
def find_value(key, value):
    """
    Return the index numbers of all the twin files that have matching checksums.

    Basically, it is fed the value of a checksum, goes through the json data,
    finds every file with the same checksum, and returns their indexes as a list.
    """
    list_index = []  # declare empty list
    for i, dic in enumerate(json_data):
        if dic.get(key) == value:  # if checksum == checksum
            list_index.append(i)  # append the index into the list to return
    return list_index
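To make the scan above concrete, here is a standalone toy run of the same idea on fake data; the helper takes the data as a parameter (and has a different name) so it does not clash with `find_value()` in this script:

```python
# Toy demonstration of the checksum scan performed by find_value() above.
# The entries mimic rmlint.json records; field names are assumptions.
sample = [
    {"checksum": "abc", "is_original": True,  "path": "/tmp/a"},
    {"checksum": "abc", "is_original": False, "path": "/tmp/b"},
    {"checksum": "def", "is_original": True,  "path": "/tmp/c"},
]

def indexes_matching(data, key, value):
    """Same scan as find_value(), but taking the data as a parameter."""
    return [i for i, dic in enumerate(data) if dic.get(key) == value]

print(indexes_matching(sample, "checksum", "abc"))  # -> [0, 1]
```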
for dic in json_data:
    # This is where the magic happens: the report is walked one dictionary at a time
    if dic.get("type") == "duplicate_file" and dic.get("is_original"):
        # Looks for original files which belong to duplicate groups
        checksum = dic.get("checksum")
        twins = dic.get("twins")
        # This calls the function and stores the list of indexes
        list_index = find_value("checksum", checksum)
        original_file = dic.get("path")
        missing_files = []
        # Checks that the count of twins matches the number of indexes found; it should match
        if twins == len(list_index):
            for i in list_index:
                # Now, list only the files that were deleted
                if not json_data[i].get("is_original"):
                    # And append them to missing_files
                    missing_files.append(json_data[i].get("path"))
"""
This is unnecesary, but just to make sure that it will ONLY attempt to copy if the original file still exists
in my case, I deleted a couple files, which when trying to copy them threw an error, this was a bit scary till I realize
those files I actually delted myself, this hopefully makes the experience more seamless for the user
"""
if Path(original_file).exists() == False:
print("ERROR!!! the original file is missing, perhaps you deleted it?")
print(original_file)
else:
# RESTORE THE FILES!!!!
print("Original file found:")
print(original_file)
print("Restoring to:")
for file in missing_files:
os.system("echo " + quote(file))
os.system("cp " + quote(str(original_file)) + " " + quote(str(file)))
print("______________________")