@DerfJagged
Created November 1, 2023 02:10
MediaWiki Delete Duplicate Files Script
#!/usr/bin/python3
import requests
import mwclient

################### CONFIGURE ME ###################
site = mwclient.Site('YOUR_WEBSITE.com', path='/wiki/')
site.login(username='USERNAME_HERE', password='PASSWORD_HERE')
URL = "https://YOUR_WEBSITE.com/wiki/api.php"
####################################################

S = requests.Session()

# Ask the API for the pages listed on Special:ListDuplicatedFiles
PARAMS = {
    "action": "query",
    "list": "querypage",
    "qppage": "ListDuplicatedFiles",
    "qplimit": "100",  # 500 max
    "format": "json"
}
R = S.get(url=URL, params=PARAMS)
pages_with_duplicates_json = R.json()

# Build a pipe-separated list of titles to query for their duplicates
title_list = ""
for result in pages_with_duplicates_json['query']['querypage']['results']:
    title = result['title']
    ### Remove the following two lines (and dedent the line below them) to proceed without asking
    print(f"Replace {title}? [Y/n]")
    if input("").strip().lower() in ('', 'y'):
        title_list += result['title'] + "|"
title_list = title_list[:-1]

if title_list == "":
    print("No pages with duplicates found.")
    quit()

print("\n" + title_list + "\n")

# Fetch the list of duplicates for each selected file
PARAMS = {
    "action": "query",
    "titles": title_list,
    "prop": "duplicatefiles",
    "format": "json"
}
R = S.get(url=URL, params=PARAMS)
data = R.json()

# Uncomment to see pages that will be replaced beforehand
#print(data)
#input("Press Enter to continue...")

# Loop through the JSON data and process duplicate files
for page_id, page_data in data['query']['pages'].items():
    original_file = page_data['title']
    try:
        duplicate_files = page_data['duplicatefiles']
    except KeyError:
        print("No more duplicate files found")
        quit()
    for duplicate_file in duplicate_files:
        duplicate_name = "File:" + duplicate_file['name']
        # Get a page object for the duplicate file
        duplicate_page = site.pages[duplicate_name]
        # Check if the duplicate file exists
        if duplicate_page.exists:
            # Delete the duplicate file
            duplicate_page.delete(reason='Duplicate file - redirecting to original')
            # Recreate the deleted page as a redirect to the original file
            redirect_text = f"#REDIRECT [[{original_file}]]"
            duplicate_page.save(redirect_text, summary='Redirecting duplicate file to original')
            print(f"Redirected {duplicate_name} to {original_file} and deleted the duplicate.")
        else:
            print(f"{duplicate_name} doesn't exist.")
@DerfJagged (Author):
This script locates duplicate files (pages in the File: namespace), deletes them, and then recreates each deleted page as a redirect to the original file.
By default it asks for confirmation on each file; remove the two confirmation-prompt lines marked in the script (and dedent the line below them) to have it delete all of them without asking.
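Note that the script's first query only fetches up to qplimit results (500 at most per request). For wikis with more duplicates than that, a rough, untested sketch of how the first query could be looped using the MediaWiki API's standard continuation mechanism (the 'continue'/'qpoffset' values returned by list=querypage) is shown below; it reuses the S, URL, and PARAMS names from the script and is an assumption about how you might extend it, not part of the original.

# Sketch: page through all ListDuplicatedFiles batches via API continuation
results = []
params = dict(PARAMS)                # copy of the first query's parameters
while True:
    resp = S.get(url=URL, params=params).json()
    results.extend(resp['query']['querypage']['results'])
    cont = resp.get('continue')
    if not cont:
        break                        # no more batches
    params.update(cont)              # carries 'qpoffset' (and 'continue') forward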
