# daddiofaddio/json_rename.py

## json_rename.py
import os
import re
import json

# Folder holding the JSON files that will be renamed.
json_dir = '/path/to/dir'
# Text file listing the candidate URLs, one entry per line.
urls_file_path = '/path/to/urls.txt'


# Load the URL list: trim each line and keep only the non-blank ones.
with open(urls_file_path, 'r') as file:
    urls = [entry for entry in map(str.strip, file) if entry]

# Helper functions for normalising names and matching them against the URLs read from urls.txt above
# Upper-case Roman numerals I through X (the range used for name suffixes).
_ROMAN_NUMERAL_RE = re.compile(r'I{1,3}|IV|VI{0,3}|IX|X')


def is_roman_numeral(s):
    """Return True if *s* is an upper-case Roman numeral from I to X.

    The check is case-sensitive and must cover the whole string, so
    ``'iv'``, ``'XI'``, ``'IIII'`` and a numeral followed by a newline are
    all rejected.

    Args:
        s: The string to test.

    Returns:
        bool: True when *s* is exactly one of I, II, III, IV, V, VI, VII,
        VIII, IX or X; False otherwise.
    """
    # fullmatch, unlike match() with a '$' anchor, does not accept a
    # trailing newline after the numeral.
    return _ROMAN_NUMERAL_RE.fullmatch(s) is not None

def sanitize_for_filename(url):
    """Convert a URL into a flat ``.json`` filename.

    The leading ``https://`` (or ``http://``) scheme is removed and every
    ``/`` is replaced with ``_``. For example
    ``https://www.example.com/url_string/ernest-matthews-8046.html`` becomes
    ``www.example.com_url_string_ernest-matthews-8046.html.json``.

    Args:
        url: The URL to convert.

    Returns:
        str: The derived filename, always ending in ``.json``.
    """
    # Strip only a scheme at the very start; the host name is kept intact so
    # the result reads 'www.example.com_...'.
    for scheme in ('https://', 'http://'):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    return url.replace('/', '_') + '.json'

def find_matching_url(name, candidates=None):
    """Return the first URL that contains *name* as a run of whole words.

    The words of *name* must appear in order, compared case-insensitively,
    and may be separated in the URL by any mix of hyphens, underscores or
    whitespace. This lets ``'Robert Mitchell'`` match a slug such as
    ``robert-mitchell``.

    Args:
        name: The (already adjusted) name, words separated by whitespace.
        candidates: Optional iterable of URLs to search. Defaults to the
            module-level ``urls`` list.

    Returns:
        The first matching URL, or None when nothing matches or *name*
        contains no words.
    """
    words = name.split() if name else []
    if not words:
        # An empty name would compile to r'\b\b', which matches almost any
        # URL and would wrongly select the first candidate.
        return None
    if candidates is None:
        candidates = urls

    # Join the escaped words with a separator class instead of a literal
    # space, since URL slugs do not contain spaces.
    separator = r'[-_\s]+'
    pattern = re.compile(
        r'\b' + separator.join(re.escape(word) for word in words) + r'\b',
        re.IGNORECASE,
    )
    for url in candidates:
        if pattern.search(url):
            return url
    return None

def adjust_name(name):
    """Normalise a person's name into the form used for URL matching.

    Steps, applied in order:

    1. A leading initial written as a letter plus period (``'J.'``) loses
       its period.
    2. A trailing ``Jr``/``Sr`` (any case, with or without period) or a
       Roman numeral accepted by ``is_roman_numeral`` is dropped, but only
       when at least one other word remains.
    3. A first word of ``Ann`` or ``Anne`` (any case) is replaced by ``'a'``.

    Args:
        name: The full name, words separated by whitespace.

    Returns:
        str: The adjusted words joined by single spaces.

    Raises:
        ValueError: If *name* contains no words.
    """
    name_parts = name.split()
    if not name_parts:
        raise ValueError('name must contain at least one word')

    # Initial handling: 'J.' -> 'J'
    first = name_parts[0]
    if len(first) == 2 and first.endswith('.'):
        name_parts[0] = first[0]

    # Drop a generational suffix, but never the only word: a name that is
    # just 'Jr.' or 'V' would otherwise be emptied and crash below.
    last = name_parts[-1]
    if len(name_parts) > 1 and (
        last.lower() in ('jr', 'jr.', 'sr', 'sr.') or is_roman_numeral(last)
    ):
        name_parts.pop()

    # 'Ann' / 'Anne' are matched through the shorthand 'a'.
    if name_parts[0].lower() in ('ann', 'anne'):
        name_parts[0] = 'a'

    return ' '.join(name_parts)

# Main renaming logic: rename each JSON file after the URL that matches the
# 'name' value stored inside it.
for filename in os.listdir(json_dir):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(json_dir, filename)

    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses; one unreadable file should not abort the whole run.
        print(f'Skipped "{filename}": {exc}')
        continue

    # Only a JSON object with a non-blank string 'name' can be matched.
    name = data.get('name') if isinstance(data, dict) else None
    if not isinstance(name, str) or not name.strip():
        print(f'Skipped "{filename}": no usable "name" value')
        continue

    adjusted_name = adjust_name(name)
    matching_url = find_matching_url(adjusted_name)
    if not matching_url:
        print(f'No matching URL found for "{name}"')
        continue

    new_filename = sanitize_for_filename(matching_url)
    new_file_path = os.path.join(json_dir, new_filename)
    if new_filename != filename and os.path.exists(new_file_path):
        # os.rename silently replaces an existing file on POSIX, which would
        # destroy the file that already holds this name.
        print(f'Skipped "{filename}": "{new_filename}" already exists')
        continue
    os.rename(file_path, new_file_path)
    print(f'Renamed "{filename}" to "{new_filename}"')


def rename_files_to_final_format(directory, prefix='www.example.com_url_string_'):
    """Prepend *prefix* to slug-style ``.html.json`` files in *directory*.

    Files named like '98104-wa-robert-mitchell-10136.html.json' or
    'ernest-matthews-8046.html.json' become
    'www.example.com_url_string_98104-wa-robert-mitchell-10136.html.json' and
    'www.example.com_url_string_ernest-matthews-8046.html.json'. Names that
    do not fit either slug format are left untouched. With the default
    prefix, already-prefixed names no longer fit the pattern, so running the
    function again does not prefix them twice.

    Args:
        directory: Path of the directory whose entries are examined.
        prefix: Text placed in front of each matching filename.

    Returns:
        None. One line is printed per file renamed or skipped.
    """
    # Optional '<5 digits>-<2 letters>-' lead, then hyphenated letters, then
    # '-<digits>.html.json'.
    pattern = re.compile(r'^(?:\d{5}-[a-z]{2}-)?[a-z-]+-\d+\.html\.json$', re.I)

    for filename in os.listdir(directory):
        if not pattern.match(filename):
            continue
        new_filename = f'{prefix}{filename}'
        source_path = os.path.join(directory, filename)
        target_path = os.path.join(directory, new_filename)
        if os.path.exists(target_path):
            # os.rename would silently replace the existing file on POSIX.
            print(f'Skipped "{filename}": "{new_filename}" already exists')
            continue
        os.rename(source_path, target_path)
        print(f'Renamed "{filename}" to "{new_filename}"')

# Second pass: prefix any files in json_dir that still carry a bare
# slug-style '.html.json' name.
rename_files_to_final_format(json_dir)
	import os
	import re
	import json

	# Directory containing the JSON files
	json_dir = '/path/to/dir'
	# Text file listing the candidate URLs, one entry per line
	urls_file_path = '/path/to/urls.txt'


	# Read URLs from urls.txt, keeping only the non-blank, trimmed lines
	with open(urls_file_path, 'r') as file:
		urls = [entry for entry in map(str.strip, file) if entry]

	# Helper functions for normalising names and matching them against the URLs read from urls.txt above
	# Upper-case Roman numerals I through X (the range used for name suffixes).
	_ROMAN_NUMERAL_RE = re.compile(r'I{1,3}|IV|VI{0,3}|IX|X')


	def is_roman_numeral(s):
		"""Return True if *s* is an upper-case Roman numeral from I to X.

		The check is case-sensitive and must cover the whole string, so
		``'iv'``, ``'XI'``, ``'IIII'`` and a numeral followed by a newline
		are all rejected.

		Args:
			s: The string to test.

		Returns:
			bool: True when *s* is exactly one of I, II, III, IV, V, VI,
			VII, VIII, IX or X; False otherwise.
		"""
		# The alternation uses real '|' operators (an escaped pipe would
		# only match a literal '|' character); fullmatch also rejects a
		# trailing newline.
		return _ROMAN_NUMERAL_RE.fullmatch(s) is not None

	def sanitize_for_filename(url):
		"""Convert a URL into a flat ``.json`` filename.

		The leading ``https://`` (or ``http://``) scheme is removed and every
		``/`` is replaced with ``_``. For example
		``https://www.example.com/url_string/ernest-matthews-8046.html``
		becomes ``www.example.com_url_string_ernest-matthews-8046.html.json``.

		Args:
			url: The URL to convert.

		Returns:
			str: The derived filename, always ending in ``.json``.
		"""
		# Strip only a scheme at the very start; the host name is kept
		# intact so the result reads 'www.example.com_...'.
		for scheme in ('https://', 'http://'):
			if url.startswith(scheme):
				url = url[len(scheme):]
				break
		return url.replace('/', '_') + '.json'

	def find_matching_url(name, candidates=None):
		"""Return the first URL that contains *name* as a run of whole words.

		The words of *name* must appear in order, compared
		case-insensitively, and may be separated in the URL by any mix of
		hyphens, underscores or whitespace. This lets ``'Robert Mitchell'``
		match a slug such as ``robert-mitchell``.

		Args:
			name: The (already adjusted) name, words separated by whitespace.
			candidates: Optional iterable of URLs to search. Defaults to the
				module-level ``urls`` list.

		Returns:
			The first matching URL, or None when nothing matches or *name*
			contains no words.
		"""
		words = name.split() if name else []
		if not words:
			# An empty name would compile to r'\b\b', which matches almost
			# any URL and would wrongly select the first candidate.
			return None
		if candidates is None:
			candidates = urls

		# Join the escaped words with a separator class instead of a literal
		# space, since URL slugs do not contain spaces.
		separator = r'[-_\s]+'
		pattern = re.compile(
			r'\b' + separator.join(re.escape(word) for word in words) + r'\b',
			re.IGNORECASE,
		)
		for url in candidates:
			if pattern.search(url):
				return url
		return None

	def adjust_name(name):
		"""Normalise a person's name into the form used for URL matching.

		Steps, applied in order:

		1. A leading initial written as a letter plus period (``'J.'``)
		   loses its period.
		2. A trailing ``Jr``/``Sr`` (any case, with or without period) or a
		   Roman numeral accepted by ``is_roman_numeral`` is dropped, but
		   only when at least one other word remains.
		3. A first word of ``Ann`` or ``Anne`` (any case) is replaced by
		   ``'a'``.

		Args:
			name: The full name, words separated by whitespace.

		Returns:
			str: The adjusted words joined by single spaces.

		Raises:
			ValueError: If *name* contains no words.
		"""
		name_parts = name.split()
		if not name_parts:
			raise ValueError('name must contain at least one word')

		# Initial handling: 'J.' -> 'J'
		first = name_parts[0]
		if len(first) == 2 and first.endswith('.'):
			name_parts[0] = first[0]

		# Drop a generational suffix, but never the only word: a name that
		# is just 'Jr.' or 'V' would otherwise be emptied and crash below.
		last = name_parts[-1]
		if len(name_parts) > 1 and (
			last.lower() in ('jr', 'jr.', 'sr', 'sr.') or is_roman_numeral(last)
		):
			name_parts.pop()

		# 'Ann' / 'Anne' are matched through the shorthand 'a'.
		if name_parts[0].lower() in ('ann', 'anne'):
			name_parts[0] = 'a'

		return ' '.join(name_parts)

	# Main renaming logic: rename each JSON file after the URL that matches
	# the 'name' value stored inside it.
	for filename in os.listdir(json_dir):
		if not filename.endswith('.json'):
			continue
		file_path = os.path.join(json_dir, filename)

		try:
			with open(file_path, 'r') as f:
				data = json.load(f)
		except (OSError, ValueError) as exc:
			# json.JSONDecodeError and UnicodeDecodeError are ValueError
			# subclasses; one unreadable file should not abort the run.
			print(f'Skipped "{filename}": {exc}')
			continue

		# Only a JSON object with a non-blank string 'name' can be matched.
		name = data.get('name') if isinstance(data, dict) else None
		if not isinstance(name, str) or not name.strip():
			print(f'Skipped "{filename}": no usable "name" value')
			continue

		adjusted_name = adjust_name(name)
		matching_url = find_matching_url(adjusted_name)
		if not matching_url:
			print(f'No matching URL found for "{name}"')
			continue

		new_filename = sanitize_for_filename(matching_url)
		new_file_path = os.path.join(json_dir, new_filename)
		if new_filename != filename and os.path.exists(new_file_path):
			# os.rename silently replaces an existing file on POSIX, which
			# would destroy the file that already holds this name.
			print(f'Skipped "{filename}": "{new_filename}" already exists')
			continue
		os.rename(file_path, new_file_path)
		print(f'Renamed "{filename}" to "{new_filename}"')


	def rename_files_to_final_format(directory, prefix='www.example.com_url_string_'):
		"""Prepend *prefix* to slug-style ``.html.json`` files in *directory*.

		Files named like '98104-wa-robert-mitchell-10136.html.json' or
		'ernest-matthews-8046.html.json' become
		'www.example.com_url_string_98104-wa-robert-mitchell-10136.html.json'
		and 'www.example.com_url_string_ernest-matthews-8046.html.json'.
		Names that do not fit either slug format are left untouched. With
		the default prefix, already-prefixed names no longer fit the
		pattern, so running the function again does not prefix them twice.

		Args:
			directory: Path of the directory whose entries are examined.
			prefix: Text placed in front of each matching filename.

		Returns:
			None. One line is printed per file renamed or skipped.
		"""
		# Optional '<5 digits>-<2 letters>-' lead, then hyphenated letters,
		# then '-<digits>.html.json'.
		pattern = re.compile(r'^(?:\d{5}-[a-z]{2}-)?[a-z-]+-\d+\.html\.json$', re.I)

		for filename in os.listdir(directory):
			if not pattern.match(filename):
				continue
			new_filename = f'{prefix}{filename}'
			source_path = os.path.join(directory, filename)
			target_path = os.path.join(directory, new_filename)
			if os.path.exists(target_path):
				# os.rename would silently replace the existing file on POSIX.
				print(f'Skipped "{filename}": "{new_filename}" already exists')
				continue
			os.rename(source_path, target_path)
			print(f'Renamed "{filename}" to "{new_filename}"')

	# Second pass: prefix any files in json_dir that still carry a bare
	# slug-style '.html.json' name.
	rename_files_to_final_format(json_dir)