# data-enhanced/Remove_json_from_TMDB_fields.py

## Remove_json_from_TMDB_fields.py
# Import ast for ast.literal_eval
import ast

# Remove JSON from TMDB fields
# for genres, spoken_languages, production_companies, production_countries
# Null values (None / NaN) are returned unchanged, so the function can be
# applied to a whole column without filtering out null values first
# Requires import ast -- ast.literal_eval only evaluates Python literals,
# which makes it the safer choice over plain eval
def remove_json(content):
    """Collapse a stringified list of dicts into a comma-separated name string.

    Args:
        content: String holding a Python-literal list of dictionaries, each
            with a 'name' key. None and float NaN are also accepted.

    Returns:
        The 'name' values joined with ", " (an empty string for an empty
        list), or ``content`` itself when it is None or NaN.

    Raises:
        ValueError or SyntaxError: If ``content`` is not a valid Python
            literal.
        KeyError: If one of the dictionaries has no 'name' key.
    """
    # A missing cell carries nothing to parse; hand it back untouched
    # (NaN is the only float that does not compare equal to itself)
    if content is None or (isinstance(content, float) and content != content):
        return content
    # Interpret the content as a Python list of dictionaries
    entries = ast.literal_eval(content)
    # Join the name from each dictionary into the replacement string
    return ", ".join(entry['name'] for entry in entries)

# Sample usage
# Drop the null entries of the spoken_languages field, then apply the
# function to the values that remain
tmdb['spoken_languages'].dropna().apply(remove_json)


# Custom function for the belongs_to_collection field
# This field only has one dict, not a list of dicts
# Null values (None / NaN) are returned unchanged, so the function can be
# applied to a whole column without filtering out null values first
# Requires import ast -- ast.literal_eval only evaluates Python literals,
# which makes it the safer choice over plain eval
def remove_json_from_collections(content):
    """Extract the collection name from a stringified dictionary.

    Args:
        content: String holding a Python-literal dictionary with a 'name'
            key. None and float NaN are also accepted.

    Returns:
        The value stored under 'name', or ``content`` itself when it is
        None or NaN.

    Raises:
        ValueError or SyntaxError: If ``content`` is not a valid Python
            literal.
        KeyError: If the dictionary has no 'name' key.
    """
    # A missing cell carries nothing to parse; hand it back untouched
    # (NaN is the only float that does not compare equal to itself)
    if content is None or (isinstance(content, float) and content != content):
        return content
    # Interpret the content as a single Python dictionary
    collection = ast.literal_eval(content)
    # Return the collection name to replace the old content
    return collection['name']