Last active
March 6, 2020 00:42
-
-
Save data-enhanced/a82cff60939b38706376bbe161198ae3 to your computer and use it in GitHub Desktop.
Remove json formatting from TMDB fields
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import ast for ast.literal_eval
import ast
# Remove JSON from TMDB fields
# for genres, spoken_languages, production_companies, production_countries
# Requires import ast
def remove_json(content):
    """Collapse a list-of-dicts TMDB field into a comma-separated name string.

    Args:
        content: The cell value. Normally a string holding the Python-literal
            text of a list of dictionaries, each with a ``'name'`` key, e.g.
            ``"[{'id': 18, 'name': 'Drama'}]"``. An already-parsed list or
            tuple of such dictionaries is accepted as well.

    Returns:
        The ``'name'`` values joined with ``", "`` (an empty string for an
        empty list). Any other value -- ``None``, NaN, numbers -- is returned
        unchanged, so the function can be applied to a column that still
        contains nulls.

    Raises:
        ValueError, SyntaxError: If a string cannot be parsed as a literal.
        KeyError: If an entry has no ``'name'`` key.
    """
    if isinstance(content, str):
        # literal_eval only evaluates literals, so unlike eval() it cannot
        # execute arbitrary code embedded in the data.
        content = ast.literal_eval(content)
    elif not isinstance(content, (list, tuple)):
        # Null/NaN (or otherwise non-text) cell: nothing to convert.
        return content
    # Build the string of names that replaces the original content.
    return ", ".join(entry['name'] for entry in content)
# Sample usage
# Apply the function to only non-null values for spoken_languages field.
# The expression yields a new Series; assign it to keep the converted values.
tmdb['spoken_languages'][tmdb['spoken_languages'].notna()].apply(remove_json)
# Custom function for the belongs_to_collection field
# This field only has one dict, not a list of dicts
# Requires import ast
def remove_json_from_collections(content):
    """Extract the collection name from a single-dict TMDB field.

    Args:
        content: The cell value. Normally a string holding the Python-literal
            text of one dictionary with a ``'name'`` key, e.g.
            ``"{'id': 10, 'name': 'Star Wars Collection'}"``. An
            already-parsed dictionary is accepted as well.

    Returns:
        The value stored under ``'name'``. Any other value -- ``None``, NaN,
        numbers -- is returned unchanged, so the function can be applied to a
        column that still contains nulls.

    Raises:
        ValueError, SyntaxError: If a string cannot be parsed as a literal.
        KeyError: If the dictionary has no ``'name'`` key.
    """
    if isinstance(content, str):
        # Interpret the content as a single Python dictionary; literal_eval
        # only evaluates literals, so it cannot run code embedded in the data.
        content = ast.literal_eval(content)
    elif not isinstance(content, dict):
        # Null/NaN (or otherwise non-text) cell: nothing to convert.
        return content
    # Return the collection name to replace the old content.
    return content['name']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment