Skip to content

Instantly share code, notes, and snippets.

@ErikGartner
Created May 23, 2017 17:33
Show Gist options
  • Save ErikGartner/318beb4da7597bb3aace9b5d9350ba89 to your computer and use it in GitHub Desktop.
Save ErikGartner/318beb4da7597bb3aace9b5d9350ba89 to your computer and use it in GitHub Desktop.
Preprocess lyrics for RNN
import csv
import requests
import sys
import json
# API key sent as the `api_key` query parameter of every genre request.
# It is empty here and has to be filled in before downloads can succeed.
LASTM_FM_KEY = ''

# Maps a normalised (stripped, lower-cased) artist name to the list of tag
# names returned for that artist.  Failed lookups are stored as [].
genre_cache = {}

# Upper bound, in seconds, for one genre request so that a stalled
# connection cannot hang the whole run.
_REQUEST_TIMEOUT_SECONDS = 10


def get_genres(artist_name):
    """Return the list of tag names ("genres") for *artist_name*.

    The name is stripped and lower-cased, then looked up in the module-level
    ``genre_cache``.  On a cache miss the ``artist.getinfo`` method of the
    audioscrobbler web service is queried and the names found under
    ``artist -> tags -> tag`` in the JSON reply are cached and returned.

    Any failure while downloading or decoding the reply is reported on
    stdout and cached as an empty list, so the same artist is not requested
    again (best-effort behaviour).

    Args:
        artist_name: Artist name as it appears in the input data.

    Returns:
        The cached list of tag names; empty if the lookup failed.
    """
    artist_name = artist_name.strip().lower()
    if artist_name not in genre_cache:
        try:
            print('Downloading genres for: %s' % artist_name)
            # Pass the query as `params` so requests URL-encodes the values;
            # names containing '&', '#', '+' or '=' would otherwise corrupt
            # the query string.
            data = requests.get(
                'http://ws.audioscrobbler.com/2.0/',
                params={
                    'method': 'artist.getinfo',
                    'artist': artist_name,
                    'format': 'json',
                    'api_key': LASTM_FM_KEY,
                },
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            genres = [n['name'] for n in data.json()['artist']['tags']['tag']]
        except Exception:
            print('Error while downloading genres for %s' % artist_name)
            genres = []
        # Store the result after the try block rather than in `finally`:
        # on KeyboardInterrupt/SystemExit `genres` is unbound, and a
        # `finally` clause would raise UnboundLocalError and mask it.
        genre_cache[artist_name] = genres
    return genre_cache[artist_name]
def filter_row(row, target_genre):
    """Tell whether the artist of a CSV row is tagged with *target_genre*.

    Args:
        row: One parsed CSV record laid out as ``artist,song,link,text``;
            only the first column (the artist) is inspected.
        target_genre: Genre name that must appear, exactly as given, in the
            artist's genre list.

    Returns:
        True if ``target_genre`` is among the genres returned by
        ``get_genres`` for the row's artist, otherwise False.  An empty row
        never matches.
    """
    # csv.reader yields an empty list for a blank line; there is no artist
    # to look up, so reject the row instead of failing on row[0].
    if not row:
        return False
    return target_genre in get_genres(row[0])
# Command line: INPUT_CSV OUTPUT_FILE TARGET_GENRE GENRES_JSON
# Fail with a usage message rather than a bare IndexError when arguments
# are missing; extra arguments are still ignored as before.
if len(sys.argv) < 5:
    sys.exit('usage: %s INPUT_CSV OUTPUT_FILE TARGET_GENRE GENRES_JSON'
             % sys.argv[0])

input_file = sys.argv[1]
output_file = sys.argv[2]
target_genre = sys.argv[3]
genres_file = sys.argv[4]

# Load the artist -> genres cache saved by an earlier run.  On the very
# first run the file does not exist yet, so start with an empty cache.
try:
    with open(genres_file) as f:
        genre_cache = json.load(f)
except FileNotFoundError:
    genre_cache = {}

try:
    with open(input_file) as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        rows = list(reader)

    # Columns are artist,song,link,text.  Rows without a text column
    # (blank or truncated lines) cannot contribute output, so skip them
    # before any genre lookup.
    filtered_rows = [
        r for r in rows if len(r) > 3 and filter_row(r, target_genre)
    ]

    # Write the text column of every matching row, one entry per write,
    # each followed by a newline.
    with open(output_file, 'w') as f:
        for filtered_row in filtered_rows:
            f.write(filtered_row[3])
            f.write('\n')
finally:
    # Save the cache even if processing failed part-way, so genres that
    # were already downloaded are not fetched again on the next run.
    with open(genres_file, 'w') as f:
        json.dump(genre_cache, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment