Created
June 17, 2018 15:09
-
-
Save seven0525/3e828961294b9cd8013bbd61a1d1ddc0 to your computer and use it in GitHub Desktop.
RADWIMPSっぽい曲LSTMで自動生成してみた(1)(歌詞と、コード進行のみ) ref: https://qiita.com/ahpjop/items/a1d2d159c614258828ab
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.layers import Dense, Activation, LSTM | |
from keras.optimizers import RMSprop | |
from keras.utils.data_utils import get_file | |
import numpy as np | |
import random | |
import sys | |
# Build the vocabulary and the character <-> index lookups.
# NOTE(review): `text` is not defined in the visible part of this script --
# confirm it is loaded before this point.
chars = sorted(set(text))
print('Total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Cut the text into windows of `maxlen` characters, starting a new window
# every `step` characters; the character right after each window is the
# prediction target for that window.
maxlen = 1
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[i: i + maxlen] for i in window_starts]
next_chars = [text[i + maxlen] for i in window_starts]

# Vectorize the text: one-hot encode every window (X) and its target (y).
# The builtin `bool` is used because the `np.bool` alias was removed in
# NumPy 1.24 (deprecated since 1.20).
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
# Define the model: one 128-unit LSTM layer feeding a dense layer with one
# output per vocabulary character, followed by a softmax activation.
# NOTE(review): `Sequential` is not imported in the visible part of this
# script -- confirm it is imported before this point.
optimizer = RMSprop(lr=0.01)
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature rescaling.

    The probabilities are converted to log space, divided by ``temperature``,
    re-normalized with a softmax, and a single multinomial draw is taken.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Divisor applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it. Must be
            non-zero.

    Returns:
        The index selected by the multinomial draw (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    # A probability of exactly 0 maps to a -inf logit, which is the intended
    # result (it stays at probability 0), so silence the divide warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This cancels out in the normalization
    # below but keeps every exp() from underflowing to 0 at low temperatures,
    # which would otherwise turn the division into 0/0 = NaN.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
# Train for one epoch per iteration; after each epoch, generate 40 characters
# from a random seed window at each of several sampling temperatures.
for iteration in range(1, 10):
    print()
    print('-' * 50)
    print('繰り返し回数: ', iteration)
    model.fit(X, y, batch_size=128, epochs=1)

    # The same seed position is reused for every diversity value below.
    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print()
        print('-----diveristy', diversity)

        sentence = text[start_index: start_index + maxlen]
        generated = '' + sentence
        print('----- Seedを生成しました: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(40):
            # One-hot encode the current window as a batch of one.
            x = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(sentence):
                x[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Slide the window forward by one character.
            sentence = sentence[1:] + next_char
            generated += next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
C → D → A → C → C → D → A → D → D → A → C → A → D
C → G → G → Bm → C → A → C → A → D → D → A → D
C → Bm7 → Bm → E → Bm → E → A → D → A → Bm7
C → G → A# → G → G → A → C# → C → F# → Em → E
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
F → G → A → D → Bm → G → G# → A → D → C → B → A
F → G → A → G → Em → G → G → A → Bm → E → A → G
F → C → G → Em → C → A → G → A → G → C → D → G
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
G → C → D → C → D → C → Em → C → G → F → C → Bm
G → A → G → D → E → Em → G → Bm7 → Em → G
G → C → A → G → D → D → G → G → A → C → A → E
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.models import Sequential | |
from keras.layers import Dense, Activation, LSTM | |
from keras.optimizers import RMSprop | |
from keras.utils.data_utils import get_file | |
import numpy as np | |
import random | |
import sys | |
path = './rad_lyrics.txt'
# Read the corpus through a context manager so the handle is closed, and state
# the encoding explicitly instead of relying on the platform default.
# NOTE(review): assumes the file is UTF-8 -- the other script in this gist
# decodes rad_lyrics.txt as UTF-8; confirm.
with open(path, "r", encoding="utf-8") as f:
    text = f.read()

# Build the vocabulary and the character <-> index lookups.
chars = sorted(set(text))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Cut the text into windows of `maxlen` characters, starting a new window
# every `step` characters; the character right after each window is the
# prediction target for that window.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[i: i + maxlen] for i in window_starts]
next_chars = [text[i + maxlen] for i in window_starts]

# Vectorize the text: one-hot encode every window (X) and its target (y).
# The builtin `bool` is used because the `np.bool` alias was removed in
# NumPy 1.24 (deprecated since 1.20).
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
# Define the model: one 128-unit LSTM layer feeding a dense layer with one
# output per vocabulary character, followed by a softmax activation.
optimizer = RMSprop(lr=0.01)
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
def sample(preds, temperature=1.0):
    """Pick an index at random from ``preds`` reweighted by ``temperature``.

    The probabilities are moved to log space, divided by ``temperature``,
    passed through a softmax, and one multinomial trial decides the result.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Divisor applied to the log-probabilities; smaller values
            concentrate the draw on the most likely entries, larger values
            spread it out. Must be non-zero.

    Returns:
        The index chosen by the multinomial trial (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf by design here (the entry keeps probability 0); suppress
    # the accompanying divide-by-zero warning.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Subtracting the maximum leaves the softmax unchanged but guarantees at
    # least one exp() equals 1, so the normalizer cannot underflow to 0 (which
    # would produce NaN probabilities at very low temperatures).
    scaled -= np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
# Train for one epoch per iteration; after each epoch, generate 100 characters
# from a random seed window at each of several sampling temperatures.
for iteration in range(1, 120):
    print()
    print('-' * 50)
    print('繰り返し回数: ', iteration)
    model.fit(X, y, batch_size=128, epochs=1)

    # The same seed position is reused for every diversity value below.
    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print()
        print('-----diveristy', diversity)

        sentence = text[start_index: start_index + maxlen]
        generated = '' + sentence
        print('----- Seedを生成しました: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(100):
            # One-hot encode the current window as a batch of one.
            x = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(sentence):
                x[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Slide the window forward by one character.
            sentence = sentence[1:] + next_char
            generated += next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from janome.tokenizer import Tokenizer | |
import json | |
# Read the text file as raw bytes; the context manager closes the handle.
with open('rad_lyrics.txt', 'rb') as f:
    sjis = f.read()
# Despite the variable name, the bytes are decoded as UTF-8.
text = sjis.decode('utf_8')
# Run morphological analysis on the text.
t = Tokenizer()
words = t.tokenize(text)
# Build the dictionary.
def make_dic(words):
    """Build a nested three-word frequency dictionary from a token stream.

    A window of the last three surface forms is kept while walking the tokens
    and each full window is recorded through ``set_word3``. The window starts
    as ``["@"]`` and is reset to that marker after every "。", so ``"@"``
    keys the words that open a sentence.

    Args:
        words: Iterable of tokens exposing a ``surface`` attribute.

    Returns:
        The dictionary filled in by ``set_word3``.
    """
    window = ["@"]
    dic = {}
    for token in words:
        word = token.surface
        # Skip empty surfaces and line breaks.
        if word in ("", "\r\n", "\n"):
            continue
        window.append(word)
        if len(window) < 3:
            continue
        # Keep only the three most recent words.
        window = window[-3:]
        set_word3(dic, window)
        if word == "。":
            # Sentence ended: start over from the sentence-start marker.
            window = ["@"]
    return dic
# Register a three-element list in the dictionary.
def set_word3(dic, s3):
    """Increment the counter stored at ``dic[w1][w2][w3]``.

    Missing intermediate dictionaries are created on the way down, and a
    missing counter starts from 0 before being incremented.

    Args:
        dic: Nested dictionary to update in place.
        s3: Sequence of exactly three words ``(w1, w2, w3)``.
    """
    w1, w2, w3 = s3
    followers = dic.setdefault(w1, {}).setdefault(w2, {})
    followers[w3] = followers.get(w3, 0) + 1
dic = make_dic(words)
# Write through a context manager so the file is flushed and closed once the
# dump finishes.
with open("markov-blog.json", "w", encoding="utf-8") as f:
    json.dump(dic, f)
## Automatic generation
import json
import random

# Load the saved dictionary; the context manager closes the handle, and the
# encoding matches the one used when the file was written.
with open("markov-blog.json", "r", encoding="utf-8") as f:
    dic = json.load(f)
tweets_list = []
def word_choice(sel):
    """Return one key of ``sel`` chosen uniformly at random.

    The values stored under the keys are not consulted.
    """
    return random.choice(list(sel.keys()))
def make_sentence(dic):
    """Generate one sentence by walking the three-word dictionary.

    The first two words are picked from the entries under the ``"@"`` marker.
    Each following word is picked from the entries recorded for the current
    word pair, until "。" is produced or the pair has no recorded follower.

    Args:
        dic: Nested dictionary of the form ``dic[w1][w2][w3] -> count``.

    Returns:
        The generated words joined into one string, or ``"no dic"`` when the
        dictionary has no ``"@"`` entry.

    Side effects:
        Appends the list of generated words to the module-level
        ``tweets_list``.
    """
    if "@" not in dic:
        return "no dic"
    top = dic["@"]
    w1 = word_choice(top)
    w2 = word_choice(top[w1])
    ret = [w1, w2]
    while True:
        # A pair that was never followed by another word has no entry; stop
        # there instead of raising KeyError.
        followers = dic.get(w1, {}).get(w2)
        if not followers:
            break
        w3 = word_choice(followers)
        ret.append(w3)
        if w3 == "。":
            break
        w1, w2 = w2, w3
    tweets_list.append(ret)
    return "".join(ret)
# Generate a single sentence, record it in tweets_list, and print it.
for i in range(1):
    s = make_sentence(dic)
    tweets_list.append(s)
    print(s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment