Skip to content

Instantly share code, notes, and snippets.

@cogas
Created July 31, 2018 12:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cogas/6e263b88d9ded42b6e72944ad4eb05b6 to your computer and use it in GitHub Desktop.
Save cogas/6e263b88d9ded42b6e72944ad4eb05b6 to your computer and use it in GitHub Desktop.
単語長分布ソースコード
# encoding: utf-8
import os
import random
from datetime import datetime
from math import factorial, exp

import numpy as np
# Number of length slots tracked per run: sizes the `words` list in run()
# and the SPACE table (presumably the maximum syllable count -- confirm).
SYL_MAX = 50
# Number of times the script entry point calls run().
SAMPLING = 20
# Only referenced by the commented-out formulas inside space(); not used by
# any active code visible in this file.
M = 0
# Number of outer-loop iterations performed by each run() call.
N = 6350
def volume(i):
    """Return the volume factor for length index ``i``.

    Args:
        i: Length index (run() passes values starting at 1).

    Returns:
        ``1 + i`` when ``i <= 2``, otherwise ``6 * i``.
    """
    if i <= 2:
        return 1 + i
    return 6 * i
def buffer_effect(i, word):
    """Return the multiplicative buffer factor for length ``i``.

    With ``v = volume(i)`` and ``s = space(i)``, the result is the product
    over ``k = 1 .. v - 1`` of::

        (s - word * v - k) / (s - word * (v - 1) - k)

    Args:
        i: Length index forwarded to ``volume`` and ``space``.
        word: Count already recorded for this length (run() passes
            ``words[j]``).

    Returns:
        ``1`` when ``volume(i) == 1`` or the product is empty, otherwise the
        float product described above.

    Raises:
        ZeroDivisionError: If a denominator term evaluates to zero.
    """
    v = volume(i)
    if v == 1:
        return 1
    # space(i) does not depend on k, so evaluate it once instead of twice
    # per loop iteration.
    capacity = space(i)
    result = 1
    for k in range(1, v):
        result *= (capacity - word * v - k) / (capacity - word * (v - 1) - k)
    return result
def quality_filter(i, word, total):
    """Return the constant acceptance factor ``1 - S`` with ``S = 0.5``.

    The three arguments are accepted so the call site in run() stays
    uniform, but the current implementation does not read any of them.

    Args:
        i: Length index (unused).
        word: Count already recorded for this length (unused).
        total: Value forwarded from run() (unused).

    Returns:
        ``0.5``.
    """
    S = 0.5
    return 1 - S
def space(i):
    """Return the size of the space for length ``i``.

    The active formula is ``250 * 60 ** (i - 1)``.

    Args:
        i: Length index (the SPACE table is built from ``i = 1 .. SYL_MAX``).

    Returns:
        ``250 * 60 ** (i - 1)``; an ``int`` for integer ``i >= 1``.
    """
    # Alternative formulas that were left commented out:
    #   M**i
    #   factorial(M) / (factorial(M-i) * factorial(i))
    #   int(M / i) ** i
    #   int(250 * (4*27)**(i-1) / i**i)
    #   92 * 36 ** (i-1)
    #   870**i
    return 250 * 60 ** (i - 1)
def run(total):
    """Run one sampling pass and return the count recorded per length slot.

    For each of ``N`` iterations the length slots ``j = 0 .. SYL_MAX - 1``
    are scanned in order. For each slot a random integer ``r`` is drawn
    from ``1 .. SPACE[j]`` and compared with an acceptance boundary built
    from ``volume``, ``buffer_effect`` and ``quality_filter``. The first
    slot with ``r < boundary`` has its count incremented and the scan for
    that iteration stops; if no slot accepts, the iteration adds nothing.

    Args:
        total: Forwarded unchanged to ``quality_filter``.

    Returns:
        A list of ``SYL_MAX`` integers; element ``j`` is the number of
        iterations accepted at slot ``j`` (length ``j + 1``).
    """
    words = [0] * SYL_MAX
    for _ in range(N):
        for j in range(SYL_MAX):
            r = random.randint(1, SPACE[j])
            word = words[j]
            boundary = (
                (SPACE[j] - volume(j + 1) * word)
                * buffer_effect(j + 1, word)
                * quality_filter(j + 1, word, total)
            )
            if r < boundary:
                words[j] += 1
                # Accepted at this length; move on to the next iteration.
                break
    return words
# Collects one list of per-length counts for every sampling pass.
data = []
# SPACE[j] holds space(j + 1); run() indexes this table by slot j.
SPACE = [space(length) for length in range(1, SYL_MAX + 1)]
if __name__ == '__main__':
    # np.savetxt does not create missing directories; make sure the target
    # exists up front so a missing folder cannot discard a finished run.
    os.makedirs("output", exist_ok=True)
    # print(SPACE)
    for i in range(SAMPLING):
        print("SAMPLING: {}".format(i+1))
        data.append(run(i+1))
    # Rows are sampling passes, columns are length slots.
    data = np.array(data)
    average = np.average(data, axis=0)
    # Timestamp the output files so repeated runs do not overwrite each other.
    tdatetime = datetime.now()
    tstr = tdatetime.strftime('%Y%m%d%H%M%S')
    np.savetxt("output/{}_rawdata.csv".format(tstr), data.T, delimiter=',')
    np.savetxt("output/{}_average.csv".format(tstr), average.T, delimiter=',')
    print("DONE")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment