Skip to content

Instantly share code, notes, and snippets.

@ybenjo
Created December 30, 2015 08:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ybenjo/305c84fc0e817fab4f0e to your computer and use it in GitHub Desktop.
Save ybenjo/305c84fc0e817fab4f0e to your computer and use it in GitHub Desktop.
The Web as a Jungle: Non-Linear Dynamical Systems for Co-evolving Online Activities (WWW 2015)
import numpy as np
import sys
from collections import defaultdict
from lmfit import minimize, Parameters
from sklearn.decomposition import FastICA
import math
# This script is (incomplete and buggy) implementation of
# The Web as a Jungle: Non-Linear Dynamical Systems for Co-evolving Online Activities, (WWW 2015).
# http://www.cs.kumamoto-u.ac.jp/~yasuko/PUBLICATIONS/www15-ecoweb.pdf
# See original codes
# http://www.cs.kumamoto-u.ac.jp/~yasuko/SRC/ecoweb.zip
# Usage
# python3 ecoweb.py path_to_train_file time_of_train size_of_trends
# File format
# species_name \t t:count \t t:count ...
class Ecoweb:
def __init__(self, trend_size):
# fixed seasonal bin
self.n_p = 52
self.trend_size = trend_size
self.x_train = defaultdict(lambda: defaultdict(int))
self.x_test = defaultdict(lambda: defaultdict(int))
def read_file(self, input_file, t_train):
self.t_train = t_train
self.species_name_id = defaultdict(lambda: len(self.species_name_id))
self.species_id_name = defaultdict()
for l in open(input_file, 'r'):
# species \t time:count \t ...
ary = l.rstrip("\n").split("\t")
species_name = ary.pop(0)
species_id = self.species_name_id[species_name]
self.species_id_name[species_id] = species_name
for pair in ary:
t, count = tuple([int(x) for x in pair.split(':')])
if t < t_train:
self.x_train[species_id][t] = count
else:
self.x_test[species_id][t] = count
# list of target speciess
self.d = self.x_train.keys()
self.t_train
def initialize(self):
# initialize variables
# parameters!
self.K = defaultdict(lambda: defaultdict(lambda: float))
# initial popularity
self.p = defaultdict(lambda: float)
self.r = defaultdict(lambda: float)
self.A = defaultdict(lambda: defaultdict(lambda: float))
# popularity
self.P = defaultdict(lambda: defaultdict(lambda: float))
# estimated count
self.C = defaultdict(lambda: defaultdict(lambda: float))
# seasonal effect
self.e = defaultdict(lambda: defaultdict(lambda: 0.0))
self.W = defaultdict(lambda: defaultdict(lambda: 0.0))
self.B = np.zeros((self.trend_size, self.n_p))
# used in ICA
self.h = math.ceil(self.t_train / self.n_p)
for i in self.d:
# fix values
self.r[i] = 0.1
self.p[i] = 2000.0
self.K[i] = 10000
for j in self.d:
val = 1.0 if i == j else 0.0
self.A[i][j] = val
self.update_P_and_C()
def calc_P_and_C(self, K, p, r, A, W, use_seasonal = True):
P = defaultdict(lambda: defaultdict(lambda: float))
C = defaultdict(lambda: defaultdict(lambda: float))
for i in self.d:
P[i][-1] = p[i]
for t in range(0, self.t_train):
for i in self.d:
tau = t % self.n_p
sum_a_p = sum([A[i][j] * P[j][t - 1] for j in self.d])
P[i][t] = P[i][t - 1] * (1 + r[i] * (1 - sum_a_p / K[i]))
# reconstruct e
e = 0.0
if use_seasonal:
for k in range(0, self.trend_size - 1):
e += W[i][k] * self.B[k, tau]
C[i][t] = P[i][t] * (1 + e)
return (P, C)
def update_P_and_C(self):
self.P, self.C = self.calc_P_and_C(self.K, self.p, self.r, self.A, self.W, True)
def objective(self):
err = 0.0
for t in range(0, self.t_train):
for i in self.d:
err += pow(self.x_train[i][t] - self.C[i][t], 2)
return math.sqrt(err / (self.t_train * len(self.d)))
def objective_for_lmfit(self, params):
err = 0.0
# unpack
K = defaultdict(lambda: defaultdict(lambda: float))
p = defaultdict(lambda: float)
r = defaultdict(lambda: float)
A = defaultdict(lambda: defaultdict(lambda: float))
for i in self.d:
K[i] = params["K_{0}".format(i)]
p[i] = params["p_{0}".format(i)]
r[i] = params["r_{0}".format(i)]
# fix
A[i][i] = 1
for j in self.d:
if i == j : continue
A[i][j] = params["A_{0}_{1}".format(i, j)]
P, C = self.calc_P_and_C(K, p, r, A, self.W, True)
errs = [ ]
for i in self.d:
for t in range(0, self.t_train):
e = self.x_train[i][t] - C[i][t]
errs.append(pow(e, 2))
return errs
def levenberg_marquardt(self):
# expand each parameters
params = Parameters()
for i in self.d:
params.add("K_{0}".format(i), value = self.K[i], min = 0.0)
params.add("p_{0}".format(i), value = self.p[i])
params.add("r_{0}".format(i), value = self.r[i], min = 0.0, max = 1.0)
# A
for j in self.d:
# skip i == j
if i == j : continue
params.add("A_{0}_{1}".format(i, j), value = self.A[i][j], min = 0.0, max = 1.0)
ret = minimize(self.objective_for_lmfit, params, maxfev = 500)
# ret = minimize(self.objective_for_lmfit, params)
optimized_params = ret.params
# update parameters
for i in self.d:
self.K[i] = optimized_params["K_{0}".format(i)].value
self.p[i] = optimized_params["p_{0}".format(i)].value
self.r[i] = optimized_params["r_{0}".format(i)].value
for j in self.d:
if i == j : continue
self.A[i][j] = optimized_params["A_{0}_{1}".format(i, j)].value
# update P and C
self.update_P_and_C()
def objective_for_seasonal_component(self, params):
# unpack W
W = defaultdict(lambda: defaultdict(float))
for i in self.d:
for k in range(0, self.trend_size - 1):
W[i][k] = params["W_{0}_{1}".format(i, k)].value
# calc P and C
P, C = self.calc_P_and_C(self.K, self.p, self.r, self.A, W, True)
errs = [ ]
for t in range(0, self.t_train):
for i in self.d:
err = pow(self.x_train[i][t] - C[i][t], 2)
errs.append(err)
return errs
def decompose(self):
E = np.zeros((len(self.d) * self.h, self.n_p))
# then, make E
# in this step, we don't use seasonal component to calc new P.
P, C = self.calc_P_and_C(self.K, self.p, self.r, self.A, self.W, False)
for i in self.d:
for t in range(0, self.t_train):
v = (self.x_train[i][t] - P[i][t]) / (P[i][t] + 1)
quotient = t // self.n_p
tau = t % self.n_p
pos = (i * self.h) + quotient
E[pos, tau] = v
# extract independent components.
ica = FastICA(n_components = self.trend_size)
ica.fit(E)
self.B = np.transpose(ica.mixing_)
def levenberg_marquardt_seasonal_component(self):
self.decompose()
params = Parameters()
for i in self.d:
for k in range(0, self.trend_size - 1):
params.add("W_{0}_{1}".format(i, k), self.W[i][k])
ret = minimize(self.objective_for_seasonal_component, params)
optimized_params = ret.params
# update W
for i in self.d:
for k in range(0, self.trend_size - 1):
self.W[i][k] = optimized_params["W_{0}_{1}".format(i, k)].value
# update e[i][t]
for i in self.d:
for t in range(0, self.t_train):
tau = t % self.n_p
# reset e[i][t]
self.e[i][t] = 0.0
for k in range(0, self.trend_size - 1):
self.e[i][t] += self.W[i][k] * self.B[k, tau]
# update P and C
self.update_P_and_C()
def train(self, i = 1):
print("{0}'s train".format(i))
print("before: {0}".format(model.objective()))
self.levenberg_marquardt()
print("after: {0}".format(model.objective()))
self.levenberg_marquardt_seasonal_component()
print("after: {0}".format(model.objective()))
if __name__ == '__main__':
input_file = sys.argv[1]
t_train = int(sys.argv[2])
trend_size = int(sys.argv[3])
model = Ecoweb(trend_size)
model.read_file(input_file, t_train)
model.initialize()
for i in range(0, 100):
model.train(i)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment