Skip to content

Instantly share code, notes, and snippets.

@akelleh
Created August 15, 2016 04:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save akelleh/4081035c907ba923fc689f91927f101d to your computer and use it in GitHub Desktop.
Save akelleh/4081035c907ba923fc689f91927f101d to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.cross_validation import train_test_split
from sklearn.metrics import r2_score
import networkx as nx
import pandas as pd
from scipy.optimize import minimize
import time
class RandomData(object):
    """Synthetic linear-Gaussian data sampled from a random DAG.

    A random lower-triangular weight matrix is turned into a directed graph
    (a non-zero entry ``[i, j]`` with ``i > j`` becomes the edge ``i -> j``
    whose ``'weight'`` is that entry). Each sample is then produced by giving
    every node a standard-normal noise term plus the weighted sum of its
    parents' values for that sample.

    Attributes:
        g: the ``networkx.DiGraph``; every node carries a ``'name'``
            (``'X_<node>'``) and a ``'data'`` list with one value per sample.
        X: ``pandas.DataFrame`` with one column per node (keyed by ``'name'``).
    """

    def __init__(self, variables=3, p=0.3, n=50, seed=None):
        """Build a random DAG and sample ``n`` rows from it.

        Args:
            variables: number of nodes / columns.
            p: probability that any admissible (below-diagonal) edge exists.
            n: number of samples to generate.
            seed: optional seed for numpy's global RNG. ``None`` (the default)
                reseeds from OS entropy, so every instance differs.
        """
        # The previous clock-derived seed only changed every 10 ms, so
        # instances created back-to-back were identical. Seeding with None
        # keeps the "fresh randomness per instance" intent without that clash.
        np.random.seed(seed)
        # DAG iff there exists a lower triangular form; go ahead and make the
        # edges the linear regression coefficients.
        adj_matrix = np.array([
            [np.random.normal() if np.random.binomial(1, p) and i > j else 0.
             for j in range(variables)]
            for i in range(variables)
        ])
        self.g = nx.DiGraph(adj_matrix)
        self.generate_data(n=n)
        self.X = pd.DataFrame({
            attributes['name']: attributes['data']
            for _, attributes in self.g.nodes(data=True)
        })

    def _root_nodes(self):
        """Return the nodes of ``self.g`` that have no incoming edges."""
        # dict(...) accepts both the plain dict returned by networkx 1.x and
        # the (node, degree) view returned by networkx 2+.
        in_degrees = dict(self.g.in_degree())
        return [node for node, indegree in in_degrees.items() if indegree == 0]

    def generate_data(self, n=1000):
        """Sample ``n`` values per node and store them on the graph nodes.

        Resets each node's ``'data'`` list and ``'name'``, then fills the
        lists sample by sample: roots get pure standard-normal noise, every
        other node gets noise plus ``weight * parent_value`` for each parent.

        Args:
            n: number of samples to draw.
        """
        # nodes(data=True) hands back the graph's own attribute dicts, so
        # writing through them updates the graph on old and new networkx
        # alike (``g.node`` was removed in networkx 2.4).
        node_attrs = dict(self.g.nodes(data=True))
        for node, attrs in node_attrs.items():
            attrs['data'] = []
            attrs['name'] = 'X_{}'.format(node)

        roots = self._root_nodes()
        traversal = self.bfs_traversal(roots)

        # The graph does not change while sampling, so resolve each node's
        # (parent series, edge weight) pairs once instead of once per sample.
        parents = {
            node: [
                (node_attrs[predecessor]['data'],
                 self.g.adj[predecessor][node]['weight'])
                for predecessor in self.g.predecessors(node)
            ]
            for node in traversal
        }

        for _ in range(n):
            for node in roots:
                node_attrs[node]['data'].append(np.random.normal())
            # traversal lists parents before children, so every parent's
            # value for the current sample is already its last element.
            for node in traversal:
                value = np.random.normal()
                for parent_data, weight in parents[node]:
                    value += parent_data[-1] * weight
                node_attrs[node]['data'].append(value)

    def bfs_traversal(self, roots):
        """Return the non-root nodes reachable from ``roots``, parents first.

        The graph is walked generation by generation; a node reachable along
        several paths is kept only at its deepest generation, which places it
        after all of its reachable ancestors.

        Args:
            roots: starting nodes; ``None`` means every node without
                incoming edges.

        Returns:
            List of nodes (roots excluded) in an order where each node comes
            after all of its predecessors that are reachable from ``roots``.
            Assumes the graph is acyclic; a cycle would never terminate.
        """
        if roots is None:
            roots = self._root_nodes()

        frontier = list(roots)
        layered = []
        while frontier:
            # A set keeps each generation free of duplicates, so the work is
            # bounded by nodes per generation rather than by path count.
            next_generation = set()
            for node in frontier:
                next_generation.update(self.g.successors(node))
            layered.extend(next_generation)
            frontier = next_generation

        # Keep only the last (deepest) occurrence of every node.
        seen = set()
        resolved_traversal = []
        for node in reversed(layered):
            if node not in seen:
                seen.add(node)
                resolved_traversal.append(node)
        resolved_traversal.reverse()
        return resolved_traversal
# Demo: build one random data set and show its first rows.
df = RandomData().X
# Function-call form prints the same text on Python 2 and is valid on Python 3.
print(df.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment