Conrad Lee conradlee

## emr_kill_hanging_job.py

# Copyright 2019 Parsely
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software

## emr_kill_hanging_job.py
# Copyright 2019 Parsely
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,

## bandit-election-analysis.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                conradlee
                / bandit-election-analysis.ipynb
            
            
              Created
              June 14, 2016 19:24
            
              
                Which candidate would a Bayesian bandit write articles on?
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## caltech_example_benchmark
$ ./fb_edges Caltech36 | comm_algs/louvain_wrapper | ./label_feature_matrix Caltech36 | ./infer 10
0.36

## datenum2datetime.py
from datetime import datetime, timedelta

# Example MATLAB serial date number: whole days plus the time of day as a fraction.
matlab_datenum = 731965.04835648148
# The whole-day part goes through fromordinal(), the fractional day is added
# back as a timedelta, and 366 days are subtracted to line up the two
# calendars' origins (Python's ordinal 1 is 0001-01-01; MATLAB's datenum
# origin is presumably one leap year earlier -- confirm against the MATLAB docs).
python_datetime = (
    datetime.fromordinal(int(matlab_datenum))
    + timedelta(days=matlab_datenum % 1)
    - timedelta(days=366)
)

## parse_photos.py
import csv
import gzip
from contextlib import closing

def photo_tup_generator(in_filename, bb_left, bb_right, bb_bottom, bb_top):
    """
    Open the gzip-compressed file ``in_filename`` and set up a CSV reader on it.

    Note: rows yielded are in utf8 (have not been decoded to unicode)

    NOTE(review): only the start of this function is visible here -- no
    ``yield`` appears and the ``bb_*`` arguments are not used in the visible
    code. They presumably describe a bounding box used to filter rows;
    confirm against the full source.
    """
    # closing() makes sure f.close() is called when the with-block exits.
    with closing(gzip.open(in_filename, mode="rb", compresslevel=6)) as f:
        # CSV dialect: '"' is the quote character, a backslash is the escape
        # character, and a doubled quote inside a field stands for one quote.
        reader = csv.reader(f, escapechar='\\', doublequote=True, quotechar='"', quoting=csv.QUOTE_MINIMAL)

## leader_test.py
# First get distributions of lagged dot products in both directions
c1_leading_c2_dps = get_lag_dot_products(c1, c2, lagged_timesteps)
c2_leading_c1_dps = get_lag_dot_products(c2, c1, lagged_timesteps)

# Check whether distributions are the same using paired t-test
t_score, pval = scipy.stats.ttest_rel(c1_leading_c2_dps, c2_leading_c1_dps)

if pval < PVAL_THRESH:
    # Choose direction with larger mean
    if numpy.mean(c1_leading_c2_dps) > numpy.mean(c2_leading_c1_dps):

## test_kclique.py
#!/usr/bin/env python
from nose.tools import *
import networkx as nx
from itertools import combinations

def test_overlaping_K5():
    """Build two overlapping 5-cliques and compute their k-clique communities for k=4.

    The cliques cover nodes 0-4 and 2-6, so they share nodes 2, 3 and 4.

    NOTE(review): no assertions are visible in this portion of the test --
    presumably they follow; confirm against the full source.
    """
    G = nx.Graph()
    G.add_edges_from(combinations(range(5), 2)) # Five-clique on nodes 0..4
    G.add_edges_from(combinations(range(2,7), 2)) # Second five-clique on nodes 2..6
    c = list(nx.k_clique_communities(G, 4))

## mysql-utf8.txt
mysql> show variables like '%character%';
+--------------------------+----------------------------+
| Variable_name            | Value                      |
+--------------------------+----------------------------+
| character_set_client     | utf8                       |
| character_set_connection | utf8                       |
| character_set_database   | utf8                       |
| character_set_filesystem | binary                     |
| character_set_results    | utf8                       |
| character_set_server     | utf8                       |

## bin_points.py
import numpy as np
from collections import defaultdict

def bin_points(X, bin_size, min_bin_freq):
    """Bin the points of ``X`` onto a grid and collect the well-populated bins.

    Each point is divided by ``bin_size`` and cast to ``int32`` to obtain its
    bin coordinates; the coordinates of every bin holding at least
    ``min_bin_freq`` points are gathered into the float32 array ``bin_seeds``.

    Args:
        X: iterable of points; each point must support ``point / bin_size``
            (e.g. the rows of a 2-D NumPy array).
        bin_size: divisor applied to each point before the integer cast.
        min_bin_freq: minimum number of points a bin needs in order to be kept.

    NOTE(review): the visible code stops after building ``bin_seeds`` and
    returns nothing -- presumably the original continues; confirm against
    the full source before relying on a return value.
    """
    bin_sizes = defaultdict(int)
    for point in X:
        # np.cast was removed in NumPy 2.0; asarray(...).astype(...) is the
        # equivalent spelling and also works on older NumPy releases.
        binned_point = np.asarray(point / bin_size).astype(np.int32)
        bin_sizes[tuple(binned_point)] += 1

    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    bin_seeds = np.array(
        [point for point, freq in bin_sizes.items() if freq >= min_bin_freq],
        dtype=np.float32,
    )

	# Copyright 2019 Parsely
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	$ ./fb_edges Caltech36 \| comm_algs/louvain_wrapper \| ./label_feature_matrix Caltech36 \| ./infer 10
	0.36
	from datetime import datetime, timedelta

	matlab_datenum = 731965.04835648148
	python_datetime = datetime.fromordinal(int(matlab_datenum)) + timedelta(days=matlab_datenum%1) - timedelta(days = 366)
	import csv
	import gzip
	from contextlib import closing

	def photo_tup_generator(in_filename, bb_left, bb_right, bb_bottom, bb_top):
	"""
	Note: rows yielded are in utf8 (have not been decoded to unicode)
	"""
	with closing(gzip.open(in_filename, mode="rb", compresslevel=6)) as f:
	reader = csv.reader(f, escapechar='\\', doublequote=True, quotechar='"', quoting=csv.QUOTE_MINIMAL)
	# First get distributions of lagged dot products in both directions
	c1_leading_c2_dps = get_lag_dot_products(c1, c2, lagged_timesteps)
	c2_leading_c1_dps = get_lag_dot_products(c2, c1, lagged_timesteps)

	# Check whether distributions are the same using paired t-test
	t_score, pval = scipy.stats.ttest_rel(c1_leading_c2_dps, c2_leading_c1_dps)

	if pval < PVAL_THRESH:
	# Choose direction with larger mean
	if numpy.mean(c1_leading_c2_dps) > numpy.mean(c2_leading_c1_dps):
	#!/usr/bin/env python
	from nose.tools import *
	import networkx as nx
	from itertools import combinations

	def test_overlaping_K5():
	G = nx.Graph()
	G.add_edges_from(combinations(range(5), 2)) # Add a five clique
	G.add_edges_from(combinations(range(2,7), 2)) # Add another five clique
	c = list(nx.k_clique_communities(G, 4))
	mysql> show variables like '%character%';
	+--------------------------+----------------------------+
	\| Variable_name \| Value \|
	+--------------------------+----------------------------+
	\| character_set_client \| utf8 \|
	\| character_set_connection \| utf8 \|
	\| character_set_database \| utf8 \|
	\| character_set_filesystem \| binary \|
	\| character_set_results \| utf8 \|
	\| character_set_server \| utf8 \|
	import numpy as np
	from collections import defaultdict

	def bin_points(X, bin_size, min_bin_freq):
	bin_sizes = defaultdict(int)
	for point in X:
	binned_point = np.cast[np.int32](point / bin_size)
	bin_sizes[tuple(binned_point)] += 1

	bin_seeds = np.array([point for point, freq in bin_sizes.iteritems() if freq >= min_bin_freq], dtype=np.float32)