Gabe gdbassett

  • Liberty Mutual
  • US
@gdbassett
gdbassett / canopy.py
Created December 12, 2014 21:59
Efficient Python implementation of canopy clustering (a method for quickly generating centroids and clusters, most commonly as input to a more robust clustering algorithm).
from sklearn.metrics.pairwise import pairwise_distances
import numpy as np
# X should be a numpy matrix, very likely a sparse matrix: http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.sparse.csr_matrix.html#scipy.sparse.csr_matrix
# T1 = Distance to centroid point to not include in other clusters
# T2 = Distance to centroid point to include in cluster
# T1 > T2 for overlapping clusters
# T1 < T2 will have points which reside in no clusters
# T1 == T2 will cause all points to reside in mutually exclusive clusters
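The preview above ends at the threshold comments, before the algorithm itself. A minimal sketch of the standard canopy procedure that is consistent with the T1/T2 behavior those comments describe; the greedy first-remaining-point center choice and pure-Python distance here are my assumptions, not necessarily the gist's exact code:

```python
# Canopy clustering sketch (assumed implementation, not the gist's exact code).
# Points within T1 of a center join that canopy; points within T2 are removed
# from the candidate pool and can no longer seed or join later canopies.

def euclidean(a, b):
    return sum((ai - bi) ** 2 for ai, bi in zip(a, b)) ** 0.5

def canopy(points, t1, t2):
    """Return a list of canopies, each a list of point indices."""
    pool = list(range(len(points)))  # candidate indices, consumed greedily
    canopies = []
    while pool:
        center = pool[0]  # deterministic choice; a random pick is also common
        members = [i for i in pool if euclidean(points[i], points[center]) < t1]
        canopies.append(members)
        # drop tightly-bound points (including the center itself) from the pool
        pool = [i for i in pool if euclidean(points[i], points[center]) >= t2]
    return canopies
```

With T1 > T2, points falling between the two thresholds stay in the pool and can appear in later canopies as well, which is where the overlapping-clusters behavior noted in the comments comes from.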
@gdbassett
gdbassett / json_to_jsonld.py
Last active February 11, 2022 19:58
Convert JSON to JSON-LD. I couldn't find any examples in Python, so I produced one. This is very basic as I'm still learning JSON-LD. It does not use bnodes.
from urllib.parse import quote, unquote
import logging
from uuid import uuid4
from collections import defaultdict
def flatten(l):
    for el in l:
        if type(el) in [list, tuple]:
            yield from flatten(el)
        else:
            yield el
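A self-contained copy of the flatten generator with a usage line; the final yield is my assumed completion of the truncated preview:

```python
def flatten(l):
    """Recursively yield leaf elements of arbitrarily nested lists/tuples."""
    for el in l:
        if type(el) in [list, tuple]:
            yield from flatten(el)
        else:
            yield el  # assumed completion of the truncated branch

leaves = list(flatten([1, [2, (3, [4])], 5]))  # -> [1, 2, 3, 4, 5]
```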
@gdbassett
gdbassett / MaximumEntropyGraph.py
Created January 8, 2018 00:19
A small set of functions to generate maximum entropy graph hierarchies for aggregating graphs
# Copyright Gabriel Bassett 2018
# Not licensed for reuse
import copy
import operator
import uuid
import networkx as nx
import logging
import pprint
import simplejson as json
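The preview stops at the imports, so the gist's actual algorithm isn't visible. As one illustrative building block only (my assumption, not the gist's method): a maximum-entropy scoring of a candidate node aggregation typically uses the Shannon entropy of the group-size distribution, which prefers balanced groupings:

```python
import math

def partition_entropy(group_sizes):
    """Shannon entropy (nats) of the distribution implied by group sizes.

    Illustrative helper only -- the gist's real scoring is not shown in the
    preview, so this is an assumed building block.
    """
    total = sum(group_sizes)
    probs = [s / total for s in group_sizes if s > 0]
    return -sum(p * math.log(p) for p in probs)

# A balanced 2-way split of 4 nodes scores higher than a 3/1 split.
balanced = partition_entropy([2, 2])  # ln(2)
skewed = partition_entropy([3, 1])
```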
library(tidyverse)
options(repr.plot.width=16, repr.plot.height=16)
tibble(axis_1 = c(0, 0, 1, 1), axis_2 = c(0, 1, 0, 1),
count = c(72164, 303490, 129222, 594717)) %>%
mutate(scale = 581) %>%
mutate(scaled_count = count/scale) %>%
mutate(data = purrr::map(scaled_count, ~ tibble(x=rep(1:32, 32), y=rep(1:32, each=32)) %>% head(round(.x)))) %>%
unnest(data) %>%
# based on https://github.com/vz-risk/VCDB/commits/master
tj <- function(ti, Pi, Pj, alpha) {
  # Pi = (xi, yi); Pj = (xj, yj)
  xi <- Pi[1]
  yi <- Pi[2]
  xj <- Pj[1]
  yj <- Pj[2]
  return( ( ( (xj-xi)^2 + (yj-yi)^2 )^0.5 )^alpha + ti)
}
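This function computes t_j = t_i + |P_j - P_i|^alpha, which matches the knot-spacing step of a parameterized Catmull-Rom spline (alpha = 0.5 gives the centripetal variant); that interpretation is my reading, not stated in the snippet. A direct Python translation:

```python
def tj(ti, pi, pj, alpha):
    """Next knot value: ti plus the distance from pi to pj raised to alpha."""
    xi, yi = pi
    xj, yj = pj
    dist = ((xj - xi) ** 2 + (yj - yi) ** 2) ** 0.5
    return dist ** alpha + ti

t1 = tj(0.0, (0.0, 0.0), (3.0, 4.0), 1.0)  # distance 5, alpha 1 -> 5.0
```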
@gdbassett
gdbassett / bulk_netflow_import.py
Created November 20, 2014 02:51
A script to bulk import netflow records into a Neo4j graph database. Designed for efficiency; it can import roughly 1 million flows every 2 hours.
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
AUTHOR: Gabriel Bassett
DATE: 11-19-2014
DEPENDENCIES: py2neo
Copyright 2014 Gabriel Bassett
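The preview ends at the header. For the bulk-import pattern the description implies, one common approach is to batch flows into parameterized Cypher statements and submit them per transaction. This sketch only builds a statement and its parameters; the node labels, property names, and MERGE shape are my assumptions, not the gist's schema:

```python
def flow_to_cypher(flow):
    """Build a parameterized Cypher MERGE for one netflow record.

    Hypothetical schema: (:IP)-[:FLOW {port, proto, bytes}]->(:IP).
    """
    statement = (
        "MERGE (src:IP {address: $src}) "
        "MERGE (dst:IP {address: $dst}) "
        "CREATE (src)-[:FLOW {port: $port, proto: $proto, bytes: $bytes}]->(dst)"
    )
    params = {
        "src": flow["src"],
        "dst": flow["dst"],
        "port": flow["port"],
        "proto": flow["proto"],
        "bytes": flow["bytes"],
    }
    return statement, params

stmt, params = flow_to_cypher(
    {"src": "10.0.0.1", "dst": "10.0.0.2", "port": 443, "proto": "tcp", "bytes": 1500}
)
```

Batches of these would then be executed inside a single transaction (for instance via py2neo, which the gist lists as its dependency), which is where the bulk throughput comes from rather than from per-flow round trips.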
@gdbassett
gdbassett / covid_pop.R
Created April 6, 2020 20:44
Proportion of population having died from covid19 over time by country
# https://population.un.org/wpp/Download/Standard/Population/
pop <- readr::read_csv("~/Documents/Data/covid19/population.csv") %>%
select(`Region, subregion, country or area *`, `Country code`, `2020`) %>%
rename(name = `Region, subregion, country or area *`, alpha_3 = `Country code`, population=`2020`) %>%
mutate(population = as.integer(gsub(" ", "", population))) %>%
mutate(population = population * 1000) %>%
glimpse()
# From https://github.com/CSSEGISandData/COVID-19
covid <- readr::read_csv("~/Documents/Development/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv") %>%
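The snippet joins UN population figures with the JHU CSSE deaths time series, after which the per-country proportion is cumulative deaths divided by population. A toy sketch of that join step (the real gist does this with dplyr on the two CSVs; the dict shape and country keys here are illustrative only):

```python
def death_proportion(deaths_by_country, population_by_country):
    """Cumulative deaths / population for countries present in both tables."""
    return {
        country: deaths / population_by_country[country]
        for country, deaths in deaths_by_country.items()
        if country in population_by_country
    }

# Toy figures only -- real values come from the UN WPP and JHU CSSE files.
props = death_proportion({"A": 100, "B": 50}, {"A": 1_000_000, "B": 500_000})
```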
enum,x,n,freq,shape1,shape2,x2,y2
Web Applications,581,2013,0.2886,581.5,1432.5,0,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.005,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.01,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.015,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.02,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.025,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.03,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.035,1
Web Applications,581,2013,0.2886,581.5,1432.5,0.04,1
enum,x,n,freq,method,x1,n1,shape1,shape2,mean,lower,upper,sig,y2,x2
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.4971023064317177,0.050000011597819416,1,0
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.4971023064317177,0.050000011597819416,1,0.005
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.4971023064317177,0.050000011597819416,1,0.01
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.4971023064317177,0.050000011597819416,1,0.015
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.4971023064317177,0.050000011597819416,1,0.02
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.4971023064317177,0.050000011597819416,1,0.025
Hacking,1055,2215,0.4763,bayes,1055,2215,1055.5,1160.5,0.4763086642599278,0.45552701193652323,0.49710230643171
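The shape1/shape2 columns in both tables are consistent with a Jeffreys Beta posterior, Beta(x + 0.5, n - x + 0.5): for Hacking, 1055 + 0.5 = 1055.5 and 2215 - 1055 + 0.5 = 1160.5, and the mean column equals 1055.5 / 2216. A quick check of the mean plus a normal approximation to the 95% credible interval (the table's exact bounds would come from Beta quantiles, e.g. scipy.stats.beta.ppf, so the approximation only agrees to a few decimal places):

```python
import math

x, n = 1055, 2215
a, b = x + 0.5, n - x + 0.5  # Jeffreys prior Beta(0.5, 0.5) posterior
mean = a / (a + b)           # posterior mean, matches the table's mean column

# Normal approximation to the 95% credible interval around the posterior mean.
sd = math.sqrt(mean * (1 - mean) / n)
lower = mean - 1.959964 * sd
upper = mean + 1.959964 * sd
```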