Skip to content

Instantly share code, notes, and snippets.

View wizjo's full-sized avatar
🏠
Working from home

Jo Pu wizjo

🏠
Working from home
View GitHub Profile
@wizjo
wizjo / bubblechart_with_legend.r
Created April 7, 2011 16:58
R bubblechart, creating axes and legends from scratch
# Load the data to chart. Only columns 2 and 4 are read below.
# NOTE(review): the x vector has 10 points, so ds presumably has 10 rows.
ds <- read.csv("dataset.csv")
## rainbow palette with gradients!
palette(rainbow(10, alpha = 0.5))
# Draw the bubbles on an empty canvas: axes are suppressed here so they can be
# built by hand afterwards.
plot(
  c(rep(1, 5), rep(0.2, 5)), ds[, 4],
  pch = 21,
  # bubble sizes cycle 1..5 twice
  cex = (seq(1, 10) - 1) %% 5 + 1,
  # column 2 (+1) is used as an index into the active palette
  col = ds[, 2] + 1, bg = ds[, 2] + 1,
  xlim = c(-1, 19), ylim = c(-1, 7),
  # FALSE rather than F: F is an ordinary variable and can be reassigned
  axes = FALSE,
  xlab = "x axis name", ylab = "y axis name",
  main = "Give a name to your chart"
)
for(i in 1:8){
par(new=T)
# Read the raw export and give its four columns fixed names.
ds <- read.csv("that_csv_file.csv")
names(ds) <- c("test", "time", "city", "renderstart")

# Timestamps arrive as "month/day/2-digit-year hour:minute" text; store them
# as POSIXct.
ds[["time"]] <- as.POSIXct(
  strptime(ds[["time"]], format = "%m/%d/%y %H:%M")
)

# Make sure the two label columns are plain character vectors.
ds[c("city", "test")] <- lapply(ds[c("city", "test")], as.character)

# Distinct labels, ordered from least to most frequent.
cities <- names(sort(table(ds[["city"]])))
tests <- names(sort(table(ds[["test"]])))
@wizjo
wizjo / line_series.R
Created June 21, 2011 21:54
Multiple line series
# > head(ds)
# hour activated organic viral
# 1 2011-05-22 17:00:00 28 9 19
# 2 2011-05-22 18:00:00 47 12 35
# 3 2011-05-22 19:00:00 42 13 29
# 4 2011-05-22 20:00:00 57 13 44
# 5 2011-05-22 21:00:00 48 19 29
# 6 2011-05-22 22:00:00 86 19 67
# > dim(ds)
@wizjo
wizjo / palettes.R
Created June 21, 2011 22:00
Favorite Palettes
## Favorite palettes. Each palette() call replaces the active palette, so when
## both lines are run the second one is the palette left in effect.

## protovis style color cat10
palette(c(
  "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
  "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
))

## 11-step rainbow, slightly transparent
palette(rainbow(11, alpha = 0.8))
@wizjo
wizjo / tags.py
Created July 2, 2011 06:11
Tags of me with Nodebox
# Load the "grid" library through ximport.
grid = ximport("grid")

# Vocabulary available to the sketch.
words = [
    "data",
    "visualization",
    "mining",
    "analysis",
    "design",
    "UX",
]

# Use RGB with components on a 0-255 scale for the color() calls below.
colormode(RGB, range=255)
colors = [
    color(255, 30, 25),
    color(124, 155, 220),
    color(100, 100, 100),
    color(197, 206, 216),
]

# Font names available to the sketch.
fontlist = [
    "Arial Black",
    "Trebuchet MS",
    "Georgia",
]
def randContent(x, y, w, h, style=None):
for i in range(40):
x1 = random(0,w/2)
@wizjo
wizjo / logo.py
Created July 2, 2011 06:12
IAR logo with Nodebox
# Scale factor applied to the canvas size and to w.
scl = 3
# Roundness of the rectangles.
r = 0.3
# Square canvas, 100 units per side before scaling.
size(100 * scl, 100 * scl)
# 10 units before scaling.
w = 10 * scl
mat1=[
[0,2,2,0,2,2,2,0],
[1,0,0,1,2,0,0,2],
[1,0,0,1,2,0,0,2],
[1,0,0,1,2,0,0,2],
@wizjo
wizjo / group_sort.r
Created July 21, 2011 04:46
SQL-like Grouping and Sorting in R
## R equivalent of the SQL
##   SELECT col1, col2, col3, col4, count(1) AS cnt
##   FROM ds_subs_less_table GROUP BY 1, 2, 3, 4
# Step 1: collect every distinct combination of the four grouping columns.
# The columns are coerced to character and bound into one matrix so that
# unique() de-duplicates whole rows.
subs.geo <- setNames(
  data.frame(unique(cbind(
    as.character(ds.subs.less$source_tz),
    as.character(ds.subs.less$source_country),
    as.character(ds.subs.less$target_tz),
    as.character(ds.subs.less$target_country)
  ))),
  c("source_tz", "source_country", "target_tz", "target_country")
)
# next, figure out how often each combo shows up
@wizjo
wizjo / expensive_queries_check.sql
Created October 3, 2011 19:49
Most expensive queries
select sub.query_id, sub.result, sub.sql,
avg(sub.time_lapse) as avg_time,
max(sub.started_at) as last_run_at,
count(1) as cnt
from
(
select qe.query_id,
qe.result,
qe.started_at,
(qe.finished_at - qe.started_at) time_lapse,
@wizjo
wizjo / find_first_duplicate_key.sql
Created March 5, 2012 18:31
Find the first_value(id) of dup key
-- Return the pkey_id that occurs most often in dimension_attribute_histories
-- among keys that occur more than once (ties broken by the smallest pkey_id).
-- Returns no row when no key is duplicated.
--
-- The window is deliberately NOT partitioned by cnt: with PARTITION BY cnt
-- each row only saw the first id of its own count group, so the unordered
-- LIMIT 1 could return the first id of any group, including cnt = 1.
-- With a single window every row carries the same first_id, which makes
-- LIMIT 1 deterministic.
SELECT FIRST_VALUE(pkey_id) OVER (ORDER BY cnt DESC, pkey_id)
         AS first_id
  FROM (
        SELECT pkey_id,
               COUNT(*) AS cnt
          FROM dimension_attribute_histories
         GROUP BY 1
        HAVING COUNT(*) > 1  -- keep duplicated keys only
       ) sub
 LIMIT 1;
SELECT sub2.period_id, sub2.time_id, sub2.network_id, COUNT(1) as cnt
FROM (
SELECT sub1.period_id,
sub1.time_id,
sub1.network_id,
SUM(sub1.pct_squared) AS dept_concentration_hhi
FROM (SELECT sub.period_id,
sub.time_id,
sub.network_id,
sub.job_type,