Skip to content

Instantly share code, notes, and snippets.

@tpapp
tpapp / spellfrac.jl
Created December 13, 2016 16:06
spell fractions
using AMDB
using DataStructures
using GZip
using UnicodePlots
using IntervalSets
# if we want to pass around code, extend it with your own machine name
# (gethostname()) and path
AMDB_path = get(Dict("tamas" =>
"/home/tamas/research/AMDB/data/AMDB_subsample.jls.gz"),
@tpapp
tpapp / spells.jl
Created January 16, 2017 19:18
spell durations in a year
using AMDB
using DataStructures
using UnicodePlots
using IntervalSets # need to Pkg.checkout the latest version
# I made a new function to read the data; replace the path for your own machine
records = deserialize_gz(expanduser("~/research/AMDB/data/AMDB_subsample.jls.gz"))
# suppose we are counting in 2005
date_interval = Date(2005,1,1)..Date(2005,12,31)
@tpapp
tpapp / code_20170118.jl
Created January 18, 2017 19:47
AMP analysis of durations
using AMDB
using DataStructures
using IntervalSets
"""
Call `f` with each AMP spell in `records`. Useful for accumulators.
"""
function traverse_AMP_spells(f, records)
for data in values(records)
foreach(f, data.AMP_spells)
using AMDB
using DataStructures
using IntervalSets
using Plots
plotlyjs() # I like this backend, feel free to change it
records = deserialize_gz(expanduser("~/Documents/Thesis/AMDB_subsample.jls.gz"))
function spell_durations_in_year(records, interval)
@tpapp
tpapp / code_20170207.jl
Created February 7, 2017 21:12
code for plotting (almost works)
using AMDB
using DataStructures
using IntervalSets
using Plots
using DataFrames # you may need to add these libraries
using StatPlots # with Pkg.add
records = deserialize_gz(expanduser("~/research/AMDB/data/AMDB_subsample.jls.gz"))
plotly() # this makes plots appear in your browser, you can use other backends
@tpapp
tpapp / plot_example.tex
Created February 28, 2017 16:29
centered large plot in beamer
\documentclass{beamer}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{booktabs}
\usepackage{mathtools}
\mathtoolsset{showonlyrefs}
%% no idiotic nav buttons
\setbeamertemplate{navigation symbols}{}
######################################################################
# MWE for tabulating large CSV files, using plain Julia and IterateTables
# See the discussion at
# https://discourse.julialang.org/t/iterate-delimited-file-as-namedtuples/3616/3
#
# Some simplifications:
# - using the index of iterated tuples to make it comparable,
# but would prefer to use the column name in real code.
# - not using compressed files (because CSV does not support Zlib streams ATM)
######################################################################
@tpapp
tpapp / ess-sampling.jl
Last active June 12, 2017 15:35
Exploring sampling size variation in effective sample size estimates
"""
    ρ(x, k, v=var(x))

Estimate the lag-`k` autocorrelation of `x` from its variogram, i.e. the mean
squared difference between elements that are `k` positions apart.

`v` is the variance of `x`; it is computed with `var(x)` unless the caller
supplies it.
"""
function ρ(x, k, v=var(x))
    stop = lastindex(x)
    # Two equally long windows over `x`, offset from each other by `k` positions.
    leading = view(x, 1:(stop - k))
    trailing = view(x, (1 + k):stop)
    squared_gaps = (leading .- trailing).^2
    variogram = sum(squared_gaps) / length(leading)
    return 1 - variogram / (2 * v)
end
@tpapp
tpapp / log1p_error.jl
Last active September 13, 2017 10:25
log1p accuracy in Julia (compared with Base.Math.JuliaLibm)
# consistent random numbers
srand(UInt32[0xfd909253, 0x7859c364, 0x7cd42419, 0x4c06a3b6])
"""
err(x, [prec])
Return two values, which are the log2 relative errors for calculating
`log1p(x)`, using `Base.log1p` and `Base.Math.JuliaLibm.log1p`.
The errors are calculated by comparing to `BigFloat` calculations with the given
@tpapp
tpapp / read_UInt8_lines.jl
Last active September 28, 2017 10:13
reading UInt8 lines from a gzip compressed file in Julia
######################################################################
# context: reading UInt8 lines from a gzipped stream
# - the real dataset has about 5e10 lines, this is a self-contained MWE
# - the real dataset lines are then processed, this MWE is just about optimizing reading
# - line length can be bounded (relevant for buffered reading)
######################################################################
using CodecZlib
"""