Skip to content

Instantly share code, notes, and snippets.

View xiaodaigh's full-sized avatar

evalparse xiaodaigh

View GitHub Profile
@xiaodaigh
xiaodaigh / code.jl
Created August 30, 2020 05:57
df1[B] = df2[B], where df1 and df2 are DataFrames and B is a Boolean array
using DataFrames
# Two 100-row frames with the same column layout but different contents.
df1 = DataFrame(a = fill(1, 100), b = "a")
df2 = DataFrame(a = fill(2, 100), b = "b")
# Boolean mask with one entry per cell of df1. It is filled with `rand` rather
# than left `undef`, so the mask never exposes uninitialised memory.
B = rand(Bool, size(df1))
df1[B] # doesn't work
# Let's overload getindex
@xiaodaigh
xiaodaigh / julia-slow-serialization.jl
Created September 13, 2019 05:27
Julia serialization is dog-slow
using JDF
using CSV, DataFrames
using Serialization:serialize,deserialize
using BufferedStreams
# Parse the pipe-delimited, header-less input file and time the read.
@time a = CSV.read("C:/Users/ZJ.DAI/Documents/git/format-wars/data/Performance_2016Q4.txt", delim = '|', header = false);
# Serialize the parsed table through a buffered output stream and time the write.
# The try/finally guarantees the stream is closed even if `serialize` throws.
io = BufferedOutputStream(open("c:/data/bin.bin","w"))
try
    @time serialize(io, a)
finally
    close(io)
end
@xiaodaigh
xiaodaigh / Dockerfile
Created December 30, 2019 04:14
Dockerfile for a minimal R and Python Docker image with Arrow
FROM python:3.7-alpine3.10
RUN apk add --no-cache \
build-base \
cmake \
bash \
boost-dev \
autoconf \
zlib-dev \
flex \
@xiaodaigh
xiaodaigh / data.table_vs_disk.frame.r
Created September 22, 2019 01:28
Benchmarking data.table vs disk.frame
library(data.table)
library(disk.frame)
setup_disk.frame()
bench_disk.frame_data.table_group_by <- function(data1,n) {
setDT(data1)
a.sharded.df = as.disk.frame(data1, shardby = c("year", "month", "day"))
a.not_sharded.df = as.disk.frame(data1)
@xiaodaigh
xiaodaigh / benchmarks.jl
Created September 17, 2019 13:21
Benchmark R vs Julia DataFrame on-disk formats
using CSV, Feather
#using JLD2
#using JLD#, JLSO
using JDF, FileIO, Blosc, StatsPlots, RCall
using DataFrames, WeakRefStrings # required for JLD2, JDF
Blosc.set_num_threads(6)
gen_benchmark(dirpath, largest_file, outpath, data_label; delim = ',', header=true) = begin
if !isdir(outpath)
mkpath(outpath)
@xiaodaigh
xiaodaigh / 0-benchmarks.jl
Last active September 15, 2019 07:00
Julia On-disk formats for saving DataFrames
using CSV, Feather, JLD2, JLSO, JDF, FileIO, Blosc, StatsPlots
using DataFrames, WeakRefStrings # required for JLD2, JDF
Blosc.set_num_threads(6)
gen_benchmark(dirpath, largest_file, outpath, data_label; delim = ',', header=true) = begin
if !isdir(outpath)
mkpath(outpath)
end
@xiaodaigh
xiaodaigh / example_rayshader_4walls_hole.r
Created March 12, 2019 22:31
Example rotating sunangle
library(rayshader)
library(av)
library(future)
# Run futures in background R sessions. `multisession` replaces the
# `multiprocess` strategy, which the future package has deprecated.
plan(multisession)
# set up an elevation matrix with a wall around the outside and a gap on each of
# the walls.
elmat1 = matrix(0, 500, 500)
# Raise rows 1-100 of columns 1-200 to a height of 3000.
elmat1[1:100, 1:200] = 3000
@xiaodaigh
xiaodaigh / ok.r
Created March 12, 2019 05:23
3d sun around a pole
library(rayshader)
library(av)
elmat1 = matrix(0, 500, 500)
elmat1[200:400, 200:400] = 971.0
sunangle = 33
for(sunangle in 140:360) {
elmat1 %>%
sphere_shade %>%
add_shadow(ray_shade(elmat1,zscale=3,maxsearch = 300, sunangle = sunangle),0.5) %>%
@xiaodaigh
xiaodaigh / cpu_bitonic_sort.jl
Last active January 29, 2019 12:46
CPU bitonic sort
shared = rand(16)
bisort(shared) = begin
NUM = UInt(length(shared))
k = UInt(2)
while (k <= NUM)
j = div(k,2)
while(j >=1)
for tid in UInt(0):UInt(NUM-1)