Skip to content

Instantly share code, notes, and snippets.

@catawbasam
Created September 19, 2012 12:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save catawbasam/3749504 to your computer and use it in GitHub Desktop.
Save catawbasam/3749504 to your computer and use it in GitHub Desktop.
first crack at Julia
##############################################################
# julia first test = test.jl
# running on Windows 64 with Commit 7076ab06f1 (2012-10-15)
#
# Julia runs in the crappy Windows terminal (autohotkey is one way out)
# - make sure QuickEdit is on
# - select with mouse and <enter> to copy
# - right click with mouse to paste
# AutoHotKey script -- ctl-F5 copies cur line; ctl-f6 pastes clipboard to console
#
# doc codes: M=minimal
#
# /base modules:
# array.jl: array subtypes (e.g. Vector/Matrix) and array methods
# boot.jl: builtins and base types
# base.jl: error types, some conversions, introspection, gc, Array constructors
# bool.jl: boolean ops
# buffer.jl: byte string manipulation with io style ops
# client.jl: REPL; startup options
# datafmt.jl: csv/delimited text read/write
# dict.jl: dictionary
# ***export.jl: categorized modules, types, keywords, and functions***
# float.jl / floatfuncs.jl : floating point ops
# file.jl file and os stuff -- UNIX only?
# io.jl : streams
# inference.jl: lots of introspection and typing stuff (ack!)
# int.jl / intfuncs.jl : integer ops
# libc.jl: time function, pid, host (unix only?)
# math.jl: generic math functions
# math_libm.jl : ccall to c math functions
# number.jl : generic number ops
# operators.jl: operators, pipelining, currying, composition
# pcre.jl: regex ccall
# pkg.jl: git-based declarative package manager
# random.jl: random number generation
# reduce.jl reduce (sum/prod etc) and mapreduce
# regex.jl higher level regex ops
# serialize.jl: serialize/deserialize
# show.jl show and dump = readable text version of data structure
# M signal.jl filtering, convolution, correlation
# sort.jl sorting, binary search
# statistics.jl mean, var, hist, cov, quantile, decile etc.
# stream.jl process, pipe, socket, stream, spawn
# string.jl string ops
# ***sysimg.jl imports, keywords etc for system image
# util.jl: timing, load/require, introspection, help, editor
#
# /extras (select)
# bigint.jl,bigfloat.jl: large numbers
# bitarray.jl : bitarray, bitvector
# color.jl : named colors, RGB
# distributions.jl : R-style. rv + moments, pdf/cdf,
# image.jl : base image, .ppm support, manipulations
# julia_web_base/julia_web.jl: web server (for web repl)
# sparse.jl/linalg_sparse.jl: sparse matrices
# linprog/glpk.jl : linear programming, matlab style
# nearequal.jl : approximate equality
# openlib.jl : ( TO DO-- looks Linux-specific -- ".so" as suffix )
# options.jl : optional function arguments
# plot.jl : plotting interface based on Winston
# profile.jl : profiler -- some nice code introspection for functions
# Rmath.jl : ccalls to R lib rng. ?Basis for reading RData?
# specfun.jl : special functions gamma, beta, bessel etc.
# strpack.jl : byte packing to match C structs
# test.jl : test framework -- lots of quote/eval
# trie.jl : ordered prefix tree
# winston.jl : plotting
#
# next:
# include modules. JuliaData, Winston (cairo dll not installed!)
# datetime/timestamp
#
# /doc candidates
# overview of source file contents
# /base, /extras file overview
# /extras: inifile, color, distributions,
# /base/util: require() , edit() configuration, function_loc(), getmethods(),
# whicht(), less() --needs Windows version, find_in_path(fname)
#
# see also C:\Temp\Julia\julia-17f50ea4e0\lib\julia\extras, \base
#
# require("winston.jl") #stdout undefined; no CAIRO dll
# client.jl -- try include(strcat(ENV["HOME"],"/.juliarc.jl")) end
##############################################################
# repl utilities
##########################################################
whos() # current objects and types
ans # most recent answer
load("lib/julia/base/util.jl")
load("bigint.jl")
load("c:/keithc/julia/rosettacode/fizzbuzz.jl")
load("test.jl") # load and run
exit()
apropos("rand") # topic help
help(randi) # function help
#_jl_help_category_list
#_jl_help_category_dict
#_jl_help_function_dict
function_loc(load) # filename and line number where function is defined
edit("test.jl") # edit and reload (if configured)
isinteractive() # true/false -- for __name__=="main" type testing?
import Base # Base is a Module
names(Base) # object names as an array
# console IO
##########################################################
print(s)
println(s)
println("$s, dude") #variable substitution
show(a) # readable view of value
dump(a) # detailed view of value, including shape
write(STDOUT,"BLAH")
# simple types
#################################################################################
# scalar literal
e # 2.718...
pi # 3.14159...
x=3
y=2.0
s="hello"
c='t'
WORD_SIZE #system wordsize (32)
# types
##########################################
# Int, Float, String, Any, ASCIIString, UTF8String, Char, IOStream, Int8, Uint8, Int16, Uint16, Int32, Uint32, Int64, Uint64, Float32, Float64
# Nothing, Expr, LambdaStaticData = parsed code tree; Module, CompositeKind, BitsKind, FuncKind, TypeVar, Array, Vector, Matrix
v = 1000
typeof(v) #Int32
v = int16(100) #typeof v = Int16
v2 = convert(Int64, v) #typeof v2 = Int64
(1.+2.)::Float
(1+2)::Int
isa(v, Int) #true
isimmutable(23) #true
isgeneric(23) #false
isbuiltin(println) #false
isbuiltin(Symbol) #true
# files and system
###############################################################################################
ENV #hash of current environment vars
CURRENT_OS # == :Windows, :Linux, or :OSX
JULIA_HOME # path to executable "C:\\Temp\\Julia\\julia-17f50ea4e0\\bin"
LOAD_PATH # currently /lib/julia/extras and /lib/julia/ui
CPU_CORES # 8
VERSION # v"0.0.0+94063912.r17f5"
ENV["HOME"] # not defined. --> /keithc/julia ???
#global _jl_package_list = Dict{ByteString,Float64}() #loaded files and load times
help_file = "$JULIA_HOME/lib/julia/helpdb.jl" #how is this constructed?
startup_file = strcat( cwd(), "/startup.jl" )
config_file = strcat( ENV["HOME"], "/.juliarc.jl" )
cwd() #get working directory at startup = "C:\\Temp\\Julia\\julia-17f50ea4e0"
cd("c:/keithc/julia") #set working directory
system("dir") #run a shell command -- in this case list files in working directory
system("start http://www.yahoo.com") #loads to browser (windows sniffs argument to decide the app)
system("start c://keithc//julia//rosettacode//fizzbuzz.jl") #loads in notepad++
system("start c://keithc//julia//julia_categories.pdf") #load in acrobat
system("start c://keithc//julia//help_2.png") #load in image viewer
system("echo %time%") # local time of day to centiseconds, e.g. 15:28:36.18
system("echo %date%") # local date , e.g. 10/04/2012
isdir("c:/temp") #true
isfile("c:/keithc/julia/test1.jl") #true
# List the files in the working directory by shelling out to the Windows `dir` command.
ls() = system("dir")
ls() #list files
time() #system time in seconds since unix epoch
p=getenv("PATH")
setenv("HOME","c:/keithc/julia")
# Read a whole file into one String, then release the handle.
f1 = open("c:\\temp\\f.py")
f1t = readall(f1) # load all into a single String
close(f1)
f2 = open("c:/temp/f.py") #returns an IOStream
f2t = readlines(f2) # load all into an array of strings (with eol chars)
f2t_clean = map(chomp, f2t) #remove eol chars from each element
close(f2)
# Stream the file line by line, then close the handle like f1/f2 above.
f3 = open("c:/temp/f.py")
for l in EachLine(f3) #iterator
    print(l)
end
close(f3)
ca = csvread("c:/temp/result.csv") # 398x13 Array of Any
ca[1,2] # "\"FLIGHT_DATE\""
ca[1,:] # first row as array (of strings)
ca[:,4] # fourth column (element1=String, the rest are floats)
csvwrite("c:/temp/jlwrite.csv", ca)
# strings
#########################################################
s1="blah"
s2="quer"
cs = cstring(s1) # convert to a C string
s1[1] #first char as char
s1[1:1] #first char as string
s1[end] #last char
s1[1:2] # first 2 chars
"$s1 $s2" #string interpolation
s1, s2 #returns a tuple
"abc"<"xyz" #true
"abc"=="cde" #false
strchr("xylophone", 'p') #index of char 'p' in string (find character)
search("here is text","is") #(6,8) 6=index of found string. 8=index after found string
# Replace every occurrence of `from` in `mystr` with `to`:
# the string is split on `from` and the pieces are re-joined with `to`.
function str_replace(mystr::String, from::String, to::String)
    return join(split(mystr, from), to)
end
t1="replace this"
str_replace(t1,"this","that")
t2="my|three|sons"
str_replace(t2, "|", ",")
strlen("abc") #3
split("my string"," ") # returns ["my","string"]
strip(" asdf ") #"asdf"
chomp("blah blah \n") #removes trailing newline
sa=["first" "second"] #array of strings
strcat(sa...) #concatenate
join(sa, ',') #join on delimiter
lowercase("Blah") #"blah"
uppercase("Blah") #"BLAH"
#quote hack -- how to return a string? -- see REPL code
julia> quote
`hello "world"`
end
#>>quote # line 2:
#>> @@cmd "hello \"world\""
#>>end
#short strings --
y= :`hello "world",'d'=4`
typeof(y) #Expr
sy = string(y)
y=:( @@cmd \"hello \"world\",'d'=4" )
julia> print(":$y")
#::( @@cmd "hello \"world\",'d'=4" )
s=y.args[2] #get delimited string
#> s
# "hello \"world\",'d'=4"
# Print `o` to the console (thin wrapper around `print`).
tf(o) = print(o)
# doesn't support string interpolation
e = :`my "weird" aunt's string`
# or quote ` ` end
# Pull the raw text out of a quoted backtick expression.
# If the second argument of `myExpr` is an ASCIIString it is returned as-is;
# otherwise it is treated as a nested expression, and that expression's own
# second argument is returned with surrounding whitespace stripped.
function strescape(myExpr::Expr)
    inner = myExpr.args[2]
    if typeof(inner) != ASCIIString
        return strip(inner.args[2])
    end
    return inner
end
s1=:`my "weird" aunt's string`
strescape(:`my "weird" aunt's s1string`)
#> "my \"weird\" aunt's string"
strescape(:`select 'meh' as "A Field" from mytable where "Blah Date"<to_char('01/01/2012','MM/DD/YYYY')`)
#> "select 'meh' as \"A Field\" from mytable where \"Blah Date\"<to_char('01/01/2012','MM/DD/YYYY')"
sql=quote
`select 'blah' as "My Field"
from "My Table" x
where "My Date"<to_char('01/01/2012','MM/DD/YYYY');`
end
strescape(sql)
sc=quote `
"name","count"
Jim,20
` end
julia> print( strescape(sc) )
"name","count"
Jim,20
# arrays
#########################################################
a=[1. 4. 6.] #single type array
b={1, 2., 'a', "blah"} #cell array = array of Any
a' #' transpose
m=[1 2 3; 4 5 6; 7 8 9];
m2=[ 1 2 3
4 5 6
7 8 9 ];
# array addressing
a[1] # = 1.0 (1 based addressing)
a[2:3] # = [4. 6.]
a[2:end] # = [4. 6.]
# elementwise ops
map(uppercase, sa) #apply uppercase function to each element of sa
a + 1
a -12
a * 2
a / 2
a .^ 2 # power
a .== 2 # -> bool array
a .> 2 # -> bool array
ceil(a)
floor(a)
abs(a)
sign(a)
log(a)
sqrt(a)
cumsum(a)
# reduce array ops
length(a)
min(a)
max(a)
sum(a)
prod(a)
std(a)
var(a)
mean(a)
median(a)
any( a .> 1 ) #true
all( a .> 1 ) #false
# sort, filter, group, distinct/unique
a2=[ 9.0 5.0 7.0 5.0 1.0]' #' transpose required by sort
sort(a2) # Sort = [1.0 5.0 7.0 9.0]
order([ 4,3,6 ]) # 2,1,3 = ordering/rank
a2[ a2 .> 5 ] # Filter =[9.0 7.0]
s2 = Set( a2... ) # unique/distinct using conversion to Set. How
ua2 = elements(s2) # convert back to array
# Return the distinct elements of `a` as an array,
# by passing the values through a Set and reading its elements back.
function unique(a)
    distinct = Set(a...)
    return elements(distinct)
end
unique(a2)
unique(['a','a','b'])
unique(["asdf","ASDF","asdf"])
# comprehensions: square root of every element, then of the even elements only
x=[ 1,2,3,4,510,1,2,3,1,9]
y=[sqrt(a) | a in x ] #comprehension
y=[sqrt(a) | a in x[x%2 .== 0] ] #comprehension with filter; elementwise .== builds the Bool mask used for indexing
# queuelike array ops
# push/pop are plain functions taking the array as first argument
aa=[1,4,7]
push(aa,2) #to end of array
pop(aa) # returns 2, leaves aa=[1,4,7]
# composite types = structs
# Composite type with one unconstrained field and two typed fields.
type Foo
    bar::Any      # no type restriction
    baz::Int
    qux::Float64
end
foo1 = Foo("Hello, world.", 23, 1.5)
typeof(foo1) # =Foo
foo1.baz = 45
# Cell Arrays, Dicts, Sets, Ranges
#################################################################
#cell array (1-based)
ca = {'a', "stuff", 4, 33.33} #mixed types, like Python list
ca[2] #returns "stuff"
push(ca,"meh") #appends "meh"
pop(ca) #returns and removes "meh"
length(ca) #4
#dictionary
md = Dict()
md["city"]="Washington"
md["city"] #returns "Washington"
has(md,"city") #true
get(md,"cityx","not found") #returns "not found"
mydict={ 1=>'d', 2=>"STUFF", 'a'=>22, "akey"=>77.7 } #dictionary literal
del(mydict,2)
keys(mydict)
values(mydict)
pairs(mydict) #like items in python
d = { 1=>'a',2=>'g'}
d2= { 3=>'a',2=>'h'}
merge(d,d2) #{2=>'h',1=>'a',3=>'a'}
#filter(fn, mydict) #see dict.jl
#set
s=Set( 2,3,5,4,4) #(2,5,3,4)
si=intset( 2,3,5,4,4) #intset(2,5,3,4) -- ORDERED
add(s,7)
s2=Set(7,6,5,4)
s | s2 # (7,2,5,3,4,6) = union(s,s2)
s & s2 # (7,5,4) = inter(s,s2)
s - s2 # (2,3) = diff(s,s2)
isempty(s) #false
has(s,2) #true
elements(s) # array [7, 2, 5, 3, 4]
# ranges
# integer ranges: construction, intersection, reductions and fields
r = 2:4
r2= 3:6
rstep = 2:2:8
intersect(r,r2) # 3:4
min(r) # =2
max(r) # =4
length(r) # =3
reverse(r) # =4:-1:2
sum(r) # = 2+3+4 = 9
r.start
r.step # default =1
r.len # number of steps
# floating ranges
r0 = 1.1:4.3 # 1.1:4.1 because default step=1
r = 1.1:0.1:4.2 # 1.1:0.1:4.2 as expected
r2 = 1.1:0.1:4.8 # 1.1:0.1:4.7 ??? FLOATING POINT ERROR ???
# functions
################################################################
# call function
n=5
rand(n) #array of uniform rv
diag(m)
# define a function
# Return the literal 20 as an Int8.
function foo()
    return convert(Int8, 20)
end
# define a function with typed arguments
# Product of `a` and `b`; both arguments must be subtypes of Number.
function multiply(a::Number, b::Number)
    product = a * b
    return product
end
# terse function definition = 'assignment form'
# adds 2 to its argument; the whole definition is a single expression
fn(x) = x+2
fn(3) # = 5
# anonymous function definition
x -> x+2
# Composite type pairing an integer with a function-valued field.
type fna
    n::Int
    f1::Function
end
fx=fna(3,foo) # construct an instance through the type's own name
fx.n #3
# functional, composition, chaining/pipelines, map, filter
a=[ "my","Dog","has","fleas" ]
map(uc, a)
vlc = map(lc) # curried lowercase = vectorized
vlc(a) # same as calling map(lc,a)
s="My String "
s | lc # pipeline, same as lc(s) ="my string "
s | strip | lc # pipeline, same as lc(strip(s)) ="my string"
s | strip | lc | s->s[1] # ="m"
ar=[ 1,2,3,4 ]
filter( x->x>2, ar) #3,4
lws = lc * strip #compose functions. this converts to lowercase and strips whitespace
lws(s) # "my string"
# eval, quote, macro, expr
#########################################################
# Bind the value `v` to a variable whose name is given by the string `s`.
# The string is converted to a symbol and the assignment is run through @eval.
function string_as_varname(s::String,v::Any)
    # from https://groups.google.com/forum/?fromgroups=#!topic/julia-dev/z-dyQ_jUgk8
    name = symbol(s)
    return @eval ($name = $v)
end
string_as_varname("varname",42); varname == 42 # true
parse_input_line("j=99") #from client.jl
# returns Expr j=99
# more system -- Windows commands
# see http://commandwindows.com/windows7-commands.htm for other options, e.g. file system
# DEL, DIR, COPY, EXPAND (compression), FC (file compare),
"
ASSOC Displays or modifies file extension associations.
ATTRIB Displays or changes file attributes.
CD Displays the name of or changes the current directory.
CHDIR Displays the name of or changes the current directory.
CLIP Redirects output of another command to the Windows clipboard.
COMP Compares the contents of two files or sets of files byte-by-byte
COMPACT Displays or alters the compression of files on NTFS partitions.
COPY Copies one or more files to another location.
DATE Displays or sets the date.
DEL Deletes one or more files.
DIR Displays a list of files and subdirectories in a directory.
EXPAND Expands one or more compressed files.
FC Compares two files or sets of files, and displays the differences between them.
FIND Searches for a text string in a file or files.
FINDSTR Searches for strings in files.
FORFILES Selects files in a folder for batch processing.
HELP Provides Help information for Windows commands.
IPCONFIG Displays all current TCP/IP network configuration values
MKDIR Creates a directory.
MOVE Moves one or more files from one directory to another directory.
PATH Displays or sets a search path for executable files.
RMDIR Removes a directory.
RENAME Renames a file or files.
SETX Sets environment variables.
SC Displays or configures services (background processes).
SCHTASKS Schedules commands and programs to run on a computer
SORT Sorts input.
START Starts a separate window to run a specified program or command.
SYSTEMINFO Displays machine specific properties and configuration.
TASKLIST Displays all currently running tasks including services.
TASKKILL Kill or stop a running process or application.
TIMEOUT Pauses the command processor for the specified number of seconds.
WHERE Displays the location of files that match a search pattern.
XCOPY Copies files and directory trees.
"
# Distributions -- example of module use
##########################################################################
require("distributions.jl") # module = Distributions
whos(Distributions) #object list
N=Distributions.Normal(3,2) # without import we must qualify the name
# Bernoulli, Beta, Binomial, Categorical, Cauchy, Chisq, Dirichlet, Exponential,
# FDist, Gamma, Geometric, HyperGeometric, Logistic, Multinomial, NegativeBinomial,
# NoncentralBeta, NoncentralChisq, NoncentralF, NoncentralT, Normal, Poisson, TDist,
# Uniform, Weibull              
import Distributions.* # bring into main namespace
pdf(N, 0.5)
cdf(N, 0.5)
mean(N)
std(N)
var(N)
rv=rand(N,10) #generate an array of R.V.s
C=Cauchy(5.,6.)
# web repl
###############################################################
# ctl-c : interrupt current command
# <up> <down> = command history
plot([4,5,6,9,12],"bar")
plot([4,5,6,9,12],"line")
# JuliaData
###############################################################
cd("C:\\keithc\\julia\\JuliaData")
load("src\\init.jl") #loads up member files
df = csvDataFrame("demo/toy_example.csv")
nrow(df) #
ncol(df)
show(df) #display it
#DataFrame (6,3)
# A B C
#[1,] 2.5 "One" 3.0
#[2,] 3.6 "One" 5.0
#[3,] 3.5 "Two" 3.0
#[4,] 4.5 "Two" 5.0
#[5,] 4.5 "Three" 3.0
#[6,] 5.5 "Three" 5.0
df["A"] # [2.5,3.6,3.5,4.5,4.5,5.5]
df[1] # first column
df["A"] .* 2
df["A"] + 2
df[1, :] # row 1
df[1:3, :] # rows 1-3
df[1:3, "A"] # [2.5, 3.6, 3.5] = first 3 rows of column "A"
df[1:2, ["A", "B"]] # first 2 rows, first 2 cols
with(df, :(A + C)) # expression wrt dataframe
head(df)
tail(df)
df[:( A .> 4.0 )]
df[df["A"] .> 4.0, :]
# Make a new column using within.
df2 = within(df, :( D = A + C ))
nareplace( df["A"],0.0 )
# Create a new DataFrame based on operations on another DataFrame.
# This is similar to plyr's summarise().
df3 = based_on(df, quote
ct = cut(nareplace(A,0.0), 3) # cut() doesn't operate on DataVecs yet; no NAs here
sum_A = sum(A)
end)
df = DataFrame(quote
a = shuffle(LETTERS[1:10])
b = letters[randi(5,50)]
x = randn(50)
end)
unique(df)
duplicated(df)
array(df) #any array with 1 element per column, each containing an array of values
# Group by
#############################################################################
# Grouping by column b, find the sum and length of each group.
by(df, "b", :( x_sum = sum(x); x_len = length(x) ))
# Group by a and b:
by(df, ["a", "b"], :( x_sum = sum(x); x_len = length(x) ))
# by is a shortcut for the following:
based_on(groupby(df, "b"), :( x_sum = sum(x); x_len = length(x) ))
# You can also use the piping operator for the same thing.
df | groupby("b") | :( x_sum = sum(x); x_len = length(x) )
gd = groupby(df, "b")
for i in 1:length(gd)
g=gd[i]
print(( max(g["b"]), sum(g["x"]) ))
end
df[:( a .> "G" )] | show
# merge
################################################################################
# build two sample data frames that share the key column "a"
df1 = DataFrame(quote
a = shuffle([1:10])
b = ["A","B"][randi(2,10)]
v1 = randn(10)
end)
df2 = DataFrame(quote
a = shuffle(reverse([1:5]))
b2 = ["A","B","C"][randi(3,5)]
v2 = randn(5)
end)
# simple merge (single column key)
m1 = merge(df1, df2, "a") # inner join is default
m2 = merge(df1, df2, "a", "inner")
m3 = merge(df1, df2, "a", "left")
m4 = merge(df1, df2, "a", "right")
m5 = merge(df1, df2, "a", "outer")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment