Mateo Restrepo Mejía cuckookernel

## matlabtojulia.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              7 stars
            
          
                cuckookernel
                / matlabtojulia.md
            
            
              Last active
              March 18, 2018 20:20
            
              
                MATLAB to Julia quick translation/conversion reference guide
              
          
    A quick and dirty MATLAB to Julia translation/conversion reference guide.

This is not meant as a reference to the language; for that, you should read the manual.
Important Differences

The first few are drawn from here

Use brackets to index into vectors and matrices, i.e. do v[i] instead of v(i).
Array assignment is done by reference, i.e., after A = B, modifying A will also modify B!
One dimensional vectors are column vectors by default.
[17, 42, 25] and [17;42;25] both create a column vector. To create a row vector, write [17 42 25]; this is really a 1-by-3 (two-dimensional) matrix.


## titanic_train.py
# Fit a random forest (100 trees, depth capped at 5) to train_df / train_Y.
rfc = RandomForestClassifier(n_estimators=100, max_depth=5)
rfc.fit(train_df, train_Y)

# Score the model on the same rows it was fitted to: the fraction of
# predictions that equal the corresponding entry of train_Y.
train_pred = rfc.predict(train_df)
n_correct = (train_pred == train_Y).sum()
accuracy = n_correct / len(train_pred)

## rf_explain.py
# Run rf_explain on the fitted forest and the training frame, then unpack
# the two values it returns.
# NOTE(review): the meaning of tc_exps and p0 is defined by rf_explain in
# tcxp — confirm there.
explain_out = rf_explain(rfc, train_df)
tc_exps, p0 = explain_out

## as_pyplot_figure.py
# Plot the entry stored at index 1 of tc_exps, passing p0, the training
# frame's column names and the label 'Passenger 1' to the plotting helper.
as_pyplot_figure(
    tc_exps[1],
    p0,
    train_df.columns,
    'Passenger 1',
)

## titanic_data_load.py
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# tcxp.py and titanic_preproc.py are available at:
#    https://github.com/YuxiGlobal/data-analytics/tree/master/tree_classif_explain
from tcxp import rf_explain, as_pyplot_figure
from titanic_preproc import preproc

# Read the training CSV from its fixed local path, then hand it to preproc,
# which returns the pair bound here as (train_df, train_Y).
raw_train_df = pd.read_csv("c:/tmp/titanic/train.csv")
train_df, train_Y = preproc(raw_train_df)

## stalker_stalkee_pandas.py
# Keep only the three columns needed and index the check-ins by location,
# so the self-join below matches check-ins made at the same location_id.
checkins_by_loc = (
    checkins_df[['user_id', 'checkin_ts', 'location_id']]
    .set_index('location_id')
)

# Self-join on the location_id index. Columns coming from the left side get
# the '_ee' suffix, columns from the right side get '_er'.
chin_pairs = checkins_by_loc.join(checkins_by_loc, lsuffix='_ee', rsuffix='_er')

# Keep a pair only when the '_ee' check-in is strictly earlier than the
# '_er' check-in and the two check-ins belong to different users.
ee_is_earlier = chin_pairs.checkin_ts_ee < chin_pairs.checkin_ts_er
users_differ = chin_pairs.user_id_ee != chin_pairs.user_id_er

# The original expression never closed its opening parenthesis, which made
# the whole snippet a SyntaxError; the chain is now properly terminated.
pairs_filtered = (
    chin_pairs[ee_is_earlier & users_differ]
    .rename(columns={"user_id_er": "stalker", "user_id_ee": "stalkee"})
    .reset_index()
)

## stalkee_stalker_turicreate_1.py
import turicreate as tc

# Load the tab-delimited check-in file; it is read with header=False, and
# the columns are then renamed from X1..X5 to descriptive names.
raw_checkins = tc.SFrame.read_csv(
    'Gowalla_totalCheckins.txt', delimiter='\t', header=False
)
named_checkins = raw_checkins.rename(
    {
        'X1': 'user_id',
        'X2': 'checkin_ts',
        'X3': 'lat',
        'X4': 'lon',
        'X5': 'location_id',
    }
)

# Only user, location and timestamp are kept; lat / lon are dropped.
checkins = named_checkins[["user_id", "location_id", "checkin_ts"]]

## stalkee_stalker_turicreate_2.py
# Self-join the check-ins on location_id. The rename below maps the
# un-suffixed columns to the "stalkee" side and the '.1'-suffixed
# duplicates to the "stalker" side.
chin_ps = (
    checkins.join(checkins, on='location_id')
    .rename(
        {
            'checkin_ts': 'checkin_ts_ee',
            'checkin_ts.1': 'checkin_ts_er',
            'user_id': 'stalkee',
            'user_id.1': 'stalker',
        }
    )
)

# Keep pairs where the stalkee checked in strictly before the stalker and
# the two are different users.
# The original wrote chin_ps['stalker] without the closing quote, which is
# an unterminated string literal (SyntaxError); the quote is restored here.
pairs_filtered = chin_ps[
    (chin_ps['checkin_ts_ee'] < chin_ps['checkin_ts_er'])
    & (chin_ps['stalkee'] != chin_ps['stalker'])
]


## stalkee_stalker_turicreate_output.txt
Inferred types from first 100 line(s) of file as
column_type_hints=[int,str,float,float,int]
------------------------------------------------------
Read 870755 lines. Lines per second: 520097
Finished parsing file /home/ubuntu/dask_experiment/Gowalla_totalCheckins.txt
Parsing completed. Parsed 6442892 lines in 5.43026 secs.

+---------+-----------+----------------+
| stalkee |  stalker  | location_count |
+---------+-----------+----------------+

## stalkee_stalker_turicreate_3.py
# `agg` is not bound by any of the snippets shown here, so import
# turicreate's aggregator module under that alias before using it.
import turicreate.aggregate as agg

# De-duplicate (stalkee, stalker, location_id) triples, count the remaining
# rows per (stalkee, stalker) pair, and keep the 5 pairs with the highest
# location_count.
final_result = (
    pairs_filtered[['stalkee', 'stalker', 'location_id']]
    .unique()
    .groupby(['stalkee', 'stalker'], {"location_count": agg.COUNT()})
    .topk('location_count', k=5)
)

# NOTE(review): SFrame.materialize() is documented without a return value,
# so chaining it into the assignment would bind final_result to None.
# Calling it as its own statement forces evaluation and keeps final_result
# bound to the SFrame either way.
final_result.materialize()

print(final_result)
	# Fit a random forest (100 trees, depth capped at 5) to train_df / train_Y.
	rfc = RandomForestClassifier(n_estimators=100, max_depth=5)
	rfc.fit(train_df, train_Y)

	# Score the model on the same rows it was fitted to: the fraction of
	# predictions that equal the corresponding entry of train_Y.
	train_pred = rfc.predict(train_df)
	n_correct = (train_pred == train_Y).sum()
	accuracy = n_correct / len(train_pred)
	import pandas as pd
	from sklearn.ensemble import RandomForestClassifier

	# tcxp.py and titanic_preproc.py are available at:
	# https://github.com/YuxiGlobal/data-analytics/tree/master/tree_classif_explain
	from tcxp import rf_explain, as_pyplot_figure
	from titanic_preproc import preproc

	# Read the training CSV from its fixed local path, then hand it to preproc,
	# which returns the pair bound here as (train_df, train_Y).
	raw_train_df = pd.read_csv("c:/tmp/titanic/train.csv")
	train_df, train_Y = preproc(raw_train_df)
	# Keep only the three columns needed and index the check-ins by location,
	# so the self-join below matches check-ins made at the same location_id.
	checkins_by_loc = (
	    checkins_df[['user_id', 'checkin_ts', 'location_id']]
	    .set_index('location_id')
	)

	# Self-join on the location_id index. Columns coming from the left side get
	# the '_ee' suffix, columns from the right side get '_er'.
	chin_pairs = checkins_by_loc.join(checkins_by_loc, lsuffix='_ee', rsuffix='_er')

	# Keep a pair only when the '_ee' check-in is strictly earlier than the
	# '_er' check-in and the two check-ins belong to different users.
	ee_is_earlier = chin_pairs.checkin_ts_ee < chin_pairs.checkin_ts_er
	users_differ = chin_pairs.user_id_ee != chin_pairs.user_id_er

	# The original expression never closed its opening parenthesis, which made
	# the whole snippet a SyntaxError; the chain is now properly terminated.
	pairs_filtered = (
	    chin_pairs[ee_is_earlier & users_differ]
	    .rename(columns={"user_id_er": "stalker", "user_id_ee": "stalkee"})
	    .reset_index()
	)
	import turicreate as tc

	# Load the tab-delimited check-in file; it is read with header=False, and
	# the columns are then renamed from X1..X5 to descriptive names.
	raw_checkins = tc.SFrame.read_csv(
	    'Gowalla_totalCheckins.txt', delimiter='\t', header=False
	)
	named_checkins = raw_checkins.rename(
	    {
	        'X1': 'user_id',
	        'X2': 'checkin_ts',
	        'X3': 'lat',
	        'X4': 'lon',
	        'X5': 'location_id',
	    }
	)

	# Only user, location and timestamp are kept; lat / lon are dropped.
	checkins = named_checkins[["user_id", "location_id", "checkin_ts"]]
	# Self-join the check-ins on location_id. The rename below maps the
	# un-suffixed columns to the "stalkee" side and the '.1'-suffixed
	# duplicates to the "stalker" side.
	chin_ps = (
	    checkins.join(checkins, on='location_id')
	    .rename(
	        {
	            'checkin_ts': 'checkin_ts_ee',
	            'checkin_ts.1': 'checkin_ts_er',
	            'user_id': 'stalkee',
	            'user_id.1': 'stalker',
	        }
	    )
	)

	# Keep pairs where the stalkee checked in strictly before the stalker and
	# the two are different users.
	# The original wrote chin_ps['stalker] without the closing quote, which is
	# an unterminated string literal (SyntaxError); the quote is restored here.
	pairs_filtered = chin_ps[
	    (chin_ps['checkin_ts_ee'] < chin_ps['checkin_ts_er'])
	    & (chin_ps['stalkee'] != chin_ps['stalker'])
	]
	Inferred types from first 100 line(s) of file as
	column_type_hints=[int,str,float,float,int]
	------------------------------------------------------
	Read 870755 lines. Lines per second: 520097
	Finished parsing file /home/ubuntu/dask_experiment/Gowalla_totalCheckins.txt
	Parsing completed. Parsed 6442892 lines in 5.43026 secs.

	+---------+-----------+----------------+
	| stalkee |  stalker  | location_count |
	+---------+-----------+----------------+
	# `agg` is not bound by any of the snippets shown here, so import
	# turicreate's aggregator module under that alias before using it.
	import turicreate.aggregate as agg

	# De-duplicate (stalkee, stalker, location_id) triples, count the remaining
	# rows per (stalkee, stalker) pair, and keep the 5 pairs with the highest
	# location_count.
	final_result = (
	    pairs_filtered[['stalkee', 'stalker', 'location_id']]
	    .unique()
	    .groupby(['stalkee', 'stalker'], {"location_count": agg.COUNT()})
	    .topk('location_count', k=5)
	)

	# NOTE(review): SFrame.materialize() is documented without a return value,
	# so chaining it into the assignment would bind final_result to None.
	# Calling it as its own statement forces evaluation and keeps final_result
	# bound to the SFrame either way.
	final_result.materialize()

	print(final_result)