Mateo Restrepo Mejía cuckookernel

## pythontojulia.md

      
              2 files
            
          
              3 forks
            
          
              1 comment
            
          
              11 stars
            
          
                cuckookernel
                / pythontojulia.md
            
            
              Last active
              March 20, 2023 04:02
            
              
                Python to Julia Quick translation / conversion reference Guide
              
          
    A quick and dirty syntax translation / conversion reference guide to ease the transition between Python and Julia. This is not meant as a reference to the language. For that you should read the manual.
Some important differences


Arrays in Julia are indexed starting from 1.
In Julia classes (i.e. types) don't own methods. Methods are implementations of generic functions and are invoked in a "static style", i.e. instead of Python's str1.rstrip(), we will have rstrip( str1 ), instead of file1.close(), close( file1 ).

Some important similarities.


## matlabtojulia.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              7 stars
            
          
                cuckookernel
                / matlabtojulia.md
            
            
              Last active
              March 18, 2018 20:20
            
              
                MATLAB to Julia quick translation/conversion reference guide
              
          
    A quick and dirty MATLAB to Julia translation/conversion reference guide.

This is not meant as a reference to the language. For that you should read the manual.
Important Differences

The first few are drawn from here

Use brackets to index into vectors and matrices, i.e. do v[i] instead of v(i).
Array assignment is done by reference, i.e. after A = B, modifying A will also modify B!
One dimensional vectors are column vectors by default.
[17, 42, 25] and [17;42;25] both create a column vector. To create a row vector do [17 42 25]; this is really a 1-by-3 (two-dimensional) matrix.


## titanic_data_load.py
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# tcxp.py and titanic_preproc.py are available at:
#    https://github.com/YuxiGlobal/data-analytics/tree/master/tree_classif_explain
from tcxp import rf_explain, as_pyplot_figure
from titanic_preproc import preproc

# Read the Titanic training CSV and pass it straight to the project's preproc
# helper. preproc returns a pair, unpacked here as (train_df, train_Y) --
# presumably the feature frame and the labels; confirm in titanic_preproc.py.
train_df, train_Y = preproc(
    pd.read_csv("c:/tmp/titanic/train.csv")
)

## titanic_train.py
# Build a 100-tree random forest with depth capped at 5 and train it.
# fit() returns the estimator itself, so construction and training are chained.
rfc = RandomForestClassifier(n_estimators=100, max_depth=5).fit(train_df, train_Y)

# Fraction of rows whose predicted label equals the true label.
# NOTE: this is computed on the same data the model was fitted on.
train_pred = rfc.predict(train_df)
accuracy = (train_pred == train_Y).sum() / len(train_pred)

## rf_explain.py
# Call the project's rf_explain helper with the fitted forest and the training
# frame; its result is a pair, unpacked here as (tc_exps, p0).
# NOTE(review): the exact meaning of tc_exps / p0 is defined in tcxp.py -- confirm there.
tc_exps, p0 = rf_explain(rfc, train_df)

## as_pyplot_figure.py
# Pass the entry at index 1 of tc_exps, together with p0 and the column names,
# to as_pyplot_figure (presumably rendering a pyplot figure, per its name).
# NOTE(review): indexing is 0-based, so tc_exps[1] is the second entry --
# confirm that the 'Passenger 1' label refers to the intended row.
as_pyplot_figure(
    tc_exps[1],
    p0,
    train_df.columns,
    'Passenger 1',
)

## stalker_stalkee_pandas.py
# Index every check-in by location so the self-join below pairs up all
# check-ins that share a location_id.
checkins_by_loc = (checkins_df[['user_id', 'checkin_ts', 'location_id']]
                      .set_index('location_id') )

# Self-join on the location_id index. Columns from the left copy get the
# '_ee' suffix (stalkee side), columns from the right copy get '_er' (stalker).
chin_pairs = checkins_by_loc.join( checkins_by_loc, lsuffix='_ee', rsuffix='_er' )

# Keep only pairs where the '_ee' check-in is strictly earlier than the '_er'
# check-in and the two user ids differ, then rename the user columns and turn
# location_id back into a regular column.
# Fix: the outer parenthesis opened on the first line was never closed, which
# made the whole statement a SyntaxError.
pairs_filtered = (chin_pairs[(chin_pairs.checkin_ts_ee < chin_pairs.checkin_ts_er) &
                             (chin_pairs.user_id_ee != chin_pairs.user_id_er )]
                      .rename( columns= {"user_id_er" : "stalker",
                                         "user_id_ee" : "stalkee" })
                      .reset_index() )

## stalkee_stalker_turicreate_1.py
import turicreate as tc

# Read the tab-delimited check-in file without a header row, rename the
# X1..X5 columns to meaningful names, and keep only the three columns that
# the later join / filter steps use.
checkins = (
    tc.SFrame.read_csv('Gowalla_totalCheckins.txt', delimiter='\t', header=False)
      .rename({
          'X1': 'user_id',
          'X2': 'checkin_ts',
          'X3': 'lat',
          'X4': 'lon',
          'X5': 'location_id',
      })
    [["user_id", "location_id", "checkin_ts"]]
)

## stalkee_stalker_turicreate_2.py
# Self-join the check-ins on location_id. The rename map expects the right-hand
# copy's clashing columns to be called 'checkin_ts.1' / 'user_id.1'; they are
# renamed so the left copy is the stalkee ('_ee') and the right the stalker ('_er').
chin_ps = ( checkins.join( checkins, on = 'location_id' )
                    .rename( {'checkin_ts'   : 'checkin_ts_ee',
                              'checkin_ts.1' : 'checkin_ts_er',
                              'user_id'      : 'stalkee' ,
                              'user_id.1'    : 'stalker' } ) )

# Keep only rows where the stalkee checked in strictly before the stalker and
# the two users are different.
# Fix: the 'stalker' key was missing its closing quote, which made the
# statement an unterminated-string SyntaxError.
pairs_filtered = chin_ps[ (chin_ps['checkin_ts_ee'] < chin_ps['checkin_ts_er']) &
                          (chin_ps['stalkee'] != chin_ps['stalker']) ]


## stalkee_stalker_turicreate_3.py
# Reduce to distinct (stalkee, stalker, location_id) rows, count the rows per
# (stalkee, stalker) pair into 'location_count', and keep the 5 pairs with
# the highest count.
# Fix: the aggregator was referenced as `agg.COUNT`, but no name `agg` is
# imported anywhere in this script; use the aggregate module reachable through
# the existing `tc` import instead.
final_result = ( pairs_filtered[['stalkee', 'stalker', 'location_id']]
                    .unique()
                    .groupby( ['stalkee','stalker'] ,
                               {"location_count" : tc.aggregate.COUNT() })
                    .topk( 'location_count', k=5 ) )

# Force the lazy pipeline to be evaluated now. This is a separate statement so
# that final_result stays bound to the SFrame rather than to whatever
# materialize() returns (NOTE(review): believed to be None -- confirm in the
# Turi Create API docs).
final_result.materialize()

print( final_result )
	import pandas as pd
	from sklearn.ensemble import RandomForestClassifier

	# tcxp.py and titanic_preproc.py are available at:
	# https://github.com/YuxiGlobal/data-analytics/tree/master/tree_classif_explain
	from tcxp import rf_explain, as_pyplot_figure
	from titanic_preproc import preproc

	# Read the Titanic training CSV and pass it straight to the project's preproc
	# helper. preproc returns a pair, unpacked here as (train_df, train_Y) --
	# presumably the feature frame and the labels; confirm in titanic_preproc.py.
	train_df, train_Y = preproc(
	    pd.read_csv("c:/tmp/titanic/train.csv")
	)
	# Build a 100-tree random forest with depth capped at 5 and train it.
	# fit() returns the estimator itself, so construction and training are chained.
	rfc = RandomForestClassifier(n_estimators=100, max_depth=5).fit(train_df, train_Y)

	# Fraction of rows whose predicted label equals the true label.
	# NOTE: this is computed on the same data the model was fitted on.
	train_pred = rfc.predict(train_df)
	accuracy = (train_pred == train_Y).sum() / len(train_pred)
	# Index every check-in by location so the self-join below pairs up all
	# check-ins that share a location_id.
	checkins_by_loc = (checkins_df[['user_id', 'checkin_ts', 'location_id']]
	                      .set_index('location_id') )

	# Self-join on the location_id index. Columns from the left copy get the
	# '_ee' suffix (stalkee side), columns from the right copy get '_er' (stalker).
	chin_pairs = checkins_by_loc.join( checkins_by_loc, lsuffix='_ee', rsuffix='_er' )

	# Keep only pairs where the '_ee' check-in is strictly earlier than the '_er'
	# check-in and the two user ids differ, then rename the user columns and turn
	# location_id back into a regular column.
	# Fix: the outer parenthesis opened on the first line was never closed, which
	# made the whole statement a SyntaxError.
	pairs_filtered = (chin_pairs[(chin_pairs.checkin_ts_ee < chin_pairs.checkin_ts_er) &
	                             (chin_pairs.user_id_ee != chin_pairs.user_id_er )]
	                      .rename( columns= {"user_id_er" : "stalker",
	                                         "user_id_ee" : "stalkee" })
	                      .reset_index() )
	import turicreate as tc

	# Read the tab-delimited check-in file without a header row, rename the
	# X1..X5 columns to meaningful names, and keep only the three columns that
	# the later join / filter steps use.
	checkins = (
	    tc.SFrame.read_csv('Gowalla_totalCheckins.txt', delimiter='\t', header=False)
	      .rename({
	          'X1': 'user_id',
	          'X2': 'checkin_ts',
	          'X3': 'lat',
	          'X4': 'lon',
	          'X5': 'location_id',
	      })
	    [["user_id", "location_id", "checkin_ts"]]
	)
	# Self-join the check-ins on location_id. The rename map expects the right-hand
	# copy's clashing columns to be called 'checkin_ts.1' / 'user_id.1'; they are
	# renamed so the left copy is the stalkee ('_ee') and the right the stalker ('_er').
	chin_ps = ( checkins.join( checkins, on = 'location_id' )
	                    .rename( {'checkin_ts'   : 'checkin_ts_ee',
	                              'checkin_ts.1' : 'checkin_ts_er',
	                              'user_id'      : 'stalkee' ,
	                              'user_id.1'    : 'stalker' } ) )

	# Keep only rows where the stalkee checked in strictly before the stalker and
	# the two users are different.
	# Fix: the 'stalker' key was missing its closing quote, which made the
	# statement an unterminated-string SyntaxError.
	pairs_filtered = chin_ps[ (chin_ps['checkin_ts_ee'] < chin_ps['checkin_ts_er']) &
	                          (chin_ps['stalkee'] != chin_ps['stalker']) ]
	# Reduce to distinct (stalkee, stalker, location_id) rows, count the rows per
	# (stalkee, stalker) pair into 'location_count', and keep the 5 pairs with
	# the highest count.
	# Fix: the aggregator was referenced as `agg.COUNT`, but no name `agg` is
	# imported anywhere in this script; use the aggregate module reachable through
	# the existing `tc` import instead.
	final_result = ( pairs_filtered[['stalkee', 'stalker', 'location_id']]
	                    .unique()
	                    .groupby( ['stalkee','stalker'] ,
	                               {"location_count" : tc.aggregate.COUNT() })
	                    .topk( 'location_count', k=5 ) )

	# Force the lazy pipeline to be evaluated now. This is a separate statement so
	# that final_result stays bound to the SFrame rather than to whatever
	# materialize() returns (NOTE(review): believed to be None -- confirm in the
	# Turi Create API docs).
	final_result.materialize()

	print( final_result )