Aaron Richter rikturr

## parallel-post-fit-scheduler-issues.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rikturr
                / parallel-post-fit-scheduler-issues.ipynb
            
            
              Created
              July 1, 2021 19:36
            
              
                parallel-post-fit-scheduler-issues
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## sklearn-n-jobs-estimators.py
from sklearn.utils import all_estimators
import inspect

# Keep every entry returned by all_estimators() whose second element (the
# object handed to inspect.signature) exposes an ``n_jobs`` parameter.
has_n_jobs = [
    entry
    for entry in all_estimators()
    if 'n_jobs' in inspect.signature(entry[1]).parameters
]
print(has_n_jobs)

## dask-joblib-issues.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rikturr
                / dask-joblib-issues.ipynb
            
            
              Created
              February 5, 2021 21:43
            
              
                dask-scikit-learn-joblib-issues
              
          
    Sorry, this is too big to display.
  
 
## aaron-job-search.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                rikturr
                / aaron-job-search.ipynb
            
            
              Last active
              May 20, 2021 12:43
            
              
                aaron-job-search-analysis
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## aaron-job-search.csv
organization,org_type,position,source,referral,end_stage,num_interviews,date_applied,date_interview1,date_interview2,date_interview3,date_rejected,date_declined,date_accepted,notes
Twitter,Public,Staff machine learning engineer,Referral,1,Reject-resume,0,4/8/2020,,,,4/29/2020,,,
Wikimedia foundation,Non-profit,Machine learning engineer,Search,0,Reject-ghosted,0,4/19/2020,,,,,,,
UNOPS,Government,Predictive Analytics Technical Specialist,Search,0,Reject-ghosted,0,4/19/2020,,,,,,,
Noom,Startup,Senior data scientist,Search,0,Reject-resume,0,4/20/2020,,,,4/23/2020,,,
Memorial Sloan Kettering Cancer Center,Private-L,Lead data scientist,Search,0,Reject-ghosted,0,4/20/2020,,,,,,,
Memorial Sloan Kettering Cancer Center,Private-L,Senior data engineer,Search,0,Reject-resume,0,4/20/2020,,,,5/13/2020,,,
Memorial Sloan Kettering Cancer Center,Private-L,Data engineer,Search,0,Decline-resume,0,4/20/2020,,,,,6/17/2020,,Took other job
Prominent Edge,Consulting,Lead data scientist,Search,0,Reject-ghosted,0,4/21/2020,,,,,,,
Open

## dask-rapids.py
# notice "dask" in these imports
import dask_cudf
from cuml.dask.ensemble import RandomForestClassifier

# Source file and the columns that should be parsed as datetimes.
taxi_csv_url = 's3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv'
timestamp_columns = ['tpep_pickup_datetime', 'tpep_dropoff_datetime']

# Read the CSV with dask_cudf; ``anon`` is forwarded via storage_options
# (presumably selecting unauthenticated S3 access -- confirm against s3fs).
taxi = dask_cudf.read_csv(
    taxi_csv_url,
    parse_dates=timestamp_columns,
    storage_options={'anon': True},
    assume_missing=True,
)

## saturn-gpu-cluster.py
from dask.distributed import Client
from dask_saturn import SaturnCluster

# Cluster sizing lives in one mapping so the constructor call stays short.
cluster_options = {
    'n_workers': 3,
    'scheduler_size': 'medium',
    'worker_size': 'g4dnxlarge',
}
cluster = SaturnCluster(**cluster_options)

# Attach a Dask client to the cluster created above.
client = Client(cluster)

## rapids-random-forest.py
from cuml.ensemble import RandomForestClassifier

# prep_df is not defined in this snippet; see the notebook for it.
taxi_train = prep_df(taxi)

# Build the classifier first, then select the feature and label columns
# and fit on them.
rfc = RandomForestClassifier(n_estimators=100, max_depth=10, seed=42)
train_features = taxi_train[features]
train_labels = taxi_train[y_col]
rfc.fit(train_features, train_labels)

## rapids-load-data.py
import cudf
import s3fs
# Filesystem object created with anon=True (no credentials are supplied here).
s3 = s3fs.S3FileSystem(anon=True)

# Open the object inside a context manager so the file handle is closed as
# soon as the CSV has been read, instead of being left open for the rest of
# the session.
with s3.open('s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv', mode='rb') as taxi_file:
    taxi = cudf.read_csv(
        taxi_file,
        parse_dates=['tpep_pickup_datetime', 'tpep_dropoff_datetime'],
    )

## rf-rapids-issues.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rikturr
                / rf-rapids-issues.ipynb
            
            
              Last active
              August 6, 2020 15:00
            
              
                cuml rf predict inf
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	from sklearn.utils import all_estimators
	import inspect

	# Collect every entry from all_estimators() whose second element (the
	# object passed to inspect.signature) accepts an ``n_jobs`` parameter.
	# The loop and conditional bodies are re-nested here; without their
	# indentation the ``for`` statement has no body and cannot be parsed.
	has_n_jobs = []
	for est in all_estimators():
		s = inspect.signature(est[1])
		if 'n_jobs' in s.parameters:
			has_n_jobs.append(est)
	print(has_n_jobs)
	organization,org_type,position,source,referral,end_stage,num_interviews,date_applied,date_interview1,date_interview2,date_interview3,date_rejected,date_declined,date_accepted,notes
	Twitter,Public,Staff machine learning engineer,Referral,1,Reject-resume,0,4/8/2020,,,,4/29/2020,,,
	Wikimedia foundation,Non-profit,Machine learning engineer,Search,0,Reject-ghosted,0,4/19/2020,,,,,,,
	UNOPS,Government,Predictive Analytics Technical Specialist,Search,0,Reject-ghosted,0,4/19/2020,,,,,,,
	Noom,Startup,Senior data scientist,Search,0,Reject-resume,0,4/20/2020,,,,4/23/2020,,,
	Memorial Sloan Kettering Cancer Center,Private-L,Lead data scientist,Search,0,Reject-ghosted,0,4/20/2020,,,,,,,
	Memorial Sloan Kettering Cancer Center,Private-L,Senior data engineer,Search,0,Reject-resume,0,4/20/2020,,,,5/13/2020,,,
	Memorial Sloan Kettering Cancer Center,Private-L,Data engineer,Search,0,Decline-resume,0,4/20/2020,,,,,6/17/2020,,Took other job
	Prominent Edge,Consulting,Lead data scientist,Search,0,Reject-ghosted,0,4/21/2020,,,,,,,
	Open
	# notice "dask" in these imports
	import dask_cudf
	from cuml.dask.ensemble import RandomForestClassifier

	# Source file and the columns that should be parsed as datetimes.
	taxi_csv_url = 's3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv'
	timestamp_columns = ['tpep_pickup_datetime', 'tpep_dropoff_datetime']

	# Read the CSV with dask_cudf; ``anon`` is forwarded via storage_options
	# (presumably selecting unauthenticated S3 access -- confirm against s3fs).
	taxi = dask_cudf.read_csv(
		taxi_csv_url,
		parse_dates=timestamp_columns,
		storage_options={'anon': True},
		assume_missing=True,
	)
	from dask.distributed import Client
	from dask_saturn import SaturnCluster

	# Cluster sizing lives in one mapping so the constructor call stays short.
	cluster_options = {
		'n_workers': 3,
		'scheduler_size': 'medium',
		'worker_size': 'g4dnxlarge',
	}
	cluster = SaturnCluster(**cluster_options)

	# Attach a Dask client to the cluster created above.
	client = Client(cluster)
	from cuml.ensemble import RandomForestClassifier

	# prep_df is not defined in this snippet; see the notebook for it.
	taxi_train = prep_df(taxi)

	# Build the classifier first, then select the feature and label columns
	# and fit on them.
	rfc = RandomForestClassifier(n_estimators=100, max_depth=10, seed=42)
	train_features = taxi_train[features]
	train_labels = taxi_train[y_col]
	rfc.fit(train_features, train_labels)
	import cudf
	import s3fs
	# Filesystem object created with anon=True (no credentials are supplied here).
	s3 = s3fs.S3FileSystem(anon=True)

	# Open the object inside a context manager so the file handle is closed as
	# soon as the CSV has been read, instead of being left open for the rest of
	# the session.
	with s3.open('s3://nyc-tlc/trip data/yellow_tripdata_2019-01.csv', mode='rb') as taxi_file:
		taxi = cudf.read_csv(
			taxi_file,
			parse_dates=['tpep_pickup_datetime', 'tpep_dropoff_datetime'],
		)