Skip to content

Instantly share code, notes, and snippets.

@baatout
baatout / tor_handler.py
Created Jul 12, 2018
A part of the crawler tutorial
View tor_handler.py
from urllib.request import ProxyHandler, build_opener, install_opener, Request, urlopen
from stem import Signal
from stem.control import Controller
class TorHandler:
def __init__(self):
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'}
@baatout
baatout / ip_renewal.py
Created Jul 12, 2018
A part of the crawler tutorial
View ip_renewal.py
...
# Crawl settings. NOTE(review): wait_time is presumably a delay in seconds
# between rotations — confirm where it is consumed (not visible in this excerpt).
wait_time = 2
number_of_ip_rotations = 3
# TorHandler comes from tor_handler.py; its import is not part of this excerpt.
tor_handler = TorHandler()
# Request icanhazip.com through the handler and print what it returns as the
# starting IP. NOTE(review): assumes open_url returns the response body — confirm.
ip = tor_handler.open_url('http://icanhazip.com/')
print('My first IP: {}'.format(ip))
# Cycle through the specified number of IP addresses via TOR
View run_add_float.py
from adder import add
# Call add with two float arguments and keep the result in z.
z = add(4.0, 6.0)
@baatout
baatout / adder.py
Created Jul 1, 2018
Typed add function
View adder.py
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``.

    The parameters and return value are annotated as ``int``. The
    annotations are hints for static type checkers; nothing in this
    function validates the argument types at runtime.
    """
    return a + b
View run_add_int.py
from adder import add
# Call add with two int arguments and keep the result in x.
x = add(4, 6)
@baatout
baatout / adder.py
Last active Jul 1, 2018
A function with no types
View adder.py
def add(a, b):
    """Return ``a + b``.

    No types are declared, so this works for any pair of operands that
    support the ``+`` operator.
    """
    return a + b
View run_pickle.py
# run this anywhere and change the pipeline.pk path
import dill
from pandas import read_csv
# Location of the CSV to load (fetched over HTTPS by read_csv below).
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"
# Column names applied to the CSV: the eight feature columns, then the label column.
features = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']
label = 'label'
# Column names are supplied explicitly via names= rather than read from the file.
dataframe = read_csv(url, names=features + [label])
# Split the frame into the feature columns (X) and the label column (Y).
X = dataframe[features]
Y = dataframe[label]
View pickle_export.py
# with X_train, X_test, Y_train, Y_test
import dill
from sklearn_pandas import DataFrameMapper
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
def is_adult(x):
    """Return the result of comparing ``x > 18`` (strictly greater than 18)."""
    adult_threshold = 18
    return x > adult_threshold
clf = Pipeline([
View pmml_failure.py
# with X_train, X_test, Y_train, Y_test
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
def is_adult(x):
    """Tell whether ``x`` exceeds 18; a value of exactly 18 does not count."""
    over_eighteen = x > 18
    return over_eighteen
clf = PMMLPipeline([
("mapper", DataFrameMapper([
View pmml_output.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_3" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.3">
<Header>
<Application name="JPMML-SkLearn" version="1.5.6"/>
<Timestamp>2018-09-08T11:13:03Z</Timestamp>
</Header>
<MiningBuildTask>
<Extension>PMMLPipeline(steps=[('mapper', DataFrameMapper(default=False, df_out=False,
features=[(['mass'], FunctionTransformer(accept_sparse=False, func=&lt;ufunc 'log1p'&gt;,
inv_kw_args=None, inverse_func=None, kw_args=None,