Skip to content

Instantly share code, notes, and snippets.

@baatout
baatout / tor_handler.py
Created July 12, 2018 22:01
A part of the crawler tutorial
from urllib.request import ProxyHandler, build_opener, install_opener, Request, urlopen
from stem import Signal
from stem.control import Controller
class TorHandler:
def __init__(self):
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'}
@baatout
baatout / ip_renewal.py
Created July 12, 2018 21:58
A part of the crawler tutorial
...
# Rotation settings. The code that consumes them lies outside this excerpt.
# NOTE(review): wait_time is presumably a delay in seconds between rotations — confirm.
wait_time = 2
# How many IP addresses to cycle through via Tor.
number_of_ip_rotations = 3
# TorHandler is defined in tor_handler.py; only part of it is visible here.
tor_handler = TorHandler()
# Ask icanhazip.com for the address this request appears to come from.
# NOTE(review): open_url is not shown; presumably it returns the response body — confirm.
ip = tor_handler.open_url('http://icanhazip.com/')
print('My first IP: {}'.format(ip))
# Cycle through the specified number of IP addresses via TOR
from adder import add
# Calls add() with two float literals.
# NOTE(review): if this resolves to the int-annotated add(a: int, b: int),
# a static type checker should flag the call; annotations are not enforced
# at runtime, so the expression itself still evaluates 4.0 + 6.0.
z = add(4.0, 6.0)
@baatout
baatout / adder.py
Created July 1, 2018 12:11
Typed add function
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``.

    The annotations declare integer operands and an integer result. They
    are hints for static type checkers only; Python does not enforce them
    at runtime.

    Args:
        a: First addend.
        b: Second addend.

    Returns:
        ``a + b``.
    """
    return a + b
from adder import add
# Calls add() with two int literals.
# NOTE(review): assuming add returns a + b as in the adder.py snippets, x is 10.
x = add(4, 6)
@baatout
baatout / adder.py
Last active July 1, 2018 12:05
A function with no types
def add(a, b):
    """Return ``a + b``.

    Deliberately unannotated: any pair of operands that supports ``+``
    is accepted (numbers, strings, lists, ...), and the result type is
    whatever ``+`` produces for them.
    """
    return a + b
@baatout
baatout / run_pickle.py
Last active September 8, 2018 11:24
Run pickle
# run this anywhere and change the pipeline.pk path
import dill
from pandas import read_csv
# Location of the raw CSV that is loaded below.
url = "https://raw.githubusercontent.com/baatout/ml-in-prod/master/pima-indians-diabetes.csv"

# Names for the input columns, in the order they are assigned to the CSV.
features = [
    'preg',
    'plas',
    'pres',
    'skin',
    'test',
    'mass',
    'pedi',
    'age',
]
# Name given to the last column, which is used as the target.
label = 'label'

# Column names are supplied explicitly through `names`.
dataframe = read_csv(url, names=[*features, label])

# Separate the input columns from the target column.
X, Y = dataframe[features], dataframe[label]
@baatout
baatout / pickle_export.py
Last active September 8, 2018 11:22
Pickle export
# with X_train, X_test, Y_train, Y_test
import dill
from sklearn_pandas import DataFrameMapper
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
def is_adult(x):
    """Tell whether ``x`` is strictly greater than 18.

    The comparison is delegated to ``x``'s own ``>`` operator, so the
    result has whatever type that operator returns for ``x``.
    """
    threshold = 18
    return x > threshold
clf = Pipeline([
@baatout
baatout / pmml_failure.py
Last active September 8, 2018 11:18
PMML failure
# with X_train, X_test, Y_train, Y_test
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
def is_adult(x):
    """Tell whether ``x`` is strictly greater than 18.

    The comparison is delegated to ``x``'s own ``>`` operator, so the
    result has whatever type that operator returns for ``x``.
    """
    threshold = 18
    return x > threshold
clf = PMMLPipeline([
("mapper", DataFrameMapper([
@baatout
baatout / pmml_output.xml
Last active September 8, 2018 11:13
PMML output
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_3" xmlns:data="http://jpmml.org/jpmml-model/InlineTable" version="4.3">
<Header>
<Application name="JPMML-SkLearn" version="1.5.6"/>
<Timestamp>2018-09-08T11:13:03Z</Timestamp>
</Header>
<MiningBuildTask>
<Extension>PMMLPipeline(steps=[('mapper', DataFrameMapper(default=False, df_out=False,
features=[(['mass'], FunctionTransformer(accept_sparse=False, func=&lt;ufunc 'log1p'&gt;,
inv_kw_args=None, inverse_func=None, kw_args=None,