Nicolas nkthiebaut

## pyspark_dataframe_register.py
from pyspark import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql import DataFrame as SparkDataFrame

# Spark configuration (a SparkConf, despite the "sc" name) giving the driver 4 GB.
sc = SparkConf().set("spark.driver.memory", "4g")

# Local session on four worker threads, built from the configuration above.
ss = (
    SparkSession.builder
    .master("local[4]")
    .config(conf=sc)
    .getOrCreate()
)

# Two-row demo DataFrame, registered for SQL queries as the temp view "players".
df = ss.createDataFrame(
    [(1, "kevin"), (2, "steph")],
    ["id", "name"],
)
df.createOrReplaceTempView("players")

## test_rollbar_setup.py
from unittest.mock import patch

import rollbar

@patch("rollbar.log.exception")
def test_rollbar_connection(log_exception_mock):
    """
    Test sending an exception to Rollbar. The Rollbar Python SDK reports request
    errors through exception logs. Here we check that the `exception` function
    of the logging module has NOT been called after reporting an exception to Rollbar.

## xkcd-trend.py
"""Based on https://github.com/Pold87/academic-keyword-occurrence"""
import re
import urllib
from functools import partial
from typing import Iterable
from urllib.parse import urlencode
from urllib.request import Request, build_opener

import matplotlib
import matplotlib.pyplot as plt

## plot_top_k_accuracies
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Switch matplotlib to its xkcd (hand-drawn sketch) rendering mode for later plots.
plt.xkcd()

# Digits dataset as a (features, labels) pair.
X, y = load_digits(return_X_y=True)
# Random train/test partition; no random_state is passed, so the split can
# differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

## soc
SOC_MAJOR_GROUPS = {
    "11": "Management Occupations",
    "13": "Business and Financial Operations Occupations",
    "15": "Computer and Mathematical Occupations",
    "17": "Architecture and Engineering Occupations",
    "19": "Life, Physical, and Social Science Occupations",
    "21": "Community and Social Service Occupations",
    "23": "Legal Occupations",
    "25": "Education, Training, and Library Occupations",
    "27": "Arts, Design, Entertainment, Sports, and Media Occupations",

## commit.APPLESCRIPT
on run
	-- Ask for the GitHub password; "with hidden answer" masks the typed characters.
	-- A dedicated variable is used instead of assigning to AppleScript's predefined "result" property.
	set githubPassword to text returned of (display dialog "Enter github password:" default answer "" with hidden answer)
	-- Build the push URL separately and shell-quote it, so characters in the password
	-- (spaces, quotes, ;, &, $ ...) cannot be interpreted by the shell.
	-- NOTE(review): characters that are special inside a URL (e.g. "@", "/") are not percent-encoded here.
	set pushURL to "https://nkthiebaut:" & githubPassword & "@github.com/nkthiebaut/notes.git"
	-- Stage everything in the notes folder, commit with a fixed message, then push.
	do shell script "cd /Users/nicolas/Google\\ Drive/notes && git add . && git commit -m new_commit && git push " & quoted form of pushURL
	-- Only reached when the shell command succeeded: "do shell script" raises an error on a non-zero exit status.
	say "Notes successfully committed and pushed"
end run

## hyperopt.py
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


def hyperopt_train_test(params):
    """Build the classifier selected by ``params['type']``.

    Args:
        params: Mapping with a ``'type'`` entry naming the model family; every
            other item is forwarded as a keyword argument to the classifier
            constructor. The mapping itself is left unmodified.

    Raises:
        KeyError: If ``params`` has no ``'type'`` entry.

    NOTE(review): only the ``'gb'`` branch is visible in this snippet and
    nothing is returned -- the remainder of the function appears to be cut off.
    """
    # Work on a shallow copy: removing 'type' from the caller's dict would make
    # a second call with the same dict fail with KeyError.
    params = dict(params)
    t = params.pop('type')
    if t == 'gb':
        clf = GradientBoostingClassifier(**params)

## plot_graph.py
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import networkx as nx


def plot_graph(df, directed=False):
    """Build a networkx graph from an edge-list DataFrame.

    Args:
        df: Edge list with ``source`` and ``target`` columns; because
            ``edge_attr=True`` is passed, the other columns are attached to
            the edges as attributes.
        directed: When true, a ``nx.DiGraph`` instance is handed to networkx
            as the graph container; otherwise ``None`` is passed.

    NOTE(review): the snippet stops after reading the ``cost`` edge data --
    the plotting part of the function appears to be cut off.
    """
    if directed:
        graph_engine = nx.DiGraph()
    else:
        graph_engine = None
    G = nx.from_pandas_edgelist(
        df,
        source='source',
        target='target',
        edge_attr=True,
        create_using=graph_engine,
    )
    # Edge data view restricted to the 'cost' attribute of each edge.
    costs = G.edges.data('cost')

## pandas_json_to_dataframe.py
# inspired by https://stackoverflow.com/a/50658993/5174617
import pandas as pd
import json

# Each cell of the "json" column holds one JSON object serialised as text.
# The cells must be pure JSON: a leading row label such as '0 ' makes
# json.loads raise JSONDecodeError ("Extra data").
df = pd.DataFrame(
    [['{"a":"1","b":"2","c":"3"}'], ['{"a" :"4","b":"5","c":"6"}']],
    columns=['json'],
)
# Parse every cell into a dict, then expand each dict into one column per key.
exploded_df = df['json'].apply(json.loads).apply(pd.Series)

## logger.py
# Official doc: https://docs.python.org/3/howto/logging-cookbook.html
import os

import logging

from logging import StreamHandler
from logging import Formatter

# Log directory: a 'logs' folder located one level above this module's
# (symlink-resolved) directory. The '../' segment is kept verbatim -- the path
# is not normalised.
LOGS_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../logs')
if not os.path.exists(LOGS_DIR):
	from pyspark import SparkConf
	from pyspark.sql import SparkSession
	from pyspark.sql import DataFrame as SparkDataFrame

	sc = SparkConf()
	sc.set("spark.driver.memory", "4g")
	ss = SparkSession.builder.master("local[4]").config(conf=sc).getOrCreate()

	df = ss.createDataFrame([(1, "kevin"), (2, "steph")], ["id", "name"])
	df.createOrReplaceTempView("players")
	from unittest.mock import patch

	import rollbar

	@patch("rollbar.log.exception")
	def test_rollbar_connection(log_exception_mock):
	"""
	Test sending an exception to Rollbar. The Rollbar Python SDK reports requests
	errors through exceptions logs. Here we check that the exception function
	of the logging module has NOT been called after reporting an exception to Rollbar.
	"""Based on https://github.com/Pold87/academic-keyword-occurrence"""
	import re
	import urllib
	from functools import partial
	from typing import Iterable
	from urllib.parse import urlencode
	from urllib.request import Request, build_opener

	import matplotlib
	import matplotlib.pyplot as plt
	import matplotlib.pyplot as plt
	from sklearn.datasets import load_digits
	from sklearn.linear_model import LogisticRegression
	from sklearn.model_selection import train_test_split

	plt.xkcd()

	X, y = load_digits(return_X_y=True)
	X_train, X_test, y_train, y_test = train_test_split(X, y)
	SOC_MAJOR_GROUPS = {
	"11": "Management Occupations",
	"13": "Business and Financial Operations Occupations",
	"15": "Computer and Mathematical Occupations",
	"17": "Architecture and Engineering Occupations",
	"19": "Life, Physical, and Social Science Occupations",
	"21": "Community and Social Service Occupations",
	"23": "Legal Occupations",
	"25": "Education, Training, and Library Occupations",
	"27": "Arts, Design, Entertainment, Sports, and Media Occupations",
	on run
	set result to text returned of (display dialog "Enter github password:" default answer "" with hidden answer)
	do shell script "cd /Users/nicolas/Google\\ Drive/notes && git add . && git commit -m new_commit && git push https://nkthiebaut:" & result & "@github.com/nkthiebaut/notes.git"
	say "Notes successfully committed and pushed"
	end run
	from sklearn.model_selection import cross_val_score
	from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
	from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


	def hyperopt_train_test(params):
	t = params['type']
	del params['type']
	if t == 'gb':
	clf = GradientBoostingClassifier(**params)
	import matplotlib as mpl
	import matplotlib.pyplot as plt
	%matplotlib inline
	import networkx as nx


	def plot_graph(df, directed=False):
	graph_engine = nx.DiGraph() if directed else None
	G = nx.from_pandas_edgelist(df, source='source', target='target', edge_attr=True, create_using=graph_engine)
	costs = G.edges.data('cost')
	# inspired by https://stackoverflow.com/a/50658993/5174617
	import pandas as pd
	import json

	df = pd.DataFrame([['0 {"a":"1","b":"2","c":"3"}'],['1 {"a" :"4","b":"5","c":"6"}']], columns=['json'])
	exploded_df = df['json'].apply(json.loads).apply(pd.Series)
	# Official doc: https://docs.python.org/3/howto/logging-cookbook.html
	import os

	import logging

	from logging import StreamHandler
	from logging import Formatter

	LOGS_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../logs')
	if not os.path.exists(LOGS_DIR):