Skip to content

Instantly share code, notes, and snippets.

View tugloo1's full-sized avatar

Piyush Gupta tugloo1

View GitHub Profile
@tugloo1
tugloo1 / .py
Created January 19, 2018 00:34
# Combine the year and month-name string columns (e.g. "2017 January") into a
# single datetime column that can be used as the plot's x axis.
df['date_info'] = pd.to_datetime(df['year'] + ' ' + df['periodName'], format="%Y %B")
# Convert the reported values to numbers so they can be plotted.
df['num_of_employees'] = pd.to_numeric(df['value'])
# Drop the raw columns in a single call. The `columns=` keyword replaces the
# positional axis argument (`df.drop(name, 1)`), which newer pandas rejects.
df = df.drop(columns=['footnotes', 'period', 'periodName', 'value', 'year'])
df.plot(x='date_info', y='num_of_employees')
import arrow
import pprint
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import requests
from matplotlib.axes import Axes
def get_bls_key():
"""
import requests
import pandas as pd
import matplotlib.pyplot as plt
def get_bls_key():
"""
This method gets the BLS API key, which is expected to be in the api-key.txt file. That file is listed in
.gitignore, and each user should have their own API key.
/System/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7 /Applications/PyCharmOld.app/Contents/helpers/pydev/pydevd.py --multiproc --qt-support --client 127.0.0.1 --port 54837 --file /Users/piyushgupta/projects/udacityintroML/final_project/poi_id.py
pydev debugger: process 6180 is connecting
Connected to pydev debugger (build 143.2371)
/Library/Python/2.7/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
total number of data points: 146
total number of people in dataset: 146
total number of poi: 18
total number of not poi: 128
import sys
from datetime import date
import quandl
'''sys.argv is a list (an attribute of the sys module) that holds all the command-line arguments,
so for example if you run
python main.py "2018-1-25"
sys.argv would be ['main.py', '2018-1-25']
# Two-stage classifier: feature selection (k=5) feeding a decision tree.
selk = SelectKBest(k=5)
dtc = DecisionTreeClassifier(
    min_samples_split=3,
    max_depth=6,
    random_state=42,  # fixed seed, as in the original configuration
)
# Each step is a (name, estimator) pair; the steps run in the listed order.
pipline_steps = [("best_feature", selk), ("decision_tree", dtc)]
clf = Pipeline(steps=pipline_steps)
from pandas import DataFrame, Series
# Demonstrates that list.append mutates the list in place and returns None,
# so assigning its result does not give you the updated list.
a = [1, 2, 3, 4]
b = a.append(5)  # `a` gains the element; `b` is bound to None
print(a)
# [1, 2, 3, 4, 5]
print(b)
# None
@tugloo1
tugloo1 / stat.py
Created March 23, 2018 21:59
stats
# Two-sided p-value: take the fraction of null values lying beyond the observed
# mean on the side where it falls relative to the null mean, then double it.
# NOTE(review): presumably null_vals is a NumPy array or pandas Series, since it
# needs elementwise comparison and .mean() - confirm against the caller.
pval = 2 * (
    (null_vals < obs_mean) if obs_mean < null_mean else (null_vals > obs_mean)
).mean()
pval
def calculate_path_weight(node, seen_nodes, current_weight):
# Case when we've seen all the nodes
if set(seen_nodes) == set(input_graph.keys()):
return current_weight
to_return_path_weight = 10000
for path_node, path_weight in input_graph[node]:
if path_node not in seen_nodes:
new_seen_nodes = seen_nodes + [path_node]
total_path_weight = calculate_path_weight(path_node, new_seen_nodes, current_weight + path_weight)
if total_path_weight < to_return_path_weight:
@tugloo1
tugloo1 / whatevs.py
Created April 4, 2018 21:32
whatevssss
def get_nodes_parent(tree, node_to_evaluate):
    """Return the index of the first row of ``tree`` that marks the given node.

    Each row of ``tree`` is indexed by ``node_to_evaluate``; the first row whose
    entry at that position equals 1 wins.
    NOTE(review): presumably ``tree`` is an adjacency matrix in which
    ``tree[i][j] == 1`` means node ``i`` is the parent of node ``j`` - confirm.

    Args:
        tree: Sequence of rows, each indexable by ``node_to_evaluate``.
        node_to_evaluate: Column index of the node whose parent is wanted.

    Returns:
        The index of the matching row, or ``None`` if no row has a 1 in that
        column (including when ``tree`` is empty).
    """
    for i, row in enumerate(tree):
        if row[node_to_evaluate] == 1:
            return i
    # Only give up after every row has been checked; returning None from
    # inside the loop would stop the search at the first row.
    return None