Skip to content

Instantly share code, notes, and snippets.

View raghavrv's full-sized avatar

(Venkat) Raghav, Rajagopalan raghavrv

View GitHub Profile
@raghavrv
raghavrv / OvO_with_is_fitted_prop.ipynb
Last active December 28, 2015 16:11
Decorator Function for fitted parameter
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@raghavrv
raghavrv / part_fit_test.ipynb
Last active December 28, 2015 16:12
Test that state is not reset on partial fit but is reset on fit
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@raghavrv
raghavrv / silhouette_plot_kmeans.py
Created January 10, 2015 13:07
Silhouette plots for different n_clusters values when clustering with KMeans
# Build the sample data with make_blobs: 100 samples, 2 features, 4 centers.
X, y = make_blobs(
    n_samples=100,
    n_features=2,
    centers=4,
    cluster_std=1.0,
    center_box=(-10.0, 10.0),
    shuffle=True,
    random_state=0,  # fixed seed, for reproducibility
)
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.patches import Arc
from math import sqrt
# One figure, 10 x 5 inches, holding a single axes.
fig = plt.figure()
fig.set_size_inches(10, 5)
ax = fig.add_subplot(1, 1, 1)  # three-argument form of add_subplot(111)
@raghavrv
raghavrv / codechef_spoj_hackerearth_userstats.py
Last active August 25, 2016 13:50
Get n_solved / n_submitted by scraping the competitive coding websites
import requests
import bs4
# Fetch the CodeChef profile page for `user_id` and read two counters out of
# its "problem_stats" table.
# NOTE(review): this preview ends before any return statement, so what the
# function hands back to its caller is not visible here.
def get_codechef_userstats(user_id):
# NOTE(review): no parser is named in the BeautifulSoup call, so bs4 chooses
# one itself; consider passing one explicitly -- confirm against the bs4 docs.
soup = bs4.BeautifulSoup(
requests.get('https://www.codechef.com/users/%s' % user_id).content)
# Collect every td cell of the 2nd tr row of the table with id "problem_stats";
# the 1st cell is parsed as the solved count, the 3rd as the submitted count.
stats_row = soup.find('table', {'id': 'problem_stats'}).findAll('tr')[1].findAll('td')
n_solved = int(stats_row[0].text)
n_submitted = int(stats_row[2].text)
@raghavrv
raghavrv / .gitignore
Last active February 22, 2016 14:16
RF Missing Value Benchmark script
*swp
*ipynb_checkpoints
*build
*.dat
@raghavrv
raghavrv / display_cpp_in_ipynb.py
Created February 17, 2016 13:59
Display CPP code with syntax highlighting in Jupyter Notebook
from IPython.display import Markdown, HTML, display_markdown
from glob import glob
# Markdown template: places the substituted text inside a cpp-tagged fenced
# code block, followed by an <hr> rule.
cpp_markdown_template = """```cpp\n%s\n```\n<hr>"""
# Visit every .cpp file matched by the hard-coded glob pattern below.
for i, problem in enumerate(
glob("/media/rvraghav93/code/projects/competitive_programming/codechef/*.cpp")):
with open(problem) as f:
# Read the whole file and split it into a list of lines.
code = f.read().splitlines()
# The first line is stored as `url` -- presumably each solution file starts
# with a link to its problem statement; TODO confirm.
url = code[0]
@raghavrv
raghavrv / adult_snippet_2.py
Last active February 17, 2016 15:34
Snippet to load and vectorize the adult dataset with missing values - https://archive.ics.uci.edu/ml/datasets/Adult
# Load categories of categorical features from descr
descr = """@attribute Age real [17.0, 90.0]
@attribute Workclass {Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked}
@attribute Fnlwgt real [12285.0, 1490400.0]
@attribute Education {Bachelors, Some-college, 11th, HS-grad, Prof-school, Assoc-acdm, Assoc-voc, 9th, 7th-8th, 12th, Masters, 1st-4th, 10th, Doctorate, 5th-6th, Preschool}
@attribute Education-num real [1.0, 16.0]
@attribute Marital-status {Married-civ-spouse, Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse}
@attribute Occupation {Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces}
@raghavrv
raghavrv / census_income_dataset.py
Created February 22, 2016 14:38
Loading the Census Income Dataset in python
# The census income dataset with 0.2 million datapoints
# http://sci2s.ugr.es/keel/dataset.php?cod=195
# Load categories of categorical features from descr
descr = """@attribute Atr-0 integer[0,90]
@attribute Atr-1 {Self-employed-not_incorporated,Not_in_universe,Private,Local_government,Federal_government,Self-employed-incorporated,State_government,Never_worked,Without_pay}
@attribute Atr-2 integer[0,51]
@attribute Atr-3 integer[0,46]
@attribute Atr-4 {Some_college_but_no_degree,10th_grade,Children,Bachelors_degree(BA_AB_BS),High_school_graduate,Masters_degree(MA_MS_MEng_MEd_MSW_MBA),Less_than_1st_grade,Associates_degree-academic_program,7th_and_8th_grade,12th_grade_no_diploma,Associates_degree-occup_/vocational,Prof_school_degree_(MD_DDS_DVM_LLB_JD),5th_or_6th_grade,11th_grade,Doctorate_degree(PhD_EdD),9th_grade,1st_2nd_3rd_or_4th_grade}
@raghavrv
raghavrv / value_dropper.py
Last active June 13, 2016 15:23
Generate random missingness in the data based on MCAR/MNAR strategy. (May get merged into scikit-learn soon)
from sklearn.utils import check_X_y, check_random_state
import numpy as np
def drop_values(X, y=None, missing_mask=None,
missing_values="NaN",
missing_fraction=0.1,
label_correlation=1.0,
n_labels=1,
labels=None,
missing_mask_only=False,