Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import randint
from sklearn.neighbors import NearestNeighbors
import math
import random
#Read in data
@jmsword
jmsword / naive_bayes.py
Created February 11, 2017 21:41
Naive Bayes
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
#Reading this data straight from GitHub kept failing with
#'pandas.io.common.CParserError: Error tokenizing data. C error: Expected 1 fields in line 104, saw 3',
#so the data was copied into a csv file and is loaded from that local copy instead
df = pd.read_csv('ideal_weight.csv')
#Drop every single-quote character (') from the column names
df.columns = [col_name.replace("'", "") for col_name in df.columns]
@jmsword
jmsword / random_forest.py
Created February 11, 2017 20:04
Random Forest
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as skm
import pylab as pl
#Read in the column names for the dataset.
#The file is parsed as whitespace-delimited with no header row (header=None),
#so the resulting columns get integer labels.
#sep=r'\s+' is the documented equivalent of delim_whitespace=True, which pandas has deprecated.
feat = pd.read_csv('features.txt', sep=r'\s+', header=None, index_col=False)
#Name the columns to isolate all dataset column names in one column
@jmsword
jmsword / cross_validation.py
Created February 5, 2017 21:46
Cross Validation Help
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error
#Load the loans dataset straight from the Thinkful-Ed GitHub repository
loansData = pd.read_csv(
    'https://github.com/Thinkful-Ed/curric-data-001-data-sets'
    '/raw/master/loans/loansData.csv'
)
#Remove '%' from 'Interest.Rate' column and convert to number
@jmsword
jmsword / overfitting.py
Created January 31, 2017 15:31
Over-fitting practice
import numpy as np
import statsmodels.formula.api as smf
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
#Set seed for reproducible results: seeding NumPy's global random generator
#makes every run of the script draw the same sequence of random numbers
np.random.seed(414)
@jmsword
jmsword / education.py
Created January 28, 2017 17:56
Education
from bs4 import BeautifulSoup
import requests
import pandas as pd
import sqlite3 as lite
import csv
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
@jmsword
jmsword / temperature.py
Created January 24, 2017 16:22
temperature
import requests
import sqlite3 as lite
import time
import datetime
import collections
import pandas as pd
#Cities to analyze
cities = {"Los_Angeles": "34.0522,-118.2437",
"Miami": "25.7617,-80.1918",
@jmsword
jmsword / citibike.py
Created January 19, 2017 14:01
citibike help
import requests
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3 as lite
import time
from dateutil.parser import parse
import collections
@jmsword
jmsword / time_series.py
Created January 13, 2017 04:33
Time Series Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
#Load the loan stats file: header=1 takes the column names from the file's second line,
#and low_memory=False parses the file in one pass instead of in chunks.
#The issue date is parsed into a datetime column in the same step.
df = pd.read_csv('LoanStats3b.csv', low_memory=False, header=1).assign(
    issue_d_format=lambda loans: pd.to_datetime(loans['issue_d'])
)
#Same data, indexed by the parsed issue date
dfts = df.set_index('issue_d_format')
@jmsword
jmsword / multivariant.py
Created January 12, 2017 04:11
Multivariate analysis
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
def _rate_to_fraction(rate_text):
    """Turn a percentage string such as '8.90%' into a fraction rounded to 4 places."""
    return round(float(rate_text.rstrip('%')) / 100, 4)


#Load the loans dataset straight from the Thinkful-Ed GitHub repository
df = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')
#Twelve times the monthly income
df['annual_inc'] = df['Monthly.Income'] * 12
#Interest rate as a numeric fraction instead of a percent string
df['int_rate'] = df['Interest.Rate'].map(_rate_to_fraction)
#Same values under a snake_case column name
df['home_ownership'] = df['Home.Ownership']