Skip to content

Instantly share code, notes, and snippets.

View AdroitAnandAI's full-sized avatar

Anand P V AdroitAnandAI

View GitHub Profile
@AdroitAnandAI
AdroitAnandAI / pre.ipynb
Last active December 14, 2018 00:37
crime rate
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Load crime_v2.csv into a pandas DataFrame.
crimeData = pd.read_csv("crime_v2.csv")
# Preview the first 5 rows of the input data.
crimeData.head(5)
@AdroitAnandAI
AdroitAnandAI / analysis
Created December 15, 2018 00:15
analysis
# Check characteristics of the data: DataFrame.info() prints the column names,
# non-null counts and dtypes, which is a quick way to spot missing values.
crimeData.info()
@AdroitAnandAI
AdroitAnandAI / analysis2.py
Created December 15, 2018 00:16
analysis2
# Summary statistics (count, mean, std, min, quartiles, max) of each numeric feature.
crimeData.describe()
# Select the rows whose 'prbarr' value exceeds 1. The author flags such a row
# as a data anomaly that has to be dropped; note that this expression only
# displays the rows - nothing is removed from crimeData here.
# NOTE(review): presumably 'prbarr' is a probability, hence > 1 is invalid - confirm.
crimeData[crimeData['prbarr'] > 1]
# A location cannot be both west and central at the same time, so select the
# rows where the two columns add up to more than 1.
# NOTE(review): assumes 'west' and 'central' are 0/1 indicator columns - confirm.
crimeData[crimeData['west']+crimeData['central'] > 1]
@AdroitAnandAI
AdroitAnandAI / analysis1.py
Created December 15, 2018 00:17
analysis1
# Check characteristics of the data: DataFrame.info() prints the column names,
# non-null counts and dtypes, which is a quick way to spot missing values.
crimeData.info()
@AdroitAnandAI
AdroitAnandAI / analysis3.py
Created December 15, 2018 00:18
analysis3
# Count how many cells are exactly zero in each column.
(crimeData==0).sum()
# Numerical distribution of the dependent variable: crime rate ('crmrte').
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; check the
# installed version before re-running this cell.
sns.distplot(crimeData['crmrte'], color='g', bins=100, hist_kws={'alpha': 0.4})
# One histogram per column of the DataFrame; the trailing semicolon suppresses
# the textual repr of the returned axes in a notebook.
crimeData.hist(figsize=(16, 20), bins=40, xlabelsize=8, ylabelsize=8);
# Plot the binned PDF and CDF of every feature on a single 7x4 grid of
# subplots (so at most 28 columns can be shown).
plt.figure(figsize=(25, 25))
for idx, col in enumerate(crimeData.columns[2:]):  # excluding county & year
    # Bin the column into 10 equal-width bins. density=True returns bin
    # densities, which are re-normalised below so the PDF values sum to 1.
    counts, bin_edges = np.histogram(crimeData[col], bins=10, density=True)
    pdf = counts / counts.sum()
    cdf = np.cumsum(pdf)
    plt.subplot(7, 4, idx + 1)  # subplot positions are 1-based
    # Both curves are drawn against the right-hand edge of each bin.
    plt.plot(bin_edges[1:], pdf, label='PDF')
    plt.plot(bin_edges[1:], cdf, label='CDF')
    plt.title(col + " CDF", fontsize=15)
    # Without a legend the 'PDF'/'CDF' labels above are never displayed.
    plt.legend()
# Scatter every column against the crime rate ('crmrte'), drawing the
# columns in consecutive groups of five so each figure stays readable.
all_columns = crimeData.columns
for start in range(0, len(all_columns), 5):
    column_group = all_columns[start:start + 5]
    sns.pairplot(data=crimeData, x_vars=column_group, y_vars=['crmrte'])