Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Load the housing dataset, inspect it, hold out a test set, and derive an
# income-category column.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

housing = pd.read_csv("housing.csv")

# First look at the data: leading rows, per-column summary, frequency of each
# ocean_proximity value, and descriptive statistics.
print(housing.head())
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() would emit an extra "None" line after the report.
housing.info()
print(housing["ocean_proximity"].value_counts())
print(housing.describe())

# Split the data into training and test sets: 20% of the rows go to the test
# set, and the fixed random_state makes the split reproducible.
train_set, test_set = train_test_split(housing, test_size=0.2, random_state=42)

# Bucket median_income into five categories labelled 1-5. pd.cut uses
# right-closed intervals by default: (0, 1.5], (1.5, 3], (3, 4.5], (4.5, 6],
# (6, inf].
# NOTE(review): the column is added to `housing` after train_set/test_set were
# created above -- confirm whether the splits are meant to carry it too.
housing["income_cat"] = pd.cut(
    housing["median_income"],
    bins=[0.0, 1.5, 3.0, 4.5, 6.0, np.inf],
    labels=[1, 2, 3, 4, 5],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment