# gchavez2/Dataframe_exploration.py

## Dataframe_exploration.py
import pandas as pd

# Quick tour of common pandas inspection calls on a CSV-backed dataframe.
#
# NOTE: FILENAME is not defined in this script; it is expected to be set to
# the path of the CSV file before these statements run.

# header=0 marks the first line of the file as a header row; because `names`
# is also given, that row is discarded and replaced by the explicit names.
df = pd.read_csv(
    FILENAME,
    names=["id", "title", "text"],
    escapechar="\\",
    encoding="utf-8",
    header=0,
)

# Summary statistics. With mixed dtypes only the numeric columns are
# summarised by default; use df.describe(include="all") to also get
# count/unique/top/freq for the non-numeric columns.
print(df.describe())

# Per-column dtype, non-null count and memory usage. info() writes its report
# to stdout itself and returns None, so wrapping it in print() would only add
# a stray "None" line.
df.info()

print(df.head())   # Show first 5 rows of dataframe
print(df.head(3))  # Show first 3 rows of dataframe

# Random sample of rows restricted to a few columns. The sample size is capped
# at the number of rows because sample() raises ValueError when asked for more
# rows than exist (sampling without replacement).
print(df[["id", "title"]].sample(min(10, len(df))))

print(df.columns)  # Name of the columns of the dataframe

print(df.shape)  # Size of the dataframe as a (nrows, ncols) tuple

# All columns of the row at integer position 2 (the third row, 0-based).
# The list [2] keeps the result a one-row DataFrame rather than a Series.
print(df.iloc[[2], :])