# masterdezign/pandas101.py

## pandas101.py
import pandas as pd
# .read_csv()
# .shape
# .head(N) .tail(N)
# .dtypes
# .loc[3, 'sepal_length'] .iloc
# .to_csv()


# pd.set_option('display.max_columns', 2)  # use the full option key; the short 'max_columns' pattern can be ambiguous
# pd.options.display.float_format = '{:,.2f}'.format

# .isna()
# .cumsum(skipna=False)

# df['Profit'] = df.apply(lambda x:..., axis=1)
# df['Xx'].map()
# df.applymap(lambda x: len(str(x)))  # Applied to every element; renamed to df.map() in pandas >= 2.1


# .pivot .stack


# .plot()
# .plot.area(stacked=False)
# .boxplot()
# .describe()
# .corr()


# Packages
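# - pandas_profiling (renamed to ydata-profiling in newer releases)
# from pandas_profiling import ProfileReport  # newer releases: from ydata_profiling import ProfileReport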
# profile = ProfileReport(df, title="Title")
# profile.to_notebook_iframe()


# Dask - parallel computing
# import dask.dataframe as dd
# df = dd.read_csv('...')

# Koalas -> pandas API on Apache Spark (bundled with PySpark >= 3.2 as pyspark.pandas)
import pandas as pd
	# .read_csv()
	# .shape
	# .head(N) .tail(N)
	# .dtypes
	# .loc[3, 'sepal_length'] .iloc
	# .to_csv()


	# pd.set_option('display.max_columns', 2)  # use the full option key; the short 'max_columns' pattern can be ambiguous
	# pd.options.display.float_format = '{:,.2f}'.format

	# .isna()
	# .cumsum(skipna=False)

	# df['Profit'] = df.apply(lambda x:..., axis=1)
	# df['Xx'].map()
	# df.applymap(lambda x: len(str(x))) # Applied to every element; renamed to df.map() in pandas >= 2.1


	# .pivot .stack


	# .plot()
	# .plot.area(stacked=False)
	# .boxplot()
	# .describe()
	# .corr()


	# Packages
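	# - pandas_profiling (renamed to ydata-profiling in newer releases)
	# from pandas_profiling import ProfileReport  # newer releases: from ydata_profiling import ProfileReport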
	# profile = ProfileReport(df, title="Title")
	# profile.to_notebook_iframe()


	# Dask - parallel computing
	# import dask.dataframe as dd
	# df = dd.read_csv('...')

	# Koalas -> pandas API on Apache Spark (bundled with PySpark >= 3.2 as pyspark.pandas)