Skip to content

Instantly share code, notes, and snippets.

@jmquintana79
Last active December 13, 2020 20:41
Show Gist options
  • Save jmquintana79/7f6f5aa391157c58ecb8 to your computer and use it in GitHub Desktop.
Save jmquintana79/7f6f5aa391157c58ecb8 to your computer and use it in GitHub Desktop.
create pandas dataframe
import numpy as np
import pandas as pd
# Faster alternative: Pandas on Ray.
# NOTE: the import below rebinds `pd`, so it replaces plain pandas for everything that follows.
import ray.dataframe as pd
## ONE NUMPY ARRAY TO PANDAS DATAFRAME
# Sample data: a 2x2 nested list
ldata = [[1, 2], [7, 3]]
# Turn the nested list into a 2-D numpy array
npdata = np.asarray(ldata)
# Wrap the array in a DataFrame with explicit row and column labels
DF = pd.DataFrame(
    data=npdata,
    index=["row1", "row2"],
    columns=["col1", "col2"],
)
# Inspect the dtype of every column (the value is only shown in an
# interactive session; as a script statement it has no effect)
DF.dtypes
## TWO NUMPY ARRAYS TO PANDAS DATAFRAME
# Sample data: two 1-D arrays of equal length
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
# Each array becomes one column, named after its keyword
DF = pd.DataFrame(dict(x=x, y=y))
# SET INDEX
# Use columns 'A' and 'B' as the index; the result is a new frame bound to
# `indexed_df`, and `df` keeps its current index.
indexed_df = df.set_index(keys=['A', 'B'])
# SET FORMAT
# Cast the index labels to strings (any other dtype can be passed instead)
df.index = df.index.astype(str)
# Parse the index labels as datetimes
df.index = pd.to_datetime(df.index)
# RENAME INDEX
# `name` is the new index name; inplace=True changes the existing index object
DF.index.rename(name=name, inplace=True)
# GET LIST OF INDEX OF DATAFRAME
lindex = DF.index.to_list()
# RESET INDEX
# With the defaults (drop=False, inplace=False) the old index is kept as a
# regular column and a new frame is returned; pass drop=True to discard it.
DF = DF.reset_index()
# Show the first 5 rows of the DataFrame
DF.head(5)
# Show the last 5 rows of the DataFrame
DF.tail(5)
# DROP COLUMNS OF A PANDAS DATAFRAME
# By label, modifying `df` in place.
df.drop('column_name', axis=1, inplace=True)
# By position (columns 0, 1 and 3). drop() returns a new frame unless
# inplace=True is given, so without it the call would leave `df` unchanged.
df.drop(df.columns[[0, 1, 3]], axis=1, inplace=True)
# RENAME COLUMNS OF A PANDAS DATAFRAME
# Map each old label to its own new label; pointing two old labels at the
# same new name would leave the frame with duplicate column names.
df.rename(columns={'old1': 'new1', 'old2': 'new2'}, inplace=True)
# Replace all column labels at once. The list must contain exactly one
# label per existing column.
df.columns = [
    'water_year',
    'rain_octsep',
    'outflow_octsep',
    'rain_decfeb',
    'outflow_decfeb',
    'rain_junaug',
    'outflow_junaug',
]
# Create a CATEGORICAL object from the values in `array`
pd.Categorical(values=array)
# Create a CONTINGENCY TABLE from 2 arrays: `array1` supplies the rows,
# `array2` the columns, and margins=True adds the row/column totals.
CONTINGENCE_TABLE = pd.crosstab(index=array1, columns=array2, margins=True)
# Create an array of consecutive dates
# datetime() is used for the range bounds below, so it has to be imported.
from datetime import datetime

# Result format: pandas DatetimeIndex (elements are pandas Timestamps)
pd.date_range(start=datetime(2017, 1, 1), end=datetime(2017, 1, 10), freq='D')
# Result format: numpy array of Python datetime objects
pd.date_range(start=datetime(2017, 1, 1), end=datetime(2017, 1, 10), freq='D').to_pydatetime()
# Possible frequencies (values for `freq`):
#   'Y'     : yearly
#   'M'     : monthly
#   'W'     : weekly
#   'D'     : daily
#   'H'     : hourly
#   '30min' : every 30 minutes (and so on for other multiples)
# NOTE(review): pandas 2.2+ deprecates 'Y', 'M' and 'H' in favour of
# 'YE', 'ME' and 'h' -- check the installed pandas version before use.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment