Skip to content

Instantly share code, notes, and snippets.

@mcpar-land
Created March 1, 2023 19:43
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mcpar-land/c6ada4a4bb87dc4855351b80a1de8a0d to your computer and use it in GitHub Desktop.
Save mcpar-land/c6ada4a4bb87dc4855351b80a1de8a0d to your computer and use it in GitHub Desktop.
Pandas Unique Values By Column
def uniques_by_col(df: pd.DataFrame, drop_columns=None) -> pd.DataFrame:
    """Tabulate the distinct values of every column next to their counts.

    For each column of *df* that is not excluded, the result holds two
    sub-columns under a two-level column index: ``(name, "Val")`` with the
    distinct values and ``(name, "Count")`` with how often each occurs, in
    the order produced by ``DataFrame.value_counts`` (most frequent first,
    missing values not counted). Columns with fewer distinct values than
    the longest one are padded with missing values at the bottom.

    Args:
        df: The frame whose columns are summarised.
        drop_columns: Column names to leave out. A column is skipped when
            ``str(column_label)`` is contained in this collection.
            ``None`` (the default) keeps every column.

    Returns:
        A DataFrame whose columns are a MultiIndex with level names
        ``["Column", ""]``. ``Val`` sub-columns have object dtype and
        ``Count`` sub-columns use the nullable ``Int64`` dtype.
    """
    # Avoid a shared mutable default; an omitted argument excludes nothing.
    excluded = [] if drop_columns is None else drop_columns
    level_names = ["Column", ""]

    labels = []
    pieces = []
    for col in df.columns.tolist():
        name = str(col)
        if name in excluded:
            continue
        counts = df[[col]].value_counts()
        # DataFrame.value_counts indexes its result by the (single) column;
        # level 0 carries the distinct values themselves.
        values = counts.index.get_level_values(0)
        labels.append((name, "Val"))
        pieces.append(pd.Series(values.to_numpy(), dtype="object"))
        labels.append((name, "Count"))
        # Nullable integers keep counts integral once shorter columns are
        # padded with missing values.
        pieces.append(pd.Series(counts.to_numpy(), dtype="Int64"))

    if not pieces:
        # pd.concat refuses an empty list; return an empty frame that still
        # has the documented column structure.
        empty_columns = pd.MultiIndex.from_arrays([[], []], names=level_names)
        return pd.DataFrame(columns=empty_columns)

    # Aligning on the positional index pads shorter columns with NA.
    result = pd.concat(pieces, axis=1)
    result.columns = pd.MultiIndex.from_tuples(labels, names=level_names)
    return result.dropna(how="all")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment