Last active
January 23, 2018 15:49
-
-
Save bwv988/b2ad7bbf98211d83a9e82c66cf228981 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Goal: clean col2, which records equipment as space-separated codes, by
# expanding it into one boolean indicator column per distinct code.
data = {'col1': [1, 2, 4], 'col2': ["e70", "e70 e80 ua", "b20"]}

# Create a Pandas data frame.
df = pd.DataFrame(data)


def flatten(l):
    """Flatten one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    return [item for sublist in l for item in sublist]


def indicator_frame(token_lists, columns, index=None):
    """Build a boolean membership table.

    Args:
        token_lists: one iterable of equipment codes per row.
        columns: the equipment codes to create indicator columns for.
        index: optional row index for the resulting frame.

    Returns:
        A DataFrame of dtype bool where cell (row, code) is True iff
        ``code`` is one of that row's tokens.
    """
    # Compare whole tokens (set membership) rather than substrings, so that
    # code "e70" does not also match a row that only contains "e700".
    token_sets = [set(tokens) for tokens in token_lists]
    rows = [[col in tokens for col in columns] for tokens in token_sets]
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
    return pd.DataFrame(rows, index=index, columns=columns, dtype=bool)


# Split strings into tokens. str.split() with no argument splits on any run
# of whitespace and never produces empty strings.
equip_raw = [e.split() for e in df["col2"]]

# Get unique (sorted) equipment column names.
equip_cols = np.unique(flatten(equip_raw))

# One boolean column per equipment type, True where the row lists that code.
tmp = indicator_frame(equip_raw, equip_cols, index=df.index)

# Merge. DataFrame.join returns a new frame, so the result must be rebound.
df = df.join(tmp)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment