Skip to content

Instantly share code, notes, and snippets.

@sampathweb
Created April 20, 2016 04:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sampathweb/cca49a6ceeb93318f1aafb27addf794c to your computer and use it in GitHub Desktop.
Save sampathweb/cca49a6ceeb93318f1aafb27addf794c to your computer and use it in GitHub Desktop.
# Sample frame: one row per movie; "films" and "books" hold comma-separated
# lists packed into a single string cell.
df = pd.DataFrame(
    data={
        "col_1": ["Movie1", "Movie2", "Movie3"],
        "films": ["film1, film1.1", "film2", "film3"],
        "books": ["book1, book1.1", "book2", "book3, book3.1"],
    },
)
def split_values(row, row_accumulator):
    """
    Split one row into multiple rows, one per entry of its "books" list.

    The "books" and "films" cells are comma-separated strings. Each entry is
    stripped of surrounding whitespace, so "a, b" yields "a" and "b". Books and
    films are paired by position; if there are fewer films than books, the last
    film is repeated to fill the gap, and films beyond the number of books are
    dropped.

    Based on SO Post - http://stackoverflow.com/questions/12680754/split-pandas-dataframe-string-entry-to-separate-rows

    Args:
        row: A row object supporting ``row["books"]``, ``row["films"]`` and
            ``row.to_dict()`` (e.g. a pandas Series).
        row_accumulator: List that receives one dict per generated row. The
            dicts are appended in reverse positional order (last book first).

    Returns:
        None. Results are delivered through ``row_accumulator``.
    """
    books = [book.strip() for book in row["books"].split(",")]
    films = [film.strip() for film in row["films"].split(",")]

    shortfall = len(books) - len(films)
    if shortfall > 0:
        # Extend films by copying the last value
        films.extend([films[-1]] * shortfall)

    # Pair by position *before* reversing: reversing each list separately
    # would shift the pairing whenever films outnumber books.
    pairs = list(zip(books, films))

    base = row.to_dict()
    for book, film in reversed(pairs):  # Emit in reverse positional order
        new_row = dict(base)
        new_row["books"] = book
        new_row["films"] = film
        row_accumulator.append(new_row)
# Expand every row of df into per-book rows and collect them into a new frame.
# Rows are visited with an explicit loop instead of DataFrame.apply because
# split_values works purely by side effect: apply would build a throwaway
# result of None values and makes no promise about how often it calls func.
new_rows = []
for _, row in df.iterrows():
    split_values(row, new_rows)
new_df = pd.DataFrame(new_rows)
new_df  # bare expression: displays the frame in a notebook/REPL
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment