Skip to content

Instantly share code, notes, and snippets.

@devamitranjan
Created December 12, 2020 19:14
Show Gist options
  • Save devamitranjan/f41a1ca6ea8824bfd757bcc191987b6c to your computer and use it in GitHub Desktop.
Save devamitranjan/f41a1ca6ea8824bfd757bcc191987b6c to your computer and use it in GitHub Desktop.
import re
import pandas as pd
# Patterns are raw strings (so ``\.`` / ``\(`` are regex escapes rather than
# invalid string escapes) and are compiled once instead of on every iteration.

# A line holding a relative timestamp, e.g. "\n3 days ago" or "\n1 hour ago".
_TIMESTAMP_RE = re.compile(
    r'\n[0-9]+ (days|weeks|months|years|week|month|year|hours|hour|day|minutes|minute) ago'
)
# A line consisting of the literal marker "REPLY".
_REPLY_RE = re.compile(r'\nREPLY')
# A period that ends a line, followed by a line starting with a digit and an
# optional run of dots, digits and capital letters (e.g. ".\n12", ".\n1.2K").
# NOTE: the period itself is part of the match and is removed as well.
_COUNT_RE = re.compile(r'\.\n[0-9][\.]*[0-9]*[A-Z]*')
# The literal "(edited)" tag.
_EDITED_RE = re.compile(r'\(edited\)')


def create_df_author_comments1(data):
    """Build a DataFrame of comment text indexed by author.

    Each item in *data* is a newline-separated string whose first line is
    taken as the author and whose remaining lines are taken as the comment
    (presumably scraped comment blocks -- verify against the caller).

    Before splitting, the following are stripped from each item:
    relative-timestamp lines ("<N> days ago", ...), "REPLY" lines, and a
    line-ending period together with a following count-like token
    (e.g. ".\\n12" or ".\\n1.2K"). Any "(edited)" tag is removed from the
    author line; surrounding whitespace on that line is left untouched.

    Args:
        data: Iterable of strings, one per comment.

    Returns:
        A pandas DataFrame with a single "Comments" column, indexed by the
        author strings in input order.
    """
    authors = []
    comments = []
    for item in data:
        cleaned = _TIMESTAMP_RE.sub('', item)
        cleaned = _REPLY_RE.sub('', cleaned)
        cleaned = _COUNT_RE.sub('', cleaned)
        # First line is the author; everything after the first newline is
        # the comment body (empty string when there is no newline).
        author_line, _, body = cleaned.partition('\n')
        authors.append(_EDITED_RE.sub('', author_line))
        comments.append(body)
    return pd.DataFrame(comments, index=authors, columns=["Comments"])
# Build the author-indexed comments DataFrame.
# NOTE(review): `data` is not defined in the visible portion of this file;
# presumably it is created earlier (e.g. in a previous notebook cell) -- confirm.
author_comment = create_df_author_comments1(data)
# Bare expression: has no effect in a plain script; in a notebook/REPL it
# displays the DataFrame as the cell result.
author_comment
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment