def create_corpus(result, documents=None):
    """Return the document rows whose ``docid`` appears in ``result``.

    Args:
        result: Table with a ``docid`` column; its unique values select
            which documents are kept.
        documents: Table with ``docid`` and ``url`` columns to select rows
            from. When omitted, the module-level ``df`` is used, which is
            what the function always read before this parameter existed.

    Returns:
        The matching rows of ``documents`` with a fresh 0-based index and
        the ``url`` column removed.

    Raises:
        KeyError: If ``documents`` has no ``url`` column (raised by
            ``drop``), or if either table lacks ``docid``.
    """
    if documents is None:
        # Backward-compatible default: fall back to the global table.
        documents = df

    wanted_ids = result['docid'].unique()
    is_wanted = documents['docid'].isin(wanted_ids)

    corpus = documents[is_wanted].reset_index(drop=True)
    corpus = corpus.drop(columns='url')

    print('Number of Rows=>', len(corpus))
    return corpus
# Build the corpus of documents referenced by the training results.
training_corpus = create_corpus(training_result)

# Preview the first rows (rendered as the cell output in a notebook).
training_corpus.head()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment