Skip to content

Instantly share code, notes, and snippets.

@keitazoumana
Last active September 15, 2022 00:05
Show Gist options
  • Save keitazoumana/4404b4ec2f251ffde3477d797e159239 to your computer and use it in GitHub Desktop.
Save keitazoumana/4404b4ec2f251ffde3477d797e159239 to your computer and use it in GitHub Desktop.
import pandas as pd
def preprocess_data(data_path: str, sample_size: int, random_state=None) -> pd.DataFrame:
    """Load a CSV of articles and return a random sample of their abstracts.

    Rows whose ``abstract`` value is missing are dropped before sampling, so
    every returned row has an abstract.

    Args:
        data_path: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file. It must contain an ``abstract`` column.
        sample_size: Number of rows to draw, without replacement.
        random_state: Optional seed forwarded to ``DataFrame.sample``. Leave
            as ``None`` (the default) for a different sample on every call;
            pass an int to make the sample reproducible.

    Returns:
        A single-column DataFrame (``abstract``) with ``sample_size`` rows.
        The index holds each row's position after the missing-abstract rows
        were removed.

    Raises:
        KeyError: If the CSV has no ``abstract`` column.
        ValueError: If ``sample_size`` is larger than the number of rows
            that have an abstract (sampling is done without replacement).
    """
    # low_memory=False makes pandas read the file in a single pass instead
    # of in chunks, which avoids mixed-dtype columns on large CSV files.
    data = pd.read_csv(data_path, low_memory=False)

    # Drop articles without an abstract and renumber the remaining rows.
    data = data.dropna(subset=["abstract"]).reset_index(drop=True)

    # Draw the random rows, keeping only the abstract column.
    return data.sample(sample_size, random_state=random_state)[["abstract"]]
# Load the source CSV and keep a random sample of 100 abstracts
data_path = "./data/cord19_source_data.csv"
source_data = preprocess_data(data_path=data_path, sample_size=100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment