# cydal/clean_data.py

## clean_data.py
import pandas as pd
import numpy as np
import nltk
import string
from cleantext import clean

# Ask NLTK's downloader for the "stopwords" corpus. This is a top-level
# statement, so it runs every time the module is imported or executed.
# NOTE(review): presumably needed by cleantext's stop-word handling — confirm.
nltk.download('stopwords')

def clean_text(text):
    """Clean ``text`` and tally the tracked keywords found in it.

    ``text`` is passed through ``cleantext.clean`` with the ``extra_spaces``,
    ``lowercase``, ``numbers`` and ``punct`` options enabled. Every
    whitespace-separated token of the cleaned result is then checked against
    each entry of ``keywords``; on a match ``keycount[key][token]`` is
    incremented.

    ``keywords`` and ``keycount`` are read from module scope and are not
    defined in the visible part of this file.
    NOTE(review): ``keycount[key][token]`` must already be initialised (or be
    a Counter/defaultdict), otherwise ``+= 1`` raises ``KeyError`` — confirm.

    Args:
        text: Raw string to clean.

    Returns:
        The cleaned string, so the function can be used with
        ``Series.apply`` to produce a cleaned column.
    """
    # Keep the result: clean() returns a new string and does not modify
    # ``text`` in place (str is immutable), so discarding it made the call
    # a no-op.
    cleaned = clean(
        text,
        all=False,
        extra_spaces=True,
        lowercase=True,
        numbers=True,
        punct=True,
    )

    # Count keywords on the normalised tokens rather than on the raw input.
    for word in cleaned.split():
        for eachkey in keywords:
            if word in keywords[eachkey]:
                keycount[eachkey][word] += 1

    return cleaned

# Run clean_text over every value of the "abstract" entry of core_df.
# NOTE(review): this rebinds core_df to whatever .apply() returns instead of
# writing the result back into core_df["abstract"] — confirm that is intended.
core_df = core_df["abstract"].apply(clean_text)
	import pandas as pd
	import numpy as np
	import nltk
	import string
	from cleantext import clean

	# Ask NLTK's downloader for the "stopwords" corpus.
	# NOTE(review): presumably needed by cleantext's stop-word handling — confirm.
	nltk.download('stopwords')

	def clean_text(text):
		"""Clean ``text`` and tally the tracked keywords found in it.

		``text`` is passed through ``cleantext.clean`` with the
		``extra_spaces``, ``lowercase``, ``numbers`` and ``punct`` options
		enabled. Every whitespace-separated token of the cleaned result is
		then checked against each entry of ``keywords``; on a match
		``keycount[key][token]`` is incremented.

		``keywords`` and ``keycount`` are read from the enclosing scope and
		are not defined in the visible part of this file.
		NOTE(review): an identical ``clean_text`` is defined earlier in this
		file; consider keeping only one copy.

		Args:
			text: Raw string to clean.

		Returns:
			The cleaned string, so the function can be used with
			``Series.apply`` to produce a cleaned column.
		"""
		# Keep the result: clean() returns a new string and does not modify
		# ``text`` in place, so discarding it made the call a no-op.
		cleaned = clean(
			text,
			all=False,
			extra_spaces=True,
			lowercase=True,
			numbers=True,
			punct=True,
		)

		# Count keywords on the normalised tokens rather than the raw input.
		for word in cleaned.split():
			for eachkey in keywords:
				if word in keywords[eachkey]:
					keycount[eachkey][word] += 1

		return cleaned

	# Run clean_text over every value of the "abstract" entry of core_df.
	# NOTE(review): this rebinds core_df to whatever .apply() returns instead
	# of writing the result back into core_df["abstract"] — confirm intended.
	core_df = core_df["abstract"].apply(clean_text)