amankharwal/keyword.py Secret

## keyword.py
import re
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

# Extra filler words to discard on top of NLTK's English stopword list.
# NOTE(review): "six" is absent from the number words below - confirm
# whether that omission is intentional.
new_words = [
    "fig", "figure", "image", "sample", "using",
    "show", "result", "large",
    "also", "one", "two", "three",
    "four", "five", "seven", "eight", "nine",
]

# Merge both sources through a set to de-duplicate, then expose a list.
stop_words = list(set(stopwords.words('english')).union(new_words))

def pre_process(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase, strip markup tags, replace runs of digits
    and non-word characters with a space, drop tokens found in the
    module-level ``stop_words``, drop tokens shorter than three characters,
    and lemmatize what remains with ``WordNetLemmatizer``.

    Args:
        text: The raw document text (a ``str``).

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    text = text.lower()

    # Strip markup tags. The pattern used to contain the HTML entities
    # "&lt;" / "&gt;" instead of "<" / ">", so real tags were never matched
    # and tag names such as "div" leaked into the output.
    text = re.sub(r"</?.*?>", " <> ", text)

    # Collapse every run of digits / non-word characters into one space.
    text = re.sub(r"(\d|\W)+", " ", text)

    # stop_words is a list at module level; a set gives O(1) membership
    # tests instead of a linear scan per token.
    blocked = frozenset(stop_words)
    lemmatizer = WordNetLemmatizer()

    # Filtering happens before lemmatization, matching the original order.
    tokens = [
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in blocked and len(word) >= 3
    ]
    return ' '.join(tokens)
# Clean every entry of the 'paper_text' column with pre_process.
# df is defined outside this view (presumably a pandas DataFrame - confirm).
docs = df['paper_text'].apply(pre_process)
	import re
	from nltk.corpus import stopwords
	from nltk.stem.wordnet import WordNetLemmatizer

	# Extra filler words to discard on top of NLTK's English stopword list.
	# NOTE(review): "six" is absent from the number words below - confirm
	# whether that omission is intentional.
	new_words = [
		"fig", "figure", "image", "sample", "using",
		"show", "result", "large",
		"also", "one", "two", "three",
		"four", "five", "seven", "eight", "nine",
	]

	# Merge both sources through a set to de-duplicate, then expose a list.
	stop_words = list(set(stopwords.words('english')).union(new_words))

	def pre_process(text):
		"""Normalize raw text into a space-joined string of lemmatized tokens.

		Steps, in order: lowercase, strip markup tags, replace runs of digits
		and non-word characters with a space, drop tokens found in
		``stop_words``, drop tokens shorter than three characters, and
		lemmatize what remains with ``WordNetLemmatizer``.

		Args:
			text: The raw document text (a ``str``).

		Returns:
			The surviving lemmatized tokens joined by single spaces.
		"""
		text = text.lower()

		# Strip markup tags.
		text = re.sub(r"</?.*?>", " <> ", text)

		# Collapse every run of digits / non-word characters into one space.
		# The pattern used to contain an escaped pipe ("\|"), which matched
		# a literal "|" rather than acting as alternation, so digits and
		# punctuation were left in the text.
		text = re.sub(r"(\d|\W)+", " ", text)

		# stop_words is a list; a set gives O(1) membership tests instead
		# of a linear scan per token.
		blocked = frozenset(stop_words)
		lemmatizer = WordNetLemmatizer()

		# Filtering happens before lemmatization, matching the original order.
		tokens = [
			lemmatizer.lemmatize(word)
			for word in text.split()
			if word not in blocked and len(word) >= 3
		]
		return ' '.join(tokens)
	# Clean every entry of the 'paper_text' column with pre_process.
	# df is defined outside this view (presumably a pandas DataFrame - confirm).
	docs = df['paper_text'].apply(pre_process)