srang992/clean_desc.py

## clean_desc.py
def clean_desc(s):
    """Return ``str(s)`` lowercased, with every non-letter turned into a space.

    The argument is passed through ``str()`` first, so non-string values
    are accepted. Each character outside ``a-z``/``A-Z`` is replaced by
    exactly one space; runs of spaces are not collapsed and the result is
    not stripped.
    """
    lowered = str(s).lower()
    return re.sub(r'[^a-zA-Z]', ' ', lowered)

# Preprocess the 'description' column of netflix_data_copy into 'clean_desc'.
# netflix_data_copy must already exist; it is not created in this section.
# Step 1: normalise the raw text with clean_desc (the helper defined in this
# file; the previous name `cleaning` is not defined anywhere in it).
netflix_data_copy['clean_desc'] = netflix_data_copy['description'].apply(clean_desc)

# Build the English stopword set once. Constructing it inside the
# comprehension repeated the same work for every single token.
english_stopwords = set(stopwords.words('english'))

# Step 2: tokenize each description and drop the stopwords.
# NOTE(review): no lemmatization happens in this section, despite what the
# earlier comments suggested -- confirm whether a lemmatizer step is missing.
netflix_data_copy['clean_desc'] = netflix_data_copy['clean_desc'].apply(word_tokenize)
netflix_data_copy['clean_desc'] = netflix_data_copy['clean_desc'].apply(
    lambda tokens: [word for word in tokens if word not in english_stopwords]
)

# Step 3: join the remaining tokens back into one space-separated string.
netflix_data_copy['clean_desc'] = netflix_data_copy['clean_desc'].apply(' '.join)
	def clean_desc(s):
		"""Return ``str(s)`` lowercased, with every non-letter turned into a space.

		The argument is passed through ``str()`` first, so non-string values
		are accepted. Each character outside ``a-z``/``A-Z`` is replaced by
		exactly one space; runs of spaces are not collapsed and the result is
		not stripped.
		"""
		# NOTE(review): this tab-indented definition repeats the column-0
		# `clean_desc` earlier in the file -- confirm whether this copy is
		# still needed. The body is now indented under the `def`; previously
		# it sat at the same level, leaving the function without a suite.
		s = str(s).lower()
		return re.sub(r'[^a-zA-Z]', ' ', s)

	# Preprocess the 'description' column of netflix_data_copy into 'clean_desc'.
	# netflix_data_copy must already exist; it is not created in this section.
	# Step 1: normalise the raw text with clean_desc (the helper defined in this
	# file; the previous name `cleaning` is not defined anywhere in it).
	netflix_data_copy['clean_desc'] = netflix_data_copy['description'].apply(clean_desc)

	# Build the English stopword set once. Constructing it inside the
	# comprehension repeated the same work for every single token.
	english_stopwords = set(stopwords.words('english'))

	# Step 2: tokenize each description and drop the stopwords.
	# NOTE(review): no lemmatization happens in this section, despite what the
	# earlier comments suggested -- confirm whether a lemmatizer step is missing.
	netflix_data_copy['clean_desc'] = netflix_data_copy['clean_desc'].apply(word_tokenize)
	netflix_data_copy['clean_desc'] = netflix_data_copy['clean_desc'].apply(
		lambda tokens: [word for word in tokens if word not in english_stopwords]
	)

	# Step 3: join the remaining tokens back into one space-separated string.
	netflix_data_copy['clean_desc'] = netflix_data_copy['clean_desc'].apply(' '.join)