# kshirsagarsiddharth/Clean Social Media Data Spacy.py

## Clean Social Media Data Spacy.py
import spacy

# Load the "en_core_web_sm" English pipeline once, at module level.
# clean_social_media_data() reads this global, so the pipeline is not
# reloaded on every call.
nlp = spacy.load("en_core_web_sm")

def clean_social_media_data(text):
    """Return *text* reduced to a space-separated string of filtered lemmas.

    The text is run through the module-level ``nlp`` pipeline. For each
    resulting token, its lemma is kept only when all of the following hold:

    * the token is not flagged as a stop word (``token.is_stop``);
    * the lemma consists solely of alphabetic characters (``str.isalpha``);
    * the lemma is longer than two characters.

    The surviving lemmas are joined, in their original order, with single
    spaces. An input with no surviving lemmas yields an empty string.
    """
    kept_lemmas = []
    for tok in nlp(text):
        # Stop words are dropped before their lemma is even looked at.
        if tok.is_stop:
            continue

        lemma = tok.lemma_

        # isalpha() is False for any string containing a non-letter
        # character (punctuation, digits, symbols) and for the empty string.
        if not lemma.isalpha():
            continue

        # Discard very short lemmas (one or two characters).
        if len(lemma) <= 2:
            continue

        kept_lemmas.append(lemma)

    return ' '.join(kept_lemmas)

# Smoke test: run the cleaner on a sample post that contains an emoji,
# hashtags and @-mentions.
# NOTE(review): the return value is neither printed nor stored, so this call
# only shows output in a REPL/notebook - presumably where it was written;
# wrap it in print() if the file is meant to be run as a script.
text = 'I had a great time at the party last night! 😎 #party #friends @siddharth @sid'
clean_social_media_data(text)
	import spacy

	# Load the "en_core_web_sm" English pipeline into the global `nlp`.
	# NOTE(review): this repeats the identical load performed earlier in the
	# file; one of the two copies is presumably redundant - confirm and remove.
	nlp = spacy.load("en_core_web_sm")

	def clean_social_media_data(text):
		"""Return *text* reduced to a space-separated string of filtered lemmas.

		The text is processed with the ``nlp`` pipeline loaded above. A lemma
		is kept only if its token is not a stop word, the lemma is purely
		alphabetic, and the lemma is longer than two characters.

		NOTE(review): this repeats the definition of the same name earlier in
		the file; consider keeping only one copy.
		"""
		# Process the text
		doc = nlp(text)

		# Extract the lemmas and remove stop words
		tokens = [token.lemma_ for token in doc if not token.is_stop]

		# Remove punctuation and non-alphabetic characters
		tokens = [token for token in tokens if token.isalpha()]

		# Remove words that are shorter than three characters
		tokens = [token for token in tokens if len(token) > 2]

		# Join the tokens into a single string
		clean_text = ' '.join(tokens)

		return clean_text

	# Smoke test: run the cleaner on a sample post that contains an emoji,
	# hashtags and @-mentions.
	# NOTE(review): the return value is neither printed nor stored, so this
	# call only shows output in a REPL/notebook - confirm intended usage.
	text = 'I had a great time at the party last night! 😎 #party #friends @siddharth @sid'
	clean_social_media_data(text)