Skip to content

Instantly share code, notes, and snippets.

@vikeshsingh37
Last active April 5, 2020 14:22
Show Gist options
  • Save vikeshsingh37/288738a67a602e5900130dc7991e0c52 to your computer and use it in GitHub Desktop.
Save vikeshsingh37/288738a67a602e5900130dc7991e0c52 to your computer and use it in GitHub Desktop.
from snorkel.labeling import labeling_function
from textblob import TextBlob
import re
@labeling_function()
def lf_keyword_my(x):
    """Vote SPAM when the comment text contains "my", ignoring letter case.

    Heuristic: many spam comments talk about 'my channel', 'my video', etc.
    The check is a plain substring test on the lower-cased text, so "my"
    appearing inside a longer word counts as a hit as well.

    SPAM and ABSTAIN are label constants that are not defined in this
    snippet; they are expected to be provided by the surrounding module.
    """
    lowered_text = x.text.lower()
    if "my" in lowered_text:
        return SPAM
    return ABSTAIN
@labeling_function()
def lf_regex_check_out(x):
    """Vote SPAM when the comment matches "check ... out", ignoring case.

    Heuristic: spam comments say 'check out my video', 'check it out', etc.
    The pattern allows any run of characters (other than a newline) between
    "check" and "out".

    SPAM and ABSTAIN are label constants that are not defined in this
    snippet; they are expected to be provided by the surrounding module.
    """
    # NOTE: this labeling function must be defined exactly once. A second
    # definition under the same name only rebinds the module-level name and
    # yields two LabelingFunction objects that share one name, which
    # Snorkel's appliers are expected to reject as non-unique.
    return SPAM if re.search(r"check.*out", x.text, flags=re.I) else ABSTAIN
@labeling_function()
def lf_textblob_polarity(x):
    """Vote NOT_SPAM when TextBlob scores the comment as clearly positive.

    Uses the third-party TextBlob sentiment model together with the
    heuristic that non-spam comments are often positive: a polarity
    strictly greater than 0.3 yields NOT_SPAM, anything else abstains.

    NOT_SPAM and ABSTAIN are label constants that are not defined in this
    snippet; they are expected to be provided by the surrounding module.
    """
    polarity = TextBlob(x.text).sentiment.polarity
    if polarity > 0.3:
        return NOT_SPAM
    return ABSTAIN
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment