Skip to content

Instantly share code, notes, and snippets.

@ashim888
Last active July 6, 2016 11:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ashim888/797cae73c921e282a56db0dc477455e1 to your computer and use it in GitHub Desktop.
Save ashim888/797cae73c921e282a56db0dc477455e1 to your computer and use it in GitHub Desktop.
Child Safety Detection
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pymongo import MongoClient
import pprint
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from microsofttranslator import Translator
import langid
# Keywords whose presence in a site's metadata marks the site as unsafe.
# NOTE: safety_check compares these against lowercased single-word tokens, so
# the multi-word and capitalised entries below cannot match as written; they
# are kept for reference.
porn_list = [
    "anal", "hentai", "anus", "arse", "butt", "arsehole", "ass", "fcuk",
    "fuck", "naked", "xvideos", "porn", "sex", "porno", "free porn",
    "porn tube", "porn videos", "streaming porn", "Free porn", "sex videos",
    "pussy", "Porn hub",
    # A comma was missing after "xxx", which silently fused it with the next
    # literal into the single entry "xxxporn".
    "xxx", "porn", "sex",
]
def safety_check(domain):
    """Print whether the stored metadata for *domain* contains flagged keywords.

    Looks up the ``WebInfo`` document whose ``domain`` field equals *domain*,
    concatenates its ``title``, ``metaDescription`` and ``metaTags`` fields,
    translates the text to English when langid reports another language, and
    intersects the lowercased word tokens with ``porn_list``.

    Nothing is returned: the verdict, a "not found" notice, or the text of any
    exception raised along the way is printed to stdout.
    """
    tokenizer = RegexpTokenizer(r'\w+')
    client = MongoClient('mongodb://192.168.1.10:27017/', 27017)
    try:
        record = client.cutestat_v3.WebInfo.find_one({"domain": domain})
        if record is None:
            print(domain + ' Not Found In Database' + '\n')
            return

        # Build the text with a unicode format string rather than str():
        # on Python 2, str() raises UnicodeEncodeError for non-ASCII text,
        # which is exactly the content the translation step exists for.
        overall_text = u"%s%s%s" % (
            record['title'],
            record['metaDescription'],
            record['metaTags'],
        )

        # Language detection; keep `language` bound even without a prediction
        # so the check proceeds on the untranslated text instead of aborting.
        language_prediction = langid.classify(overall_text)
        language = None
        if language_prediction is not None:
            language = language_prediction[0]

        if language is not None and language != 'en':
            print('Another language Found: ' + language)
            # Only create the translator when a translation is really needed.
            translator = Translator('<Your Client ID>', '<Your Client Secret>')
            overall_text = translator.translate(overall_text, "en")

        # Lowercase before the stop-word test so capitalised stop words are
        # filtered as well.
        stop_words = set(stopwords.words('english'))
        lowered_tokens = (tok.lower() for tok in tokenizer.tokenize(overall_text))
        words = set(tok for tok in lowered_tokens if tok not in stop_words)

        # Flagged keywords that occur among the page's words.
        matches = words.intersection(porn_list)
        if matches:
            print(domain + " SAFETY CHECK FAIL")
            print("Total Abusive Keywords Found: %d" % len(matches))
            print('\n')
        else:
            print(domain + " SAFETY CHECK PASS")
            print('\n')
    except TypeError:
        print(domain + " Not found")
    except Exception as exc:
        # Best-effort script: report the failure and let the caller continue
        # with the next domain.
        print(exc)
    finally:
        # Release the connection opened for this call.
        client.close()
# Run the checker over a fixed sample of domains, in this order.
for _sample_domain in (
    "www.befuck.com",
    "baidu.com",
    "gioia.it",
    "partyporn.co.il",
    "x-nxx.co.il",
    "jw.org",
    "xhamster.com",
    "www.xnxx.com",
    "ratopati.com",
):
    safety_check(_sample_domain)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment