Skip to content

Instantly share code, notes, and snippets.

@zenosxx
Created July 30, 2019 03:21
Show Gist options
  • Save zenosxx/0809d9f6a77cbda894c66e130358e05a to your computer and use it in GitHub Desktop.
Save zenosxx/0809d9f6a77cbda894c66e130358e05a to your computer and use it in GitHub Desktop.
import requests,re,os,schedule,time,subprocess
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail
# Ask the user for the page to monitor; input() already returns a str.
# NOTE(review): despite the prompt wording, this value is handed straight to
# requests.get() via grab_js()/fetch(), so it presumably has to be a full URL
# including the scheme (e.g. https://example.com) — confirm.
url = input("Enter domain:\n")
# Use the requests module to fetch the HTML page.
def fetch(url):
    """Download *url* and return the response body as text.

    The request is made with a 10-second timeout, and the URL is echoed to
    stdout once the request has succeeded.

    Returns:
        The response text, or the literal string ``"None"`` (not the
        ``None`` object) when ``requests`` raises a ``RequestException``.
    """
    try:
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException:
        # Callers parse or write the result as text, so failure is reported
        # with a string sentinel rather than an exception.
        return "None"
    print(url)
    return response.text
# Use the HTML parser to collect the src of every <script> tag into a list.
def grab_js(url):
    """Return the ``src`` attribute of every ``<script>`` tag found at *url*.

    The page is downloaded with ``fetch`` and parsed with BeautifulSoup's
    ``html.parser``. Script tags without a ``src`` (or with an empty one)
    are dropped. The resulting list is printed before it is returned.
    """
    page = BeautifulSoup(fetch(url), 'html.parser')
    sources = [tag.get('src') for tag in page.find_all('script')]
    # Inline scripts yield None for 'src'; keep only truthy values.
    data = [src for src in sources if src]
    print(data)
    return data
# Use a regex to detect relative script URLs and resolve them against the entered URL.
def check():
    """Return the script URLs of the monitored page as absolute URLs.

    The ``src`` values come from ``grab_js(url)``, where ``url`` is the
    module-level value entered by the user.

    * Values that already start with ``http://`` or ``https://`` are kept
      unchanged.
    * Everything else (root-relative, path-relative and protocol-relative
      ``//host/...`` values) is resolved against ``url`` with
      ``urllib.parse.urljoin``.

    Returns:
        A list of URL strings, in the order the scripts appear on the page.
    """
    # Local import: the file only imports urlparse from urllib.parse.
    from urllib.parse import urljoin

    resolved = []
    for src in grab_js(url):
        # Anchor the test at the start of the string: an unanchored search
        # would treat a relative path such as "/js/http-client.js" as
        # absolute merely because it contains "http".
        if re.match(r'https?://', src, re.IGNORECASE):
            resolved.append(src)
        else:
            # urljoin inserts the missing "/" where needed and gives
            # protocol-relative URLs the scheme of the base URL, which plain
            # string concatenation does not.
            resolved.append(urljoin(url, src))
    return resolved
# Requires diff2html-cli: sudo npm install -g diff2html-cli
# Use diff to compare the files and diff2html-cli to render the HTML report.
def report(original, duplicate, reportname):
    """Diff two saved files, render the diff as HTML and e-mail it.

    Runs ``diff -u original duplicate`` and feeds its output to
    ``diff2html -i stdin -F data/report/<reportname>``. If the text printed
    by ``diff2html`` contains the word "support", the run is treated as
    "no changes"; otherwise the generated report file is read and passed to
    ``send_report``.

    Args:
        original: Path of the baseline copy of the script.
        duplicate: Path of the freshly downloaded copy of the script.
        reportname: File name of the HTML report inside ``data/report/``.

    Raises:
        subprocess.CalledProcessError: If ``diff2html`` exits non-zero.
        FileNotFoundError: If ``diff`` or ``diff2html`` is not installed.
    """
    report_path = 'data/report/' + reportname
    os.makedirs('data/report', exist_ok=True)

    # The paths are built by the caller from the basenames of remote script
    # URLs, so they are untrusted. Passing argument lists (no shell) keeps
    # characters such as ';', '|' or '$(...)' from being interpreted.
    # diff exits with status 1 when the files differ, so its return code is
    # deliberately not checked.
    diff = subprocess.run(
        ['diff', '-u', original, duplicate],
        stdout=subprocess.PIPE,
    )
    rendered = subprocess.run(
        ['diff2html', '-i', 'stdin', '-F', report_path],
        input=diff.stdout,
        stdout=subprocess.PIPE,
        check=True,
    )

    # NOTE(review): presumably diff2html prints its usage text (which
    # mentions "support") when it receives an empty diff — confirm against
    # the installed diff2html-cli version.
    if "support" in rendered.stdout.decode("utf-8"):
        print("No New Changes Detected..")
    else:
        with open(report_path, encoding="utf-8") as handle:
            html = handle.read()
        send_report(html)
        print("reporting done")
# Resolve the script URLs once, at start-up. save_list(), original(),
# duplicate() and job() all read this module-level list.
url_list=check()
def save_list():
    """Write every URL in the module-level ``url_list`` to ``list.txt``.

    The file is created or truncated in the current working directory and
    receives one URL per line.
    """
    # The context manager guarantees the file is flushed and closed.
    with open("list.txt", "w") as out:
        out.writelines(entry + "\n" for entry in url_list)
def original():
    """Download every script in ``url_list`` into ``data/original/``.

    Each script is fetched with ``fetch`` and written to a file named after
    the basename of its URL, overwriting any previous copy. These files are
    the baseline that ``job`` passes to ``report`` for comparison.
    """
    # Create the target directory on first use instead of failing in open().
    os.makedirs('data/original', exist_ok=True)
    for script_url in url_list:
        body = fetch(script_url)
        # The file name must stay os.path.basename(url): job() rebuilds the
        # same path when it calls report().
        with open('data/original/' + os.path.basename(script_url), "w") as out:
            out.write(body)
def duplicate():
    """Download every script in ``url_list`` into ``data/duplicate/``.

    Each script is fetched with ``fetch`` and written to a file named after
    the basename of its URL, overwriting any previous copy. These files are
    the fresh copies that ``job`` passes to ``report`` for comparison.
    """
    # Create the target directory on first use instead of failing in open().
    os.makedirs('data/duplicate', exist_ok=True)
    for script_url in url_list:
        body = fetch(script_url)
        # The file name must stay os.path.basename(url): job() rebuilds the
        # same path when it calls report().
        with open('data/duplicate/' + os.path.basename(script_url), "w") as out:
            out.write(body)
# Take the initial baseline snapshot (data/original/) before the scheduler
# below starts running job().
original()
# Disabled call; when enabled it writes url_list to list.txt.
#save_list()
def job():
    """Run one monitoring cycle over every URL in ``url_list``.

    Steps:
        1. Download fresh copies with ``duplicate``.
        2. For each script, call ``report`` to compare the baseline in
           ``data/original/`` with the fresh copy in ``data/duplicate/``.
           The report file is named ``<unix timestamp><basename>.html``.
        3. Refresh the baseline with ``original``.
    """
    print("Schedule Job Started running...")
    duplicate()
    for script_url in url_list:
        filename = os.path.basename(script_url)
        stamp = str(int(time.time()))
        report_name = stamp + filename + ".html"
        report('data/original/' + filename,
               'data/duplicate/' + filename,
               report_name)
    # NOTE(review): the source's indentation was lost; the baseline refresh is
    # assumed to run once after all reports, not per script — confirm.
    original()
def send_report(data_report):
    """E-mail an HTML report through Twilio SendGrid.

    The API key is read from the ``SENDGRID_API_KEY`` environment variable
    rather than being embedded in the source. When the variable is missing
    or empty, a message is printed and nothing is sent.

    Args:
        data_report: HTML string used as the body of the e-mail.

    Returns:
        None. The HTTP status code of the SendGrid response, or the error
        message if sending fails, is printed to stdout.
    """
    # SECURITY: a key was previously hard-coded here. Since it was published
    # with the source it should be treated as compromised and revoked.
    api_key = os.environ.get('SENDGRID_API_KEY')
    if not api_key:
        print("SENDGRID_API_KEY is not set; report not sent")
        return

    # NOTE(review): sender and recipient are blank in the source and must be
    # filled in before SendGrid will accept the message.
    message = Mail(from_email='',
                   to_emails='',
                   subject='Sending with Twilio SendGrid is Fun',
                   html_content=data_report)
    try:
        sg = SendGridAPIClient(api_key)
        response = sg.send(message)
        print(response.status_code)
    except Exception as e:
        # Best effort: a failed e-mail must not stop the monitoring loop.
        print(str(e))
# Register job() to run every 5 seconds, then poll the scheduler forever,
# sleeping one second between polls.
schedule.every(5).seconds.do(job)
while True:
    schedule.run_pending()
    time.sleep(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment