Created
July 6, 2020 16:45
-
-
Save Dunnomix/24dca276a7144bf9981bb402025c00cc to your computer and use it in GitHub Desktop.
Servientrega Selenium Scraper Tracking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.7 | |
""" | |
This program checks a tracking ID on servientrega.com.
The outcome is sent via Telegram to the TELEGRAM_CONTACT_ID of your choice.
Set DEBUG to False to avoid printing information to the shell.
Set WAIT_MINUTES to the number of minutes to wait between checks in the loop,
and make sure SERVIENTREGA_TRACKING_ID holds the tracking ID you want to follow.
Javier Mejias, 2020
""" | |
import json | |
import os | |
import requests | |
import time | |
from selenium import webdriver | |
from selenium.webdriver.chrome.options import Options | |
# Module-level configuration and shared state
# EDIT ZONE ###################################################################
# When True, progress information is printed to the shell
DEBUG = True
# Telegram settings: bot key, API base URL built from it, and target chat id
TELEGRAM_BOT_KEY = "botid:the-rest-of-your-telegram-bot-key"
telegram_url = "https://api.telegram.org/bot" + TELEGRAM_BOT_KEY
TELEGRAM_CONTACT_ID = 13351
# Scraper settings: tracking number, minutes between checks, status file name
SERVIENTREGA_TRACKING_ID = 2071988217
WAIT_MINUTES = 20
FILE_NAME = "servientrega-status.json"
# EDIT ZONE END ###############################################################
# Flag recording whether a status file from an earlier run was found
HAVE_OLD_DATA = False
# Chrome is started once at import time and shared by the whole script
chrome_options = Options()
# Run without a visible browser window
chrome_options.add_argument("--headless")
# Only log critical errors
chrome_options.add_argument("log-level=3")
driver = webdriver.Chrome(options=chrome_options)
def send_message(msg, contact_id=TELEGRAM_CONTACT_ID):
    """Send ``msg`` to a Telegram chat through the bot ``/sendMessage`` endpoint.

    Args:
        msg: Text of the message to send.
        contact_id: Telegram chat id to send to; defaults to
            TELEGRAM_CONTACT_ID.

    Returns:
        The ``requests.Response`` returned by Telegram, or ``None`` when the
        request could not be performed at all (connection error, timeout, ...).
    """
    data = {
        "chat_id": contact_id,
        "text": msg,
    }
    try:
        # A timeout keeps a stalled connection from blocking the polling loop.
        r = requests.post(telegram_url + "/sendMessage", data=data, timeout=30)
    except requests.RequestException as e:
        # No response object exists here, so there is nothing to inspect;
        # report the failure and let the caller carry on.
        print("Error: ", e)
        return None
    if r.status_code != 200:
        print("Telegram /sendMessage error:", r.status_code)
        print(r.content)
        if r.status_code == 404:
            print("Probably there is something wrong with your TELEGRAM_BOT_KEY")
    return r
def get_updates(track_id):
    """Scrape the tracking history for ``track_id`` from servientrega.com.

    Loads the tracking detail page with the module-level ``driver``, switches
    into the first iframe on the page and reads every ``<li>`` of the
    ``list_historial`` list.

    Args:
        track_id: Tracking number inserted into the detail page URL.

    Returns:
        A list of dicts with the keys ``date``, ``time``, ``location`` and
        ``status``, in the order the rows appear on the page.
    """
    # Local import: the ``find_element(s)_by_xpath`` helpers were removed in
    # Selenium 4, while ``find_element(By.XPATH, ...)`` works on 3 and 4.
    from selenium.webdriver.common.by import By

    url = f"https://www.servientrega.com/wps/portal/Colombia/transacciones-personas/rastreo-envios/detalle?id={track_id}"
    driver.get(url)
    frame = driver.find_element(By.XPATH, "//iframe")
    driver.switch_to.frame(frame)
    rows = driver.find_elements(By.XPATH, '//ul[@class="list_historial"]/li')
    updates = []
    for row in rows:
        data = row.find_elements(By.XPATH, ".//div")
        locations = data[0].find_elements(By.XPATH, ".//p")
        date = locations[0].find_element(By.XPATH, ".//b").get_attribute("innerHTML")
        # The second paragraph reads "<status> - <location>"
        parts = locations[1].get_attribute("innerHTML").split(" - ")
        status_text = parts[0]
        # Tolerate rows without the " - " separator instead of raising
        location = parts[1] if len(parts) > 1 else ""
        date_time = data[1].find_element(By.XPATH, ".//p/b").get_attribute("innerHTML")
        if DEBUG:
            print(f'{date} {date_time}\t{location}\t{status_text}')
        updates.append({
            "date": date,
            "time": date_time,
            "location": location,
            "status": status_text,
        })
    return updates
def main(track_id):
    """Run one check cycle: load saved history, scrape, save, compare, notify.

    The first entry of the freshly scraped history is compared with the first
    entry saved by the previous cycle; when they differ, the new entry is sent
    with ``send_message``.

    Args:
        track_id: Tracking number passed on to ``get_updates``.
    """
    global HAVE_OLD_DATA
    # 1 read the file if it exists
    old_data = []
    # Re-evaluate the flag on every cycle so a file removed between cycles
    # cannot leave the flag set while ``old_data`` has nothing loaded.
    HAVE_OLD_DATA = os.path.isfile(FILE_NAME)
    if HAVE_OLD_DATA:
        try:
            with open(FILE_NAME) as fp:
                old_data = json.load(fp)
        except ValueError:
            # Unreadable JSON (e.g. a truncated write): treat as no old data
            old_data = []
    # 2 get the updates
    updates = get_updates(track_id)
    if not updates:
        # Nothing was scraped: keep the saved history untouched so the next
        # successful cycle can still detect a change, and skip the comparison
        # (indexing an empty list would raise).
        if DEBUG:
            print("No tracking entries found, keeping the previously saved data")
    else:
        # 3 save the file with all of the results
        with open(FILE_NAME, "w") as fp:
            json.dump(updates, fp, indent=4)
        # 4 compare the old data with the new one
        if HAVE_OLD_DATA and old_data:
            if old_data[0] == updates[0]:
                # no changes, no update
                if DEBUG:
                    print("Keep trying")
            else:
                if DEBUG:
                    print("You've got updates")
                message = f'{updates[0]["date"]} {updates[0]["time"]}\t{updates[0]["location"]}\t{updates[0]["status"]}'
                send_message(message)
        else:
            print("This is the first time you run the script")
    print(f"I will go to sleep now, but in {WAIT_MINUTES} Minutes I will run again, forever")
if __name__ == "__main__":
    # Poll forever: run one check cycle, then sleep WAIT_MINUTES minutes.
    try:
        while True:
            main(SERVIENTREGA_TRACKING_ID)
            time.sleep(WAIT_MINUTES * 60)
    finally:
        # Always shut the browser down (Ctrl-C or an unexpected error),
        # otherwise the headless Chrome process is left running.
        driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment