Last active
July 23, 2020 11:40
-
-
Save ruanpetterson/30b94204639e6dc492f9c5c263ffffe3 to your computer and use it in GitHub Desktop.
Scraping daily rates of Movida Rent a Car
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
from datetime import date, datetime, timedelta

import requests
from bs4 import BeautifulSoup as bs
class Movida:
    """Query daily rental rates from the Movida Rent a Car website.

    An instance holds a pick-up/drop-off location pair and a pick-up/drop-off
    datetime pair. All four values are optional at construction time and can
    be assigned later through the validating properties; ``daily_rates()``
    refuses to run until all of them are set and consistent.
    """

    # Location codes accepted by the ``pickup_location`` and
    # ``dropoff_location`` setters. Kept as a list (not a set) because callers
    # index/sample it, e.g. ``random.choice(Movida.possible_locations)``.
    possible_locations = [
        'RBR', 'MCZ', 'MAA', 'MCP', 'MAO', 'MOVMAN', 'MOVCMR', 'FEC',
        'IOS', 'SSA', 'BPS', 'MOVSVD', 'VDC', 'FOR', 'FORC', 'JDOC01',
        'BSB', 'MOVTAG', 'MOVLNR', 'VIX', 'MOVANP', 'MOVAG', 'GYN', 'MOVHCG',
        'MOVRVD', 'IMP', 'SLZ', 'CC', 'CGB', 'CGR', 'MOVCGC', 'MOVDOC',
        'MOVBHC', 'MOVBHZ', 'PLU', 'MOVBHCL', 'MOVBHR', 'MOVBET', 'MOVCTG', 'JDFC',
        'IZA', 'CNF', 'MOC', 'MOVPAL', 'UBA', 'UDI', 'JPA', 'CPV',
        'JPAC01', 'MOVCCV', 'MOVCWB', 'IGU', 'LDB', 'MGF', 'MOVPTG', 'CWB',
        'BEL', 'MOVBLC', 'MAB', 'CRJ', 'MOVCKS', 'PNZ', 'REC', 'RECC02',
        'MOVRFR', 'THE', 'MOVCPG', 'MOVMCE', 'MOVNTR', 'MOVQRZ', 'MOVRJB', 'MOVCOPA',
        'GIG', 'MOVJPG', 'MOVMDR', 'MOVRNR', 'MOVSHH', 'SDU', 'MOVMSR', 'MOVNTC',
        'NAT', 'MOVCNS', 'CXJ', 'MOVNHG', 'MOVPLT', 'POA', 'POABOX', 'MOVPOAC',
        'PAS', 'PVH', 'BVB', 'MOVBLM', 'FLN', 'JOI', 'MOVJOI', 'NVT',
        'MOVAME', 'MOVARB', 'MOVAPH', 'MOVBAU', 'VCP', 'MOVCPQC', 'MOVGRV', 'MOVFRA',
        'GUJ', 'GRU', 'MOVGRUO', 'MOVIND', 'MOVITA', 'MOVJND', 'MOVMRL', 'MOVMGC',
        'MOVOSC', 'MOVPRC', 'MOVPRP', 'MOVRPT', 'RAO', 'MOVSTA', 'MOVLSSZ', 'MOVSBCK',
        'MOVSBF', 'MOVSCS', 'MOVSCL', 'MOVSJP', 'SJP', 'MOVSJC', 'SAOC25', 'MOVPBF',
        'MOVBRA', 'MOVCNP', 'CGH', 'MOVCSL', 'MOVFRO', 'MOVJKK', 'MOVMOM', 'MOVPEN',
        'MOVRDT', 'MOVSAU', 'SHA', 'MOVBUT', 'MOVSCN', 'MOVSSC', 'MOVTAT', 'MOVVLP',
        'MOVSRB', 'MOVSUZ', 'MOVTBS', 'MOVTBT', 'AJU', 'AAR', 'PMW', 'MOVPLM',
    ]

    def __init__(self, pickup_location=None, pickup_datetime=None,
                 dropoff_location=None, dropoff_datetime=None):
        """Store whichever itinerary values are supplied.

        Each non-``None`` argument is routed through its property setter and
        is therefore validated immediately (see the setters for the
        exceptions raised). Arguments left as ``None`` stay unset.
        """
        if pickup_location is not None:
            self.pickup_location = pickup_location
        if pickup_datetime is not None:
            self.pickup_datetime = pickup_datetime
        if dropoff_location is not None:
            self.dropoff_location = dropoff_location
        if dropoff_datetime is not None:
            self.dropoff_datetime = dropoff_datetime

    @staticmethod
    def _parse_datetime(value):
        """Parse a ``'YYYY-MM-DD HH:MM'`` string into a ``datetime``.

        Raises:
            TypeError: if ``value`` is not a string or does not match the
                format. ``TypeError`` is kept for both cases because that is
                what the setters have always raised; the underlying
                ``strptime`` error is attached as ``__cause__``.
        """
        try:
            return datetime.strptime(value, '%Y-%m-%d %H:%M')
        except (TypeError, ValueError) as err:
            raise TypeError('Incorrect date format, should be YYYY-MM-DD HH:mm') from err

    @property
    def pickup_location(self):
        """Pick-up location code; reading it before it is set raises AttributeError."""
        return self._pickup_location

    @pickup_location.setter
    def pickup_location(self, p):
        if not isinstance(p, str):
            raise TypeError('Pick-up location must be string')
        if p not in self.possible_locations:
            raise ValueError('Pick-up location not found')
        self._pickup_location = p

    @property
    def dropoff_location(self):
        """Drop-off location code; reading it before it is set raises AttributeError."""
        return self._dropoff_location

    @dropoff_location.setter
    def dropoff_location(self, d):
        if not isinstance(d, str):
            raise TypeError('Drop-off location must be string')
        if d not in self.possible_locations:
            raise ValueError('Drop-off location not found')
        self._dropoff_location = d

    @property
    def pickup_datetime(self):
        """Pick-up moment as a ``datetime``; assigned from a ``'YYYY-MM-DD HH:MM'`` string."""
        return self._pickup_datetime

    @pickup_datetime.setter
    def pickup_datetime(self, p):
        self._pickup_datetime = self._parse_datetime(p)

    @property
    def dropoff_datetime(self):
        """Drop-off moment as a ``datetime``; assigned from a ``'YYYY-MM-DD HH:MM'`` string."""
        return self._dropoff_datetime

    @dropoff_datetime.setter
    def dropoff_datetime(self, d):
        self._dropoff_datetime = self._parse_datetime(d)

    def _validate(self):
        """Check that the itinerary is complete and chronologically sane.

        Raises:
            AttributeError: if any of the four itinerary values is unset.
            ValueError: if drop-off is not strictly after pick-up, or if
                pick-up is not strictly in the future (local naive time).
        """
        # hasattr() works on the properties because their getters raise
        # AttributeError while the backing ``_xxx`` attribute is missing.
        if not hasattr(self, 'pickup_location'):
            raise AttributeError('Pick-up location is not defined')
        if not hasattr(self, 'pickup_datetime'):
            raise AttributeError('Pick-up date and time is not defined')
        if not hasattr(self, 'dropoff_location'):
            raise AttributeError('Drop-off location is not defined')
        if not hasattr(self, 'dropoff_datetime'):
            raise AttributeError('Drop-off date and time is not defined')

        if self.dropoff_datetime <= self.pickup_datetime:
            raise ValueError('Drop-off date must be greater than pick-up date')
        if datetime.now() >= self.pickup_datetime:
            raise ValueError('Pick-up date must be in future')

    def daily_rates(self, timeout=30):
        """Fetch the rates offered for the configured itinerary.

        Args:
            timeout: seconds passed to ``requests.post`` so an unresponsive
                server cannot hang the caller indefinitely.

        Returns:
            A list of ``{'group_name': str, 'prices': list[float]}`` dicts,
            one per car block found in the response.

        Raises:
            AttributeError, ValueError: from ``_validate()``.
            requests.RequestException: on network failure or HTTP error status.
            RuntimeError: if the response lacks the expected markup.
        """
        self._validate()

        form_data = {
            'iata_retirada': self.pickup_location,
            'data_retirada': self.pickup_datetime.strftime('%d/%m/%Y'),
            'hora_retirada': self.pickup_datetime.strftime('%H:%M'),
            'iata_devolucao': self.dropoff_location,
            'data_devolucao': self.dropoff_datetime.strftime('%d/%m/%Y'),
            'hora_devolucao': self.dropoff_datetime.strftime('%H:%M'),
            'cupom': '',
        }
        response = requests.post(
            'https://www.movida.com.br/reserva/itinerario-troca',
            data=form_data,
            timeout=timeout,
        )
        # Fail on 4xx/5xx here instead of trying to scrape an error page.
        response.raise_for_status()
        return self._parse_rates(response.text)

    @staticmethod
    def _parse_rates(html):
        """Extract car groups and their prices from the itinerary response HTML."""
        parsed_html = bs(html, 'html.parser')
        car_choice = parsed_html.find('section', class_='car-choice')
        if car_choice is None:
            raise RuntimeError('Unexpected response: car-choice section not found')

        forms = car_choice.find_all('form')
        # The car blocks are read from the second <form> of the section.
        if len(forms) < 2:
            raise RuntimeError('Unexpected response: car listing form not found')

        # NOTE(review): only ',' -> '.' is normalised below; if the site ever
        # emits a thousands separator (e.g. '1.234,56') float() will fail --
        # confirm against a live response.
        return [
            {
                'group_name': car_group.find('div', class_='title-group_walk').text,
                'prices': [
                    float(price['title'].replace(',', '.'))
                    for price in car_group.find_all(
                        'span', class_='clube-price__value-discount--size_walk')
                ],
            }
            for car_group in forms[1].find_all('div', class_='block-car')
        ]
def default_rental_window(now=None):
    """Return default ``(pickup, dropoff)`` strings: upcoming Friday noon + 3 days.

    Args:
        now: reference ``datetime`` (naive, local); defaults to
            ``datetime.now()``. Exposed so the result is reproducible.

    Returns:
        A 2-tuple of ``'YYYY-MM-DD HH:MM'`` strings. The pick-up is the
        nearest Friday at 12:00 that is strictly after ``now``; the drop-off
        is exactly three days later.
    """
    if now is None:
        now = datetime.now()
    days_ahead = (4 - now.weekday()) % 7  # Monday == 0, so Friday == 4
    pickup = now.replace(hour=12, minute=0, second=0, microsecond=0) + timedelta(days_ahead)
    # On a Friday at or after noon "this Friday 12:00" is already in the past
    # and would be rejected as not being in the future; roll to next week.
    if pickup <= now:
        pickup += timedelta(7)
    dropoff = pickup + timedelta(3)
    fmt = '%Y-%m-%d %H:%M'
    return pickup.strftime(fmt), dropoff.strftime(fmt)


def main():
    """Interactively choose an itinerary and print the daily rates.

    Returns:
        Process exit status: 0 on success, 1 if anything failed.
    """
    # Keep asking until the answer is recognisable; an empty answer means yes.
    while True:
        answer = input('Random pick-up and drop-off? [Y/n] ').strip().lower()
        if answer in {'', 'y', 'ye', 'yes'}:
            pickup_location = random.choice(Movida.possible_locations)
            dropoff_location = pickup_location
            pickup_datetime, dropoff_datetime = default_rental_window()
            break
        if answer in {'n', 'no'}:
            pickup_location = input('Set your pick-up location: ')  # eg 'MOVLSSZ'
            pickup_datetime = input('Set your pick-up date and time: ')  # eg '2019-11-01 12:00'
            dropoff_location = input('Set your drop-off location: ')  # eg 'MOVLSSZ'
            dropoff_datetime = input('Set your drop-off date and time: ')  # eg '2019-11-04 12:00'
            break

    # Top-level boundary: report any failure as a one-line message and a
    # non-zero exit status instead of a traceback.
    try:
        movida = Movida(pickup_location, pickup_datetime, dropoff_location, dropoff_datetime)
        print()
        print(f'Pick-up in {movida.pickup_location} at {movida.pickup_datetime}')
        print(f'Drop-off in {movida.dropoff_location} at {movida.dropoff_datetime}')
        print()
        print('Note: daily rates do not include fees and insurance')
        print()
        for rate in movida.daily_rates():
            prices = rate["prices"]
            # The second scraped price is the one displayed (presumably the
            # discounted daily rate -- confirm against the site). Guard the
            # index so one incomplete group does not abort the whole listing.
            if len(prices) > 1:
                print(f'{rate["group_name"]} ({prices[1]:,.2f} BRL)')
            else:
                print(f'{rate["group_name"]} (price unavailable)')
    except Exception as e:
        print(f'Error: {e}')
        return 1
    return 0


if __name__ == "__main__":
    # SystemExit instead of exit(): the latter is injected by the ``site``
    # module and is not guaranteed to exist (e.g. ``python -S``).
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is only a proof of concept, and I do not recommend using it in production without mutual agreement, because Movida's
robots.txt
disallows the /reserva
path. Be careful!