Skip to content

Instantly share code, notes, and snippets.

@brianraila
Last active October 17, 2018 08:51
Show Gist options
  • Save brianraila/d015bee1865d85d277924214a6151276 to your computer and use it in GitHub Desktop.
Save brianraila/d015bee1865d85d277924214a6151276 to your computer and use it in GitHub Desktop.
New cars
import requests
import csv
from bs4 import BeautifulSoup
# NOTE(review): presumably the first and one-past-last listing offsets to
# scrape (540 = 9 pages x 60 results per page) -- confirm.
max_pgs = 540
min_pgs = 0
# Listing-page URL template; the single placeholder fills the BRSR query
# parameter.
# NOTE(review): BRSR looks like a result offset and RPG=60 like the number of
# results per page -- confirm against the site.
url = 'https://www.sgcarmart.com/new_cars/newcars_listing.php?BRSR={}&RPG=60'
# Price-history URL template; the two placeholders fill the CarCode and
# Subcode query parameters, in that order.
graph_url = 'https://www.sgcarmart.com/new_cars/acars_pricehistory.php?CarCode={}&Subcode={}'
def get_links(page_url, timeout=30):
    """Download ``page_url`` and return every ``<a>`` element found in it.

    Args:
        page_url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole scrape.

    Returns:
        The result of ``soup.find_all('a')``: all anchor tags of the page,
        in document order.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    r = requests.get(page_url, timeout=timeout)
    print('got page')
    soup = BeautifulSoup(r.text, 'lxml')
    return soup.find_all('a')
def get_codes(links):
    """Extract ``[car_code, sub_code]`` pairs from the spec-page links.

    A link is considered only when its ``href`` contains both ``'Subcode'``
    and ``'specs'``. The codes are read positionally from the query string:
    the value of the first parameter is the car code and the value of the
    second parameter is the sub code.

    Args:
        links: Iterable of objects exposing ``.get('href')`` (for example
            BeautifulSoup anchor tags).

    Returns:
        A list of ``[car_code, sub_code]`` string pairs, in input order.
        Links without an ``href`` or with a query string that does not have
        the expected two ``key=value`` parameters are skipped.
    """
    codes = []
    for link in links:
        href = link.get('href')
        # Anchors without an href attribute yield None; testing
        # `'Subcode' in None` would raise TypeError, so skip them.
        if not href or 'Subcode' not in href or 'specs' not in href:
            continue
        try:
            params = href.split('?')[1].split('&')
            car_code = params[0].split('=')[1]
            sub_code = params[1].split('=')[1]
        except IndexError:
            # The link matched the keywords but does not carry the expected
            # `?<key>=<car>&<key>=<sub>` query; ignore it rather than abort
            # the whole scrape.
            continue
        codes.append([car_code, sub_code])
        print('Code : {}\nSubcode : {}'.format(car_code, sub_code))
    return codes
def fetch_and_save(codes, timeout=30, csv_path='task1.csv'):
    """Fetch each car's price-history page and append one CSV row per car.

    For every ``[car_code, sub_code]`` pair the page built from
    ``graph_url`` is downloaded. The text of each
    ``<td class="Auth_Car_NormalText">`` cell that does not contain ``-`` is
    reduced to its first space-separated token, and the tokens are appended
    to the CSV file as one row. When the page has the title ``<div>``, its
    text minus the last 12 characters is placed first in the row.

    Args:
        codes: Iterable of ``[car_code, sub_code]`` pairs, as returned by
            ``get_codes``.
        timeout: Seconds to wait for each HTTP response; without it a
            stalled connection would hang the run indefinitely.
        csv_path: File the rows are appended to.

    Raises:
        requests.exceptions.RequestException: if a request fails or
            times out.
    """
    for code in codes:
        # Named history_url so the module-level `url` template is not shadowed.
        history_url = graph_url.format(str(code[0]), str(code[1]))
        r = requests.get(history_url, timeout=timeout)
        soup = BeautifulSoup(r.text, 'lxml')
        cells = soup.find_all('td', attrs={'class': 'Auth_Car_NormalText'})
        title = soup.find('div', {'style':'color:#086DAD;font-weight:bold;font-size:16px;'})

        row = [cell.text.split(" ")[0] for cell in cells if '-' not in cell.text]
        if title:
            # NOTE(review): the last 12 characters are presumably a fixed
            # suffix of the heading -- confirm against a live page.
            label = title.text[:-12]
            row.insert(0, label)
        else:
            # No title element on the page: the row is still written, and
            # the codes are used for the progress message instead of
            # dereferencing None.
            label = '{}/{}'.format(code[0], code[1])

        # newline='' lets the csv module control line endings itself
        # (prevents blank lines between rows on Windows). The file is
        # reopened per row so every car is on disk before the next request.
        with open(csv_path, 'a', newline='') as csv_file:
            csv.writer(csv_file).writerow(row)
        print('Saved {}'.format(label))
# Driver: collect the anchors of every listing page, extract the car codes
# from them, then fetch and save each car's price history.
all_links = []
page_size = 60  # step between listing offsets; matches RPG=60 in `url`
# With the bounds defined in this file (min_pgs=0, max_pgs=540) this yields
# the offsets 0, 60, ..., 480 -- the same nine pages as a hard-coded
# range(0, 9) multiplied by 60.
for i, cursor in enumerate(range(min_pgs, max_pgs, page_size)):
    page_links = get_links(url.format(str(cursor)))
    print("Page {} links fetched".format(str(i + 1)))
    # extend() appends in place instead of copying the accumulated list on
    # every page.
    all_links.extend(page_links)
all_car_codes = get_codes(all_links)
fetch_and_save(all_car_codes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment