Skip to content

Instantly share code, notes, and snippets.

@niyumard
Created February 22, 2021 18:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niyumard/db3f18b138f357d3f225376573f6b7d4 to your computer and use it in GitHub Desktop.
Save niyumard/db3f18b138f357d3f225376573f6b7d4 to your computer and use it in GitHub Desktop.
Reads a text file of URLs and extracts rental-housing listing information from divar.ir
#!/usr/bin/env python3
from bs4 import BeautifulSoup
import requests
import convert_numbers
from csv import writer
# Script body: read ad URLs from 'url.lst', scrape each page, and append one
# row per ad to 'divar.csv' (title, scraped fields, description, URL).

# Read the URL list, one URL per line. Blank lines are dropped so they are
# never requested, and the context manager closes the list file.
with open('url.lst', 'r') as URL_List_as_txt:
    URLs = [line.strip() for line in URL_List_as_txt.read().splitlines() if line.strip()]

# Class strings of the <div> elements whose odd-indexed children are collected,
# in the order their values are appended to the row.
# NOTE(review): presumably the even-indexed children are labels and the
# odd-indexed ones the values -- confirm against the current divar.ir markup.
INFO_ROW_CLASSES = (
    'kt-group-row-item kt-group-row-item--info-row',
    'kt-base-row kt-base-row--large kt-unexpandable-row',
)
DESCRIPTION_CLASS = 'kt-description-row__text post-description kt-description-row__text--primary'

for URL in URLs:
    print("\n-------------------\nLet's start!")
    # Row data sits here and finally gets appended into a csv file as another row
    row_data = []
    try:
        # Downloads the webpage; the timeout keeps a dead server from hanging the run
        ad_webpage = requests.get(URL, timeout=30)
    except requests.exceptions.RequestException as error:
        # No response object exists when the request itself fails, so report
        # the exception and move on instead of parsing a stale/undefined page.
        print("Error: " + str(error) + ": " + URL)
        continue
    if not ad_webpage.ok:
        # HTTP error status (4xx/5xx): nothing useful to scrape on this page
        print("Error Number " + str(ad_webpage.status_code) + ": " + URL)
        continue
    print("Got it! Successful with code " + str(ad_webpage.status_code) + ":'" + URL + "'")

    soup = BeautifulSoup(ad_webpage.content, 'html.parser')

    # Ad Title is the first item in our row (None when the page has no <title>)
    ad_title = soup.title.string if soup.title is not None else None
    print(ad_title)
    row_data.append(ad_title)
    print(row_data)

    # Collect the text of every odd-indexed child of each matching <div>
    for info_class in INFO_ROW_CLASSES:
        for container in soup.find_all('div', class_=info_class):
            for child in list(container.children)[1::2]:
                row_data.append(child.get_text())

    # Convert Persian digits to English digits in columns 1..5, stopping early
    # when fewer fields were scraped so a sparse ad does not raise IndexError.
    for i in range(1, min(6, len(row_data))):
        row_data[i] = convert_numbers.persian_to_english(row_data[i])

    # Ad description: first matching paragraph, or an empty cell when absent
    description = soup.find('p', class_=DESCRIPTION_CLASS)
    row_data.append(description.get_text() if description is not None else '')
    row_data.append(URL)
    print(row_data)

    # Appends the row to the end of a csv file named divar.csv.
    # newline='' is what the csv module requires to avoid extra blank lines on
    # some platforms; UTF-8 keeps Persian text writable regardless of locale.
    with open('divar.csv', 'a', newline='', encoding='utf-8') as f_object:
        writer(f_object).writerow(row_data)
    print("Okay! Next!\n-------------------\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment