Last active
November 8, 2023 12:14
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
class AllSpider(scrapy.Spider):
    """Collect contact details for every school listed by directory.ntschools.net.

    The crawl has three steps:

    1. Scrapy fetches the page in ``start_urls``; ``parse`` ignores its body
       and only uses the callback to request the school list from the API.
    2. ``parse_json`` reads the list and requests the detail record of each
       school by its ``itSchoolCode``.
    3. ``parse_school`` turns each detail record into one scraped item.
    """

    name = "all"
    start_urls = ["https://directory.ntschools.net/#/schools"]

    # Headers sent with every API request issued by this spider.
    # NOTE(review): advertising "br" presumably requires a Brotli decoder to be
    # available to Scrapy - confirm in the deployment environment.
    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9,hi;q=0.8,lb;q=0.7",
        "Referer": "https://directory.ntschools.net/",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
        "X-Requested-With": "Fetch",
    }

    def parse(self, response):
        """Request the list of all schools from the JSON API.

        The ``response`` for the start URL is not inspected; this callback
        only serves as the entry point of the crawl.
        """
        yield scrapy.Request(
            url="https://directory.ntschools.net/api/System/GetAllSchools",
            callback=self.parse_json,
            headers=self.headers,
        )

    def parse_json(self, response):
        """Yield one detail request per entry of the school list.

        ``response.json()`` is a shortcut provided by newer Scrapy releases.
        """
        for school in response.json():
            school_code = school["itSchoolCode"]
            yield scrapy.Request(
                f"https://directory.ntschools.net/api/System/GetSchool?itSchoolCode={school_code}",
                callback=self.parse_school,
                headers=self.headers,
                # Many schools have the same code (same page) but are listed
                # more than once; keep every listing instead of letting the
                # duplicate filter drop the repeated URLs.
                dont_filter=True,
            )

    def parse_school(self, response):
        """Yield the contact fields of a single school detail record.

        A missing or null address object (or one without ``displayAddress``)
        produces ``None`` for that field instead of raising and losing the
        whole record.
        """
        data = response.json()
        # Fall back to an empty mapping so the nested lookup cannot fail on
        # a null/missing address object.
        physical_address = data.get("physicalAddress") or {}
        postal_address = data.get("postalAddress") or {}
        yield {
            "name": data["name"],
            "telephoneNumber": data["telephoneNumber"],
            "mail": data["mail"],
            "physicalAddress": physical_address.get("displayAddress"),
            "postalAddress": postal_address.get("displayAddress"),
        }
Hi Tobitheprof,
Yes, you can move to the next page using pagination and collect the data from the other pages as well.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi sir, I have a similar issue. I was wondering if there's a way to move to another page with this type of code, because the site I am scraping has multiple pages.