Skip to content

Instantly share code, notes, and snippets.

@imvladikon
Created March 4, 2024 22:58
Show Gist options
  • Save imvladikon/48831bf5a97915d63a1003d220675787 to your computer and use it in GitHub Desktop.
Save imvladikon/48831bf5a97915d63a1003d220675787 to your computer and use it in GitHub Desktop.
data viewer for https://data.gov.il/
from itertools import chain
import requests
from datasets import Dataset
class DataFetcher:
    """Page-by-page iterator over a CKAN ``datastore_search`` resource.

    Every ``next()`` call performs one HTTP request and returns that page's
    list of record dicts, so ``itertools.chain.from_iterable(fetcher)``
    yields the individual records.

    Attributes that are filled in from the first response:
        total: Row count reported by the server.
        total_was_estimated: Whether the server flagged ``total`` as an estimate.
        limit: Page size reported by the server.
        resource_id: Resource identifier echoed back by the server.
        offset: Number of rows requested so far (advances by ``limit`` per page).
    """

    def __init__(
        self,
        resource_id,
        base_url="https://data.gov.il",
        timeout=30.0,
    ):
        """Prepare the iterator; no network traffic happens here.

        Args:
            resource_id: Identifier of the datastore resource to download.
            base_url: Scheme and host of the portal, without a trailing slash.
            timeout: Seconds to wait for each HTTP response before giving up.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.total = None
        self.offset = None
        self.limit = None
        self.total_was_estimated = False
        self._next_url = f"{self.base_url}/api/3/action/datastore_search?resource_id={resource_id}"
        # Keep the requested id available before the first response arrives;
        # it is replaced by the server-reported value on the first page.
        self.resource_id = resource_id

    def __iter__(self):
        """Return ``self``; the object is its own iterator."""
        return self

    def __next__(self):
        """Fetch and return the next page of records.

        Returns:
            The list found under ``result.records`` of the response.

        Raises:
            StopIteration: When all rows have been requested, or the server
                returns an empty page.
        """
        # Check for exhaustion *before* issuing a request so that no call is
        # wasted on a page past the end. An estimated total cannot be trusted
        # for this, so in that case we keep paging until an empty page arrives.
        if self._next_url is None:
            raise StopIteration
        total_is_exact = self.total is not None and not self.total_was_estimated
        if total_is_exact and self.offset >= self.total:
            raise StopIteration

        result = self._get_data(self._next_url)["result"]

        if self.total is None:
            # First page: capture the dataset-level metadata.
            self.total = result["total"]
            self.total_was_estimated = result.get("total_was_estimated", False)
            self.offset = 0
            self.resource_id = result["resource_id"]
        if self.limit is None:
            self.limit = result["limit"]

        records = result["records"]
        if not records:
            # An empty page is the definitive end-of-data signal; remember it
            # so later next() calls stop without touching the network.
            self._next_url = None
            raise StopIteration

        # The "next" link is host-relative, so prefix it with the base URL.
        next_link = result["_links"].get("next")
        self._next_url = f"{self.base_url}{next_link}" if next_link else None
        self.offset += self.limit
        return records

    def _get_data(self, link):
        """GET ``link`` and return the decoded JSON body.

        Raises whatever ``requests`` raises on timeout, connection failure,
        or (via ``raise_for_status``) a 4xx/5xx response.
        """
        response = requests.get(link, timeout=self.timeout)
        # Fail loudly on HTTP errors instead of a confusing KeyError later.
        response.raise_for_status()
        return response.json()
if __name__ == '__main__':
    # Demo: download every page of one resource, flatten the pages into a
    # single list of records, load it into a Dataset, and print each row.
    record_fetcher = DataFetcher('fd56bf5b-7918-4906-99e4-b0e5102ae268')
    all_records = [row for page in record_fetcher for row in page]
    ds = Dataset.from_list(all_records)
    for record in ds:
        print(record)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment