Skip to content

Instantly share code, notes, and snippets.

@sirex
Created July 15, 2020 13:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sirex/300c1bf94c48068042a6b6eeee41aa50 to your computer and use it in GitHub Desktop.
Save sirex/300c1bf94c48068042a6b6eeee41aa50 to your computer and use it in GitHub Desktop.
import itertools
import json
import urllib.parse
from operator import itemgetter
import click
import lxml.etree
import requests
import requests_cache
import tqdm
from lxml import etree
from lxml.etree import XMLSyntaxError
@click.group()
def main():
    """Fetch agenda data from apps.lrs.lt into data.jsonl and search it."""
@main.command()
def update():
    """Rebuild data.jsonl from the apps.lrs.lt web service.

    Walks every ``SeimoSesija`` returned by ``ad_seimo_sesijos``, every
    ``SeimoPosėdis`` of each session (``ad_seimo_posedziai``) and every
    ``DarbotvarkėsKlausimas`` of each sitting (``ad_seimo_pos_darb``), and
    writes one JSON object per agenda question, one per line.

    The output file is opened in ``'w'`` mode, so any previous content is
    discarded before the first request is made.

    Raises:
        KeyError: if an element lacks one of the attributes read below.
        requests.HTTPError: propagated from ``lrs`` on a non-2xx response.
    """
    # json.dumps() escapes non-ASCII by default, so the explicit encoding
    # only pins down what was previously the platform default.
    with open('data.jsonl', 'w', encoding='utf-8') as f:
        # Use the session as a context manager so its connections are
        # released when the crawl finishes or fails.
        with requests_cache.CachedSession() as s:
            sesijos = lrs(s, 'ad_seimo_sesijos', ar_visos='T').xpath('//SeimoSesija')
            for sesija in pbar(sesijos, "Sesijos"):
                posedziai = lrs(
                    s,
                    'ad_seimo_posedziai',
                    sesijos_id=sesija.attrib['sesijos_id'],
                ).xpath('//SeimoPosėdis')
                for posedis in pbar(posedziai, "Posėdžiai"):
                    # Keep only the first whitespace-separated token of the
                    # 'pradžia' attribute; search() later strips '-' from it
                    # to build the p_moment URL parameter.
                    pradzia = posedis.attrib['pradžia'].split()[0]
                    darbotvarke = lrs(
                        s,
                        'ad_seimo_pos_darb',
                        posedzio_id=posedis.attrib['posėdžio_id'],
                    )
                    for klausimas in darbotvarke.xpath('//DarbotvarkėsKlausimas'):
                        row = {
                            'pradzia': pradzia,
                            'pavadinimas': klausimas.attrib['pavadinimas'],
                            'klausimai': [
                                {
                                    'darbotvarkės_klausimo_id': ks.attrib['darbotvarkės_klausimo_id'],
                                    'pavadinimas': ks.attrib['pavadinimas'],
                                    'dokumento_nuoroda': ks.attrib['dokumento_nuoroda'],
                                }
                                for ks in klausimas.findall('KlausimoStadija')
                            ],
                        }
                        print(json.dumps(row), file=f)
@main.command()
@click.argument('q')
def search(q):
    """Print data.jsonl entries whose title contains Q, grouped by day.

    For each distinct 'pradzia' value among the matches, prints a link to
    the lrs.lt agenda page for that day followed by the matching titles
    and an empty line.
    """
    # Group with an insertion-ordered dict rather than itertools.groupby():
    # groupby() only merges *adjacent* equal keys, so a day whose rows are
    # not contiguous in data.jsonl would otherwise be printed several times.
    # Days still appear in order of first occurrence, so the output is
    # unchanged when the rows are already contiguous.
    by_day = {}
    for row in itersearch(q):
        by_day.setdefault(row['pradzia'], []).append(row)

    for day, rows in by_day.items():
        # The portal expects the day without separators (e.g. 20200715).
        day = day.replace('-', '')
        print(f"- https://www.lrs.lt/sip/portal.show?p_r=35727&p_k=1&p_a=sale_darbotvarke&p_moment={day}")
        for row in rows:
            print(f" {row['pavadinimas']}")
        print()
def itersearch(q: str):
    """Yield rows from data.jsonl whose 'pavadinimas' contains *q*.

    The file is read lazily, one JSON document per line, from the current
    working directory. Matching is a plain case-sensitive substring test.

    Args:
        q: Substring to look for in each row's 'pavadinimas' value.

    Yields:
        The decoded JSON object of every matching line, in file order.

    Raises:
        FileNotFoundError: if data.jsonl does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open('data.jsonl', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline or a
                # hand-edited file) instead of failing in json.loads('').
                continue
            row = json.loads(line)
            if q in row['pavadinimas']:
                yield row
def lrs(s: requests.Session, method: str, **kwargs) -> etree._Element:
    """Call an apps.lrs.lt ``p2b.<method>`` endpoint and parse the XML reply.

    Args:
        s: Session used to perform the GET request.
        method: Endpoint suffix appended to ``http://apps.lrs.lt/sip/p2b.``.
        **kwargs: Query-string parameters; URL-encoded and appended when
            any are given.

    Returns:
        The root element of the parsed response. If the body is not
        well-formed XML, the error is printed and an empty ``<root>``
        element is returned, so XPath queries by the caller yield nothing.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond in time.
    """
    url = f'http://apps.lrs.lt/sip/p2b.{method}'
    if kwargs:
        url += '?' + urllib.parse.urlencode(kwargs)
    # Without a timeout requests waits indefinitely, which would stall a
    # long crawl on a single unresponsive connection.
    resp = s.get(url, timeout=60)
    resp.raise_for_status()
    try:
        return etree.fromstring(resp.content)
    except XMLSyntaxError as e:
        # Blank lines first so the message is not glued to a progress bar.
        print()
        print()
        print(f"Error while parsing {url}")
        print(e)
        return etree.XML('<?xml version="1.0"?><root></root>')
def pbar(*args):
    """Return a tqdm progress bar over *args* with this script's defaults.

    Positional arguments are forwarded unchanged to ``tqdm.tqdm``; the bar
    is always created with ``ascii=True`` and ``leave=False``.
    """
    options = {'ascii': True, 'leave': False}
    return tqdm.tqdm(*args, **options)
# Run the click command group only when the file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
click
lxml
requests
requests-cache
tqdm
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment