Skip to content

Instantly share code, notes, and snippets.

@jarek
Created August 21, 2017 18:45
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jarek/d73c672d8dd4ddb48d80bffc4d8038ba to your computer and use it in GitHub Desktop.
Save jarek/d73c672d8dd4ddb48d80bffc4d8038ba to your computer and use it in GitHub Desktop.
SV parser beginnings
# -*- coding: utf-8 -*-
# for https://github.com/tmrowco/electricitymap/issues/678
from bs4 import BeautifulSoup
from pprint import pprint
import requests
import json
# ASP.NET page that hosts the "OperacionDiaria" dashboard.
url = 'http://estadistico.ut.com.sv/OperacionDiaria.aspx'

# Resource identifiers posted together with the callback. NOTE(review): these
# look like values captured from a browser request -- confirm they are still
# accepted if the server-side dashboard component is upgraded.
DXCss = '1_33,1_4,1_9,1_5,15_2,15_4'
DXScript = '1_232,1_134,1_225,1_169,1_187,15_1,1_183,1_182,1_140,1_147,1_148,1_142,1_141,1_143,1_144,1_145,1_146,15_0,15_6,15_7'

# this works, but only gives data for current day
callback_param_init = 'c0:{"Task":"Initialize","DashboardId":"OperacionDiaria","Settings":{"calculateHiddenTotals":false},"RequestMarker":0,"ClientState":{}}'

# This should work to get any date, but doesn't work with just any
# SessionId/Context, or with no SessionId and Context specified.
# It might work after you call to callback_param_init first and extract
# SessionId and Context from the response to that.
# JavaScript months are zero-based, so `new Date(2017,7,20)` is 20 August 2017.
# Not used by main() yet. Variant with an explicit SessionId/Context, kept for
# reference:
#   '__CALLBACKPARAM:c0:{"Task":"ReloadData","DashboardParameters":[{"Name":"FechaConsulta","Value":new Date(2017,7,20)}],"SessionId":"7d34ddcb-9f7c-43ae-899e-53c1f193ec37","Context":"BwAHAAIkYTk4OTllZDktNWE3MS00MGE5LWFkMDMtNWU1OWJhNzViMGU0Ag9PcGVyYWNpb25EaWFyaWECAAIAAAAAAMByQA==","RequestMarker":1,"ClientState":{}}'
callback_param_specific_date = '__CALLBACKPARAM:c0:{"Task":"ReloadData","DashboardParameters":[{"Name":"FechaConsulta","Value":new Date(2017,7,20)}],"RequestMarker":1,"ClientState":{}}'

# Seconds to wait on each HTTP call; requests waits forever when no timeout
# is given.
REQUEST_TIMEOUT = 30

# Text the server puts in front of the callback payload. The payload is
# followed by one closing character, which is sliced off together with it.
RESPONSE_PREFIX = '0|/*DX*/('


def hidden_field(soup, field_id):
    """Return the ``value`` of the ``<input>`` element whose id is *field_id*.

    *soup* is the parsed page. Raises ``ValueError`` when the page has no such
    input, which usually means the server returned something other than the
    dashboard page.
    """
    tag = soup.find('input', attrs={'id': field_id})
    if tag is None:
        raise ValueError('page has no <input id="%s"> field' % field_id)
    return tag['value']


def extract_callback_payload(text):
    """Unwrap a callback response body and return it as a dict.

    *text* is the decoded response body. It is expected to start with
    ``RESPONSE_PREFIX`` and to end with one closing character. What is in
    between uses single-quoted strings, so the quotes are swapped for double
    quotes before the text is handed to ``json.loads``.

    Raises ``ValueError`` when the prefix is missing or when the unwrapped
    text is not valid JSON.
    """
    if not text.startswith(RESPONSE_PREFIX):
        raise ValueError(
            'unexpected callback response, starts with %r' % text[:40])
    double_json = text[len(RESPONSE_PREFIX):-1]
    # Blunt quote swap: it would corrupt any value containing an apostrophe.
    double_json = double_json.replace('\'', '"')
    return json.loads(double_json)


def main():
    """Request the current day's dashboard data and dump it to disk.

    Prints the POST status code, the keys of the unwrapped payload and the
    first and last 1000 characters of its ``result`` entry. Writes the raw
    response to ``response.txt`` and the trimmed ``result`` to ``response.js``.
    """
    # The session keeps the cookies from the page load for the callback POST
    # and is closed when the block exits.
    with requests.Session() as s:
        pagereq = s.get(url, timeout=REQUEST_TIMEOUT)
        pagereq.raise_for_status()
        soup = BeautifulSoup(pagereq.content, 'html.parser')

        postdata = {
            '__VIEWSTATE': hidden_field(soup, '__VIEWSTATE'),
            '__VIEWSTATEGENERATOR': hidden_field(soup, '__VIEWSTATEGENERATOR'),
            '__EVENTVALIDATION': hidden_field(soup, '__EVENTVALIDATION'),
            '__CALLBACKPARAM': callback_param_init,
            '__CALLBACKID': 'ASPxDashboardViewer1',
            'DXScript': DXScript,
            'DXCss': DXCss,
        }
        datareq = s.post(url, data=postdata, timeout=REQUEST_TIMEOUT)

    print(datareq.status_code)

    # Keep the raw bytes around so a failed parse below can be inspected.
    with open('response.txt', 'wb') as f:
        f.write(datareq.content)

    # You might get encoding problems reading datareq.text in Python 2.
    # It works fine in Python 3 and I'm sure "the usual hacks" would solve
    # it in Python 2...
    data = extract_callback_payload(datareq.text)

    print(data.keys())
    print(data['result'][:1000])
    print(data['result'][-1000:])

    # Drops the first and last character of the result string
    # (presumably wrapping delimiters -- TODO confirm against a real response).
    with open('response.js', 'w') as f:
        f.write(data['result'][1:-1])

    # At this point, response.js contains a dict that approximates but is not
    # quite JSON. More specifically, it is Javascript. It looks like the
    # "result" key was intended to be fed into JS eval(), which could deal
    # with single-quotes and process values like
    # `new Date(2017,7,11,0,0,0,0)` as expected.
    #
    # The easiest way to get some data would be to do string searches on
    # data['result'] to find parts that have information we want without
    # containing explicit timestamps (`new Date(2017,7,11,0,0,0,0)...`).
    # Then cut out that bit and feed it into json.loads(). Search for
    # "Hidroel" will get you started.


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment