Skip to content

Instantly share code, notes, and snippets.

@ingenieroariel
Forked from JorgeMartinezG/fetch_from_csw.py
Last active February 24, 2017 13:02
Show Gist options
  • Save ingenieroariel/7da7763a7a84fafe6759c8840773dfee to your computer and use it in GitHub Desktop.
Save ingenieroariel/7da7763a7a84fafe6759c8840773dfee to your computer and use it in GitHub Desktop.
import os
import sys
import requests
import time
from lxml import etree
# Harvest configuration: the catalogue endpoint, where pages are saved, how
# many records to walk through in one run and how many to ask for per request.
CSW_URL = 'https://catalog.data.gov/csw-all'
OUTPUT_DIR = 'data_gov'
MAX_RECORDS = 125000
PAGE_SIZE = 10
# Pause between consecutive requests, in seconds.
REQUEST_DELAY = 10

# Envelope written around every page of records. Kept as bytes because
# etree.tostring() yields bytes and the output file is opened in binary mode.
TRANSACTION_HEAD = (
    b'<csw:Transaction xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" '
    b'xmlns:ows="http://www.opengis.net/ows" '
    b'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
    b'xsi:schemaLocation="http://www.opengis.net/cat/csw/2.0.2 '
    b'http://schemas.opengis.net/csw/3.0.0/CSW-publication.xsd" '
    b'service="CSW" version="2.0.2" xmlns:dc="http://purl.org/dc/'
    b'elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" >\n'
    b' <csw:Insert>\n'
)
TRANSACTION_TAIL = (
    b' </csw:Insert>\n'
    b'</csw:Transaction>\n'
)


def page_starts(first, total, page_size):
    """Return the start position of every page needed to cover `total` records.

    The sequence begins at `first` and advances by `page_size`; it stops
    before `first + total`, so no page beyond the requested total is fetched.
    """
    return range(first, first + total, page_size)


def extract_records(pretty_xml):
    """Return `pretty_xml` without its first three and last three lines.

    `pretty_xml` is the pretty-printed response as bytes. The dropped lines
    are presumably the GetRecordsResponse / SearchStatus / SearchResults
    wrapper tags (plus the empty chunk after the final newline) -- this
    relies on lxml's pretty-print layout; verify if the server output changes.
    """
    return b'\n'.join(pretty_xml.split(b'\n')[3:-3])


def build_transaction(records):
    """Wrap `records` (bytes) in the csw:Transaction / csw:Insert envelope."""
    return TRANSACTION_HEAD + records + b'\n' + TRANSACTION_TAIL


def main(argv):
    """Download catalogue records page by page and save each page to disk.

    `argv` is the command line; `argv[1]` must be the integer start position
    of the first record to fetch. Each page is written to
    OUTPUT_DIR/file_<start_position>.xml. A failure on one page is reported
    and the run continues with the next page.
    """
    try:
        first = int(argv[1])
    except (IndexError, ValueError):
        # A missing or non-numeric argument gets a usage line, not a traceback.
        sys.exit('usage: fetch_from_csw.py START_POSITION')

    query = {
        'service': 'CSW',
        'version': '2.0.2',
        'request': 'GetRecords',
        'typenames': 'csw:Record',
        'elementsetname': 'full',
        'resulttype': 'results',
        # Derived from PAGE_SIZE so it cannot drift from the loop step.
        'maxrecords': str(PAGE_SIZE),
    }

    # The output folder does not change between pages: create it once.
    if not os.path.isdir(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)

    for start_position in page_starts(first, MAX_RECORDS, PAGE_SIZE):
        print('Getting records from position {0}'.format(start_position))
        query['startposition'] = start_position
        try:
            response = requests.get(CSW_URL, params=query, timeout=30)
            # Without this an HTTP error body would be sliced and saved
            # as if it were a page of records.
            response.raise_for_status()
            pretty = etree.tostring(
                etree.fromstring(response.content), pretty_print=True)
            document = build_transaction(extract_records(pretty))
            # Saving.
            path = os.path.join(
                OUTPUT_DIR, 'file_{0}.xml'.format(start_position))
            with open(path, 'wb') as f:
                f.write(document)
        except Exception as e:
            # Best effort: report the failed page and move on to the next one.
            print('Error... {0}'.format(e))
        # Pause after failures as well, so repeated fast errors do not turn
        # into a burst of back-to-back requests.
        time.sleep(REQUEST_DELAY)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment