Created
July 17, 2018 00:14
-
-
Save calum-chamberlain/5687bfab1890c7385dab4bb8e4fd10a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Testing QuakeML parsing.\n", | |
"\n", | |
"Obspy issue [#1910](https://github.com/obspy/obspy/issues/1910) highlights that QuakeML parsing is currently slow in Obspy for large catalogs, which is becoming a bottleneck for packages relying on Obspy for IO. Lion Krischer suggested that parsing the xml to a dictionary immediately then deserializing the dictionary might be a good way to go. This notebook serves as a proof-of-concept for an initial go at this.\n", | |
"\n", | |
"First written 17/07/2018.\n", | |
"\n", | |
"Tests run using Obspy 1.1.0.\n", | |
"\n", | |
"First step is to get the `Unpickler` code in [here](https://github.com/obspy/obspy/blob/Quakeml-dictionary-reading/obspy/io/quakeml/core.py#L85). Note that `_extra` isn't implemented yet because I haven't settled on how to parse namespaces so I'm leaving that until that is decided.\n", | |
"\n", | |
"Apologies for the length..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import io\n", | |
"import warnings\n", | |
"\n", | |
"from lxml import etree\n", | |
"\n", | |
"from obspy.io.quakeml.core import _xml_doc_from_anything\n", | |
"from obspy.core.event import (Amplitude, Arrival, Axis, Catalog, Comment,\n", | |
"                               CompositeTime, ConfidenceEllipsoid, CreationInfo,\n", | |
"                               DataUsed, Event, EventDescription,\n", | |
"                               FocalMechanism, Magnitude, MomentTensor,\n", | |
"                               NodalPlane, NodalPlanes, Origin, OriginQuality,\n", | |
"                               OriginUncertainty, Pick, PrincipalAxes,\n", | |
"                               QuantityError, ResourceIdentifier,\n", | |
"                               SourceTimeFunction, StationMagnitude,\n", | |
"                               StationMagnitudeContribution, Tensor, TimeWindow,\n", | |
"                               WaveformStreamID)\n", | |
"from obspy.core.utcdatetime import UTCDateTime\n", | |
"from obspy.core.util import AttribDict, Enum\n", | |
"\n", | |
"\n", | |
"NSMAP_QUAKEML = {None: \"http://quakeml.org/xmlns/bed/1.2\",\n", | |
" 'q': \"http://quakeml.org/xmlns/quakeml/1.2\"}\n", | |
"\n", | |
"\n", | |
"def _make_dict_from_tree(element_tree):\n", | |
" \"\"\"\n", | |
" Traverse the given XML element tree to convert it into a dictionary.\n", | |
" :param element_tree: An XML element tree\n", | |
" :type element_tree: xml.etree.ElementTree\n", | |
" :rtype: dict\n", | |
" \"\"\"\n", | |
" def internal_iter(tree, accum):\n", | |
" \"\"\"\n", | |
" Recursively iterate through the elements of the tree accumulating\n", | |
" a dictionary result.\n", | |
" :param tree: The XML element tree\n", | |
" :type tree: xml.etree.ElementTree\n", | |
" :param accum: Dictionary into which data is accumulated\n", | |
" :type accum: dict\n", | |
" :rtype: dict\n", | |
" \"\"\"\n", | |
" if tree is None:\n", | |
" return accum\n", | |
" tag = tree.tag.lstrip(\"{\").split(\"}\")[-1]\n", | |
" accum[tag] = {}\n", | |
" if tree.getchildren():\n", | |
" for each in tree.getchildren():\n", | |
" each_tag = each.tag.lstrip(\"{\").split(\"}\")[-1]\n", | |
" result = internal_iter(each, {})\n", | |
" if each_tag == \"waveformID\":\n", | |
" # WaveformID is a special case...\n", | |
" result = {'waveformID': {\n", | |
" 'id': result[each_tag],\n", | |
" 'networkCode': each.get('networkCode'),\n", | |
" 'stationCode': each.get('stationCode'),\n", | |
" 'locationCode': each.get('locationCode'),\n", | |
" 'channelCode': each.get('channelCode'),\n", | |
" }}\n", | |
" if each_tag == \"nodalPlanes\":\n", | |
" result[each_tag].update({\n", | |
" \"preferredPlane\": each.get(\"preferredPlane\")})\n", | |
" if each_tag in accum[tag]:\n", | |
" if not isinstance(accum[tag][each_tag], list):\n", | |
" accum[tag][each_tag] = [\n", | |
" accum[tag][each_tag]\n", | |
" ]\n", | |
" accum[tag][each_tag].append(result[each_tag])\n", | |
" else:\n", | |
" accum[tag].update(result)\n", | |
" public_id = tree.get('publicID')\n", | |
" if public_id is not None:\n", | |
" accum[tag].update({'publicID': public_id})\n", | |
" id_value = tree.get('id')\n", | |
" if id_value is not None:\n", | |
" accum[tag].update({'id': id_value})\n", | |
" namespace = tree.tag.lstrip(\"{\").split(\"}\")[0]\n", | |
" if namespace is not None:\n", | |
" if id_value is not None or public_id is not None:\n", | |
" accum[tag].update({'namespace': namespace})\n", | |
" if not accum[tag]:\n", | |
" # If there is nothing in here, just set to the text\n", | |
" accum[tag] = tree.text\n", | |
" return accum\n", | |
"\n", | |
" qml_dict = internal_iter(tree=element_tree, accum={})\n", | |
" return qml_dict\n", | |
"\n", | |
"\n", | |
"class DictUnpickler(object):\n", | |
" \"\"\"\n", | |
" De-serializes a QuakeML string to an Obspy Catalog object, via an\n", | |
" intermediate dictionary.\n", | |
" \"\"\"\n", | |
"    def __init__(self, xml_dict=None):\n", | |
"        # Dict representation of the QuakeML document; load()/loads()\n", | |
"        # overwrite this with the parsed input.\n", | |
"        self.xml_dict = xml_dict\n", | |
"\n", | |
" def load(self, file):\n", | |
" \"\"\"\n", | |
" Reads QuakeML file into ObsPy catalog object.\n", | |
" :type file: str\n", | |
" :param file: File name to read.\n", | |
" :rtype: :class:`~obspy.core.event.Catalog`\n", | |
" :returns: ObsPy Catalog object.\n", | |
" \"\"\"\n", | |
" self.xml_dict = _make_dict_from_tree(\n", | |
" _xml_doc_from_anything(file))\n", | |
" self.xml_root = _xml_doc_from_anything(file)\n", | |
" return self._deserialize()\n", | |
"\n", | |
" def loads(self, string):\n", | |
" \"\"\"\n", | |
" Parses QuakeML string into ObsPy catalog object.\n", | |
" :type string: str\n", | |
" :param string: QuakeML string to parse.\n", | |
" :rtype: :class:`~obspy.core.event.Catalog`\n", | |
" :returns: ObsPy Catalog object.\n", | |
" \"\"\"\n", | |
" self.xml_dict = _make_dict_from_tree(\n", | |
" etree.parse(io.BytesIO(string)))\n", | |
" self.xml_root = etree.parse(io.BytesIO(string))\n", | |
" return self._deserialize()\n", | |
"\n", | |
"    def _deserialize(self):\n", | |
"        \"\"\"\n", | |
"        Build an obspy Catalog from self.xml_dict / self.xml_root.\n", | |
"        :rtype: :class:`~obspy.core.event.Catalog`\n", | |
"        :raises Exception: if the dict lacks quakeml/eventParameters.\n", | |
"        \"\"\"\n", | |
"        try:\n", | |
"            event_list = self.xml_dict['quakeml']['eventParameters']['event']\n", | |
"        except KeyError:\n", | |
"            raise Exception(\"Not a QuakeML compatible file or string\")\n", | |
"        self._quakeml_namespaces = [\n", | |
"            ns for ns in self.xml_root.nsmap.values()\n", | |
"            if ns.startswith(r\"http://quakeml.org/xmlns/\")]\n", | |
"        if not isinstance(event_list, list):\n", | |
"            # Might not be a list if only one event present\n", | |
"            event_list = [event_list]\n", | |
"        catalog = Catalog(force_resource_id=False)\n", | |
"        # add any custom namespace abbreviations of root element to Catalog\n", | |
"        catalog.nsmap = self.xml_root.nsmap.copy()\n", | |
"        catalog.description = self.xml_dict['quakeml']['eventParameters'].get(\n", | |
"            'description')\n", | |
"        catalog.comments = self._comments(\n", | |
"            self.xml_dict['quakeml']['eventParameters'])\n", | |
"        catalog.creation_info = self._creation_info(\n", | |
"            self.xml_dict['quakeml']['eventParameters'])\n", | |
"        for event_dictionary in event_list:\n", | |
"            # create new Event object\n", | |
"            event = Event(\n", | |
"                force_resource_id=False,\n", | |
"                preferred_origin_id=event_dictionary.get('preferredOriginID'),\n", | |
"                preferred_magnitude_id=event_dictionary.get(\n", | |
"                    'preferredMagnitudeID'),\n", | |
"                preferred_focal_mechanism_id=event_dictionary.get(\n", | |
"                    'preferredFocalMechanismID'))\n", | |
"            event_type = event_dictionary.get('type')\n", | |
"            # Change for QuakeML 1.2RC4. 'null' is no longer acceptable as an\n", | |
"            # event type. Will be replaced with 'not reported'.\n", | |
"            if event_type == \"null\":\n", | |
"                event_type = \"not reported\"\n", | |
"            # USGS event types contain '_' which is not compliant with\n", | |
"            # the QuakeML standard\n", | |
"            if isinstance(event_type, str):\n", | |
"                event_type = event_type.replace(\"_\", \" \")\n", | |
"            try:\n", | |
"                event.event_type = event_type\n", | |
"            except ValueError:\n", | |
"                msg = \"Event type '%s' does not comply \" % event_type\n", | |
"                msg += \"with QuakeML standard -- event will be ignored.\"\n", | |
"                warnings.warn(msg, UserWarning)\n", | |
"                continue\n", | |
"            self._set_enum('typeCertainty', event_dictionary, event,\n", | |
"                           'event_type_certainty')\n", | |
"            event.creation_info = self._creation_info(event_dictionary)\n", | |
"            event.event_descriptions = self._event_description(event_dictionary)\n", | |
"            event.comments = self._comments(event_dictionary)\n", | |
"            # origins\n", | |
"            event.origins = []\n", | |
"            for origin_el in self._value_to_list('origin', event_dictionary):\n", | |
"                # Have to be created before the origin is created to avoid a\n", | |
"                # rare issue where a warning is read when the same event is\n", | |
"                # read twice - the warnings does not occur if two referred\n", | |
"                # to objects compare equal - for this the arrivals have to\n", | |
"                # be bound to the event before the resource id is assigned.\n", | |
"                arrivals = []\n", | |
"                for arrival_el in self._value_to_list('arrival', origin_el):\n", | |
"                    arrival = self._arrival(arrival_el)\n", | |
"                    arrivals.append(arrival)\n", | |
"\n", | |
"                origin = self._origin(origin_el, arrivals=arrivals)\n", | |
"\n", | |
"                # append origin with arrivals\n", | |
"                event.origins.append(origin)\n", | |
"            # magnitudes\n", | |
"            event.magnitudes = []\n", | |
"            for magnitude_el in self._value_to_list(\n", | |
"                    'magnitude', event_dictionary):\n", | |
"                magnitude = self._magnitude(magnitude_el)\n", | |
"                event.magnitudes.append(magnitude)\n", | |
"            # station magnitudes\n", | |
"            event.station_magnitudes = []\n", | |
"            for magnitude_el in self._value_to_list(\n", | |
"                    'stationMagnitude', event_dictionary):\n", | |
"                magnitude = self._station_magnitude(magnitude_el)\n", | |
"                event.station_magnitudes.append(magnitude)\n", | |
"            # picks\n", | |
"            event.picks = []\n", | |
"            for pick_el in self._value_to_list('pick', event_dictionary):\n", | |
"                pick = self._pick(pick_el)\n", | |
"                event.picks.append(pick)\n", | |
"            # amplitudes\n", | |
"            event.amplitudes = []\n", | |
"            for el in self._value_to_list('amplitude', event_dictionary):\n", | |
"                amp = self._amplitude(el)\n", | |
"                event.amplitudes.append(amp)\n", | |
"            # focal mechanisms\n", | |
"            event.focal_mechanisms = []\n", | |
"            for fm_el in self._value_to_list(\n", | |
"                    'focalMechanism', event_dictionary):\n", | |
"                fm = self._focal_mechanism(fm_el)\n", | |
"                event.focal_mechanisms.append(fm)\n", | |
"            # finally append newly created event to catalog\n", | |
"            event.resource_id = event_dictionary.get('publicID')\n", | |
"            # NOTE(review): _extra is not implemented in this notebook yet\n", | |
"            # (see the intro markdown cell) - confirm it exists at runtime.\n", | |
"            self._extra(event_dictionary, event)\n", | |
"            catalog.append(event)\n", | |
"        # return catalog\n", | |
"        catalog.resource_id = self.xml_dict['quakeml']['eventParameters']['publicID']\n", | |
"        return catalog\n", | |
"\n", | |
"    def _attribute_to_value(self, key, dictionary, convert_to=str):\n", | |
"        \"\"\"\n", | |
"        Get ``key`` from ``dictionary`` converted to ``convert_to``.\n", | |
"        Returns None when the key is missing or the conversion fails\n", | |
"        (with a warning). bool conversion accepts only the strings\n", | |
"        'true'/'1'/'false'/'0' (case-insensitive), otherwise None.\n", | |
"        \"\"\"\n", | |
"        attribute = dictionary.get(key, None)\n", | |
"        if attribute is None:\n", | |
"            return None\n", | |
"        if convert_to == bool:\n", | |
"            if attribute.lower() in [\"true\", \"1\"]:\n", | |
"                return True\n", | |
"            elif attribute.lower() in [\"false\", \"0\"]:\n", | |
"                return False\n", | |
"            return None\n", | |
"        try:\n", | |
"            return convert_to(attribute)\n", | |
"        except Exception:\n", | |
"            msg = \"Could not convert %s to type %s. Returning None.\"\n", | |
"            warnings.warn(msg % (attribute, convert_to))\n", | |
"            return None\n", | |
"\n", | |
" def _value_to_list(self, key, dictionary):\n", | |
" \"\"\" Simple but repeated list checking. \"\"\"\n", | |
" attribute = dictionary.get(key)\n", | |
" if attribute is None:\n", | |
" return []\n", | |
" if not isinstance(attribute, list):\n", | |
" return [attribute]\n", | |
" return attribute\n", | |
"\n", | |
"    def _set_enum(self, key_in, element, obj, key_out):\n", | |
"        \"\"\"\n", | |
"        Set enum property ``key_out`` on ``obj`` from ``element[key_in]``,\n", | |
"        warning (instead of raising) on values the Enum rejects.\n", | |
"        \"\"\"\n", | |
"        obj_type = obj._property_dict[key_out]\n", | |
"        if not isinstance(obj_type, Enum): # pragma: no cover\n", | |
"            raise ValueError\n", | |
"        value = element.get(key_in)\n", | |
"        try:\n", | |
"            setattr(obj, key_out, value)\n", | |
"        except ValueError as e:\n", | |
"            msg = ('%s. The attribute \"%s\" will not be set and will be missing'\n", | |
"                   ' in the resulting object.' % (e.args[0], key_out))\n", | |
"            warnings.warn(msg)\n", | |
"\n", | |
"    def _comments(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build a list of Comment objects from parent['comment'] entries.\n", | |
"        \"\"\"\n", | |
"        obj = []\n", | |
"        comments = self._value_to_list('comment', parent)\n", | |
"        for el in comments:\n", | |
"            comment = Comment(force_resource_id=False)\n", | |
"            comment.text = el.get('text')\n", | |
"            comment.creation_info = self._creation_info(el)\n", | |
"            comment.resource_id = el.get('id', None)\n", | |
"            self._extra(el, comment)\n", | |
"            obj.append(comment)\n", | |
"        return obj\n", | |
"\n", | |
"    def _station_magnitude_contributions(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build StationMagnitudeContribution objects from\n", | |
"        parent['stationMagnitudeContribution'] entries.\n", | |
"        \"\"\"\n", | |
"        obj = []\n", | |
"        for el in self._value_to_list(\"stationMagnitudeContribution\", parent):\n", | |
"            contrib = StationMagnitudeContribution()\n", | |
"            contrib.weight = self._attribute_to_value(\"weight\", el, float)\n", | |
"            contrib.residual = self._attribute_to_value(\"residual\", el, float)\n", | |
"            contrib.station_magnitude_id = \\\n", | |
"                self._attribute_to_value(\"stationMagnitudeID\", el, str)\n", | |
"            self._extra(el, contrib)\n", | |
"            obj.append(contrib)\n", | |
"        return obj\n", | |
"\n", | |
"    def _creation_info(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build a CreationInfo from parent['creationInfo'] (None if absent).\n", | |
"        \"\"\"\n", | |
"        elements = self._value_to_list(\"creationInfo\", parent)\n", | |
"        if len(elements) > 1:\n", | |
"            raise NotImplementedError(\"Only one CreationInfo allowed.\")\n", | |
"        elif len(elements) == 0:\n", | |
"            return None\n", | |
"        element = elements[0]\n", | |
"        obj = CreationInfo()\n", | |
"        obj.agency_uri = element.get('agencyURI')\n", | |
"        obj.author_uri = element.get('authorURI')\n", | |
"        obj.agency_id = element.get('agencyID')\n", | |
"        obj.author = element.get('author')\n", | |
"        try:\n", | |
"            obj.creation_time = UTCDateTime(element.get('creationTime'))\n", | |
"        except TypeError:\n", | |
"            # UTCDateTime(None) raises TypeError when creationTime absent.\n", | |
"            pass\n", | |
"        obj.version = element.get('version')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _origin_quality(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build an OriginQuality from parent['quality'] (None if absent).\n", | |
"        \"\"\"\n", | |
"        elements = self._value_to_list(\"quality\", parent)\n", | |
"        if len(elements) > 1:\n", | |
"            raise NotImplementedError(\"Only one OriginQuality allowed.\")\n", | |
"        # Do not add an element if it is not given in the XML file.\n", | |
"        elif len(elements) == 0:\n", | |
"            return None\n", | |
"        element = elements[0]\n", | |
"        obj = OriginQuality()\n", | |
"        obj.associated_phase_count = self._attribute_to_value(\n", | |
"            'associatedPhaseCount', element, int)\n", | |
"        obj.used_phase_count = self._attribute_to_value(\n", | |
"            'usedPhaseCount', element, int)\n", | |
"        obj.associated_station_count = self._attribute_to_value(\n", | |
"            'associatedStationCount', element, int)\n", | |
"        obj.used_station_count = self._attribute_to_value(\n", | |
"            'usedStationCount', element, int)\n", | |
"        obj.depth_phase_count = self._attribute_to_value(\n", | |
"            'depthPhaseCount', element, int)\n", | |
"        obj.standard_error = self._attribute_to_value(\n", | |
"            'standardError', element, float)\n", | |
"        obj.azimuthal_gap = self._attribute_to_value(\n", | |
"            'azimuthalGap', element, float)\n", | |
"        obj.secondary_azimuthal_gap = self._attribute_to_value(\n", | |
"            'secondaryAzimuthalGap', element, float)\n", | |
"        obj.ground_truth_level = self._attribute_to_value(\n", | |
"            'groundTruthLevel', element)\n", | |
"        obj.minimum_distance = self._attribute_to_value(\n", | |
"            'minimumDistance', element, float)\n", | |
"        obj.maximum_distance = self._attribute_to_value(\n", | |
"            'maximumDistance', element, float)\n", | |
"        obj.median_distance = self._attribute_to_value(\n", | |
"            'medianDistance', element, float)\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _event_description(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build a list of EventDescription objects from parent['description'].\n", | |
"        \"\"\"\n", | |
"        out = []\n", | |
"        descriptions = self._value_to_list('description', parent)\n", | |
"        for el in descriptions:\n", | |
"            desc = EventDescription()\n", | |
"            desc.text = el.get('text')\n", | |
"            self._set_enum('type', el, desc, 'type')\n", | |
"            out.append(desc)\n", | |
"            self._extra(el, out[-1])\n", | |
"        return out\n", | |
"\n", | |
" def _value(self, parent, name, quantity_type=float):\n", | |
" try:\n", | |
" el = self._value_to_list(name, parent)[0]\n", | |
" except IndexError:\n", | |
" return None, None\n", | |
"\n", | |
" value = self._attribute_to_value('value', el, quantity_type)\n", | |
" # All errors are QuantityError.\n", | |
" error = QuantityError()\n", | |
" # Don't set the errors if they are not set.\n", | |
" confidence_level = self._attribute_to_value(\n", | |
" 'confidenceLevel', el, float)\n", | |
" if confidence_level is not None:\n", | |
" error.confidence_level = confidence_level\n", | |
" if quantity_type != int:\n", | |
" uncertainty = self._attribute_to_value('uncertainty', el, float)\n", | |
" if uncertainty is not None:\n", | |
" error.uncertainty = uncertainty\n", | |
" lower_uncertainty = self._attribute_to_value(\n", | |
" 'lowerUncertainty', el, float)\n", | |
" if lower_uncertainty is not None:\n", | |
" error.lower_uncertainty = lower_uncertainty\n", | |
" upper_uncertainty = self._attribute_to_value(\n", | |
" 'upperUncertainty', el, float)\n", | |
" if upper_uncertainty is not None:\n", | |
" error.upper_uncertainty = upper_uncertainty\n", | |
" else:\n", | |
" uncertainty = self._attribute_to_value('uncertainty', el, int)\n", | |
" if uncertainty is not None:\n", | |
" error.uncertainty = uncertainty\n", | |
" lower_uncertainty = self._attribute_to_value(\n", | |
" 'lowerUncertainty', el, int)\n", | |
" if lower_uncertainty is not None:\n", | |
" error.lower_uncertainty = lower_uncertainty\n", | |
" upper_uncertainty = self._attribute_to_value(\n", | |
" 'upperUncertainty', el, int)\n", | |
" if upper_uncertainty is not None:\n", | |
" error.upper_uncertainty = upper_uncertainty\n", | |
" return value, error\n", | |
"\n", | |
"    def _float_value(self, element, name):\n", | |
"        # Convenience wrapper: quantity parsed as float.\n", | |
"        return self._value(element, name, float)\n", | |
"\n", | |
"    def _int_value(self, element, name):\n", | |
"        # Convenience wrapper: quantity parsed as int.\n", | |
"        return self._value(element, name, int)\n", | |
"\n", | |
"    def _time_value(self, element, name):\n", | |
"        # Convenience wrapper: quantity parsed as UTCDateTime.\n", | |
"        return self._value(element, name, UTCDateTime)\n", | |
"\n", | |
"    def _composite_times(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build a list of CompositeTime objects from parent['compositeTime'].\n", | |
"        \"\"\"\n", | |
"        obj = []\n", | |
"        for el in self._value_to_list('compositeTime', parent):\n", | |
"            ct = CompositeTime()\n", | |
"            ct.year, ct.year_errors = self._int_value(el, 'year')\n", | |
"            ct.month, ct.month_errors = self._int_value(el, 'month')\n", | |
"            ct.day, ct.day_errors = self._int_value(el, 'day')\n", | |
"            ct.hour, ct.hour_errors = self._int_value(el, 'hour')\n", | |
"            ct.minute, ct.minute_errors = self._int_value(el, 'minute')\n", | |
"            ct.second, ct.second_errors = self._float_value(el, 'second')\n", | |
"            self._extra(el, ct)\n", | |
"            obj.append(ct)\n", | |
"        return obj\n", | |
"\n", | |
"    def _confidence_ellipsoid(self, element):\n", | |
"        \"\"\"\n", | |
"        Build a ConfidenceEllipsoid from its dict representation.\n", | |
"        \"\"\"\n", | |
"        obj = ConfidenceEllipsoid()\n", | |
"        obj.semi_major_axis_length = self._attribute_to_value(\n", | |
"            'semiMajorAxisLength', element, float)\n", | |
"        obj.semi_minor_axis_length = self._attribute_to_value(\n", | |
"            'semiMinorAxisLength', element, float)\n", | |
"        obj.semi_intermediate_axis_length = self._attribute_to_value(\n", | |
"            'semiIntermediateAxisLength', element, float)\n", | |
"        obj.major_axis_plunge = self._attribute_to_value(\n", | |
"            'majorAxisPlunge', element, float)\n", | |
"        obj.major_axis_azimuth = self._attribute_to_value(\n", | |
"            'majorAxisAzimuth', element, float)\n", | |
"        obj.major_axis_rotation = self._attribute_to_value(\n", | |
"            'majorAxisRotation', element, float)\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _origin_uncertainty(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build an OriginUncertainty from parent['originUncertainty']\n", | |
"        (None if absent).\n", | |
"        \"\"\"\n", | |
"        elements = self._value_to_list(\"originUncertainty\", parent)\n", | |
"        if len(elements) > 1:\n", | |
"            raise NotImplementedError(\"Only one OriginUncertainty allowed.\")\n", | |
"        # Do not add an element if it is not given in the XML file.\n", | |
"        elif len(elements) == 0:\n", | |
"            return None\n", | |
"        element = elements[0]\n", | |
"        obj = OriginUncertainty()\n", | |
"        self._set_enum('preferredDescription', element,\n", | |
"                       obj, 'preferred_description')\n", | |
"        obj.horizontal_uncertainty = self._attribute_to_value(\n", | |
"            'horizontalUncertainty', element, float)\n", | |
"        obj.min_horizontal_uncertainty = self._attribute_to_value(\n", | |
"            'minHorizontalUncertainty', element, float)\n", | |
"        obj.max_horizontal_uncertainty = self._attribute_to_value(\n", | |
"            'maxHorizontalUncertainty', element, float)\n", | |
"        obj.azimuth_max_horizontal_uncertainty = self._attribute_to_value(\n", | |
"            'azimuthMaxHorizontalUncertainty', element, float)\n", | |
"        obj.confidence_level = self._attribute_to_value(\n", | |
"            'confidenceLevel', element, float)\n", | |
"        ce_el = self._value_to_list('confidenceEllipsoid', element)\n", | |
"        try:\n", | |
"            ce_el = ce_el[0]\n", | |
"        except IndexError:\n", | |
"            # No ellipsoid given: fall back to an empty one.\n", | |
"            obj.confidence_ellipsoid = ConfidenceEllipsoid()\n", | |
"        else:\n", | |
"            obj.confidence_ellipsoid = self._confidence_ellipsoid(ce_el)\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _waveform_ids(self, parent):\n", | |
"        \"\"\"\n", | |
"        Build WaveformStreamID objects from parent['waveformID'] entries\n", | |
"        (the codes were hoisted into the dict by _make_dict_from_tree).\n", | |
"        \"\"\"\n", | |
"        objs = []\n", | |
"        for wid_el in self._value_to_list('waveformID', parent):\n", | |
"            obj = WaveformStreamID()\n", | |
"            obj.network_code = wid_el.get('networkCode')\n", | |
"            obj.station_code = wid_el.get('stationCode')\n", | |
"            obj.location_code = wid_el.get('locationCode')\n", | |
"            obj.channel_code = wid_el.get('channelCode')\n", | |
"            obj.resource_uri = wid_el.get('id')\n", | |
"            objs.append(obj)\n", | |
"        return objs\n", | |
"\n", | |
"    def _waveform_id(self, parent):\n", | |
"        \"\"\"\n", | |
"        Return the first WaveformStreamID of parent, or None.\n", | |
"        \"\"\"\n", | |
"        try:\n", | |
"            return self._waveform_ids(parent)[0]\n", | |
"        except IndexError:\n", | |
"            return None\n", | |
"\n", | |
"    def _arrival(self, element):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into an Arrival object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.Arrival`\n", | |
"        \"\"\"\n", | |
"        obj = Arrival(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.pick_id = element.get('pickID', '')\n", | |
"        obj.phase = element.get('phase', '')\n", | |
"        # optional parameter\n", | |
"        obj.time_correction = self._attribute_to_value(\n", | |
"            'timeCorrection', element, float)\n", | |
"        obj.azimuth = self._attribute_to_value('azimuth', element, float)\n", | |
"        obj.distance = self._attribute_to_value('distance', element, float)\n", | |
"        obj.takeoff_angle, obj.takeoff_angle_errors = self._float_value(\n", | |
"            element, 'takeoffAngle')\n", | |
"        obj.time_residual = self._attribute_to_value(\n", | |
"            'timeResidual', element, float)\n", | |
"        obj.horizontal_slowness_residual = self._attribute_to_value(\n", | |
"            'horizontalSlownessResidual', element, float)\n", | |
"        obj.backazimuth_residual = self._attribute_to_value(\n", | |
"            'backazimuthResidual', element, float)\n", | |
"        obj.time_weight = self._attribute_to_value('timeWeight', element, float)\n", | |
"        obj.horizontal_slowness_weight = self._attribute_to_value(\n", | |
"            'horizontalSlownessWeight', element, float)\n", | |
"        obj.backazimuth_weight = self._attribute_to_value(\n", | |
"            'backazimuthWeight', element, float)\n", | |
"        obj.earth_model_id = self._attribute_to_value('earthModelID', element)\n", | |
"        obj.comments = self._comments(element)\n", | |
"        obj.creation_info = self._creation_info(element)\n", | |
"        obj.resource_id = element.get('publicID')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _pick(self, element):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into a Pick object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.Pick`\n", | |
"        \"\"\"\n", | |
"        obj = Pick(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.time, obj.time_errors = self._time_value(element, 'time')\n", | |
"        obj.waveform_id = self._waveform_id(element)\n", | |
"        # optional parameter\n", | |
"        obj.filter_id = element.get('filterID')\n", | |
"        obj.method_id = element.get('methodID')\n", | |
"        obj.horizontal_slowness, obj.horizontal_slowness_errors = \\\n", | |
"            self._float_value(element, 'horizontalSlowness')\n", | |
"        obj.backazimuth, obj.backazimuth_errors = \\\n", | |
"            self._float_value(element, 'backazimuth')\n", | |
"        obj.slowness_method_id = element.get('slownessMethodID')\n", | |
"        self._set_enum('onset', element, obj, 'onset')\n", | |
"        obj.phase_hint = element.get('phaseHint')\n", | |
"        self._set_enum('polarity', element, obj, 'polarity')\n", | |
"        self._set_enum('evaluationMode', element, obj, 'evaluation_mode')\n", | |
"        self._set_enum('evaluationStatus', element, obj, 'evaluation_status')\n", | |
"        obj.comments = self._comments(element)\n", | |
"        obj.creation_info = self._creation_info(element)\n", | |
"        obj.resource_id = element.get('publicID')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _time_window(self, element):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into a TimeWindow\n", | |
"        object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.TimeWindow`\n", | |
"        \"\"\"\n", | |
"        obj = TimeWindow(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.begin = self._attribute_to_value('begin', element, convert_to=float)\n", | |
"        obj.end = self._attribute_to_value('end', element, convert_to=float)\n", | |
"        obj.reference = self._attribute_to_value('reference', element,\n", | |
"                                                 convert_to=UTCDateTime)\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _amplitude(self, element):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into an Amplitude\n", | |
"        object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.Amplitude`\n", | |
"        \"\"\"\n", | |
"        obj = Amplitude(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.generic_amplitude, obj.generic_amplitude_errors = \\\n", | |
"            self._float_value(element, 'genericAmplitude')\n", | |
"        # optional parameter\n", | |
"        obj.type = element.get('type')\n", | |
"        self._set_enum('category', element, obj, 'category')\n", | |
"        self._set_enum('unit', element, obj, 'unit')\n", | |
"        obj.method_id = element.get('methodID')\n", | |
"        obj.period, obj.period_errors = self._float_value(element, 'period')\n", | |
"        obj.snr = element.get('snr')\n", | |
"        time_window_el = self._value_to_list('timeWindow', element) or None\n", | |
"        if time_window_el is not None:\n", | |
"            obj.time_window = self._time_window(time_window_el[0])\n", | |
"        obj.pick_id = element.get('pickID')\n", | |
"        obj.waveform_id = self._waveform_id(element)\n", | |
"        obj.filter_id = element.get('filterID')\n", | |
"        obj.scaling_time, obj.scaling_time_errors = \\\n", | |
"            self._time_value(element, 'scalingTime')\n", | |
"        obj.magnitude_hint = element.get('magnitudeHint')\n", | |
"        self._set_enum('evaluationMode', element, obj, 'evaluation_mode')\n", | |
"        self._set_enum('evaluationStatus', element, obj, 'evaluation_status')\n", | |
"        obj.comments = self._comments(element)\n", | |
"        obj.creation_info = self._creation_info(element)\n", | |
"        obj.resource_id = element.get('publicID')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _origin(self, element, arrivals):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into an Origin object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.Origin`\n", | |
"        .. note:: The example below is stale - it predates the dict-based\n", | |
"            rewrite and references the etree-based ``Unpickler``, which is\n", | |
"            not defined in this notebook.\n", | |
"        .. rubric:: Example\n", | |
"        >>> from lxml import etree\n", | |
"        >>> XML = b'''<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", | |
"        ... <origin>\n", | |
"        ...   <latitude><value>34.23</value></latitude>\n", | |
"        ... </origin>'''\n", | |
"        >>> xml_doc = etree.fromstring(XML)\n", | |
"        >>> unpickler = Unpickler(xml_doc)\n", | |
"        >>> origin = unpickler._origin(xml_doc, arrivals=[])\n", | |
"        >>> print(origin.latitude)\n", | |
"        34.23\n", | |
"        \"\"\"\n", | |
"        obj = Origin(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.time, obj.time_errors = self._time_value(element, 'time')\n", | |
"        obj.latitude, obj.latitude_errors = self._float_value(\n", | |
"            element, 'latitude')\n", | |
"        obj.longitude, obj.longitude_errors = self._float_value(\n", | |
"            element, 'longitude')\n", | |
"        # optional parameter\n", | |
"        obj.depth, obj.depth_errors = self._float_value(element, 'depth')\n", | |
"        self._set_enum('depthType', element, obj, 'depth_type')\n", | |
"        obj.time_fixed = self._attribute_to_value('timeFixed', element, bool)\n", | |
"        obj.epicenter_fixed = self._attribute_to_value(\n", | |
"            'epicenterFixed', element, bool)\n", | |
"        obj.reference_system_id = element.get('referenceSystemID')\n", | |
"        obj.method_id = element.get('methodID')\n", | |
"        obj.earth_model_id = element.get('earthModelID')\n", | |
"        obj.composite_times = self._composite_times(element)\n", | |
"        obj.quality = self._origin_quality(element)\n", | |
"        self._set_enum('type', element, obj, 'origin_type')\n", | |
"        obj.region = element.get('region')\n", | |
"        self._set_enum('evaluationMode', element, obj, 'evaluation_mode')\n", | |
"        self._set_enum('evaluationStatus', element, obj, 'evaluation_status')\n", | |
"        obj.creation_info = self._creation_info(element)\n", | |
"        obj.comments = self._comments(element)\n", | |
"        obj.origin_uncertainty = self._origin_uncertainty(element)\n", | |
"        obj.arrivals = arrivals\n", | |
"        obj.resource_id = element.get('publicID')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _magnitude(self, element):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into a Magnitude\n", | |
"        object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.Magnitude`\n", | |
"        .. note:: The example below is stale - it predates the dict-based\n", | |
"            rewrite and references the etree-based ``Unpickler``, which is\n", | |
"            not defined in this notebook.\n", | |
"        .. rubric:: Example\n", | |
"        >>> from lxml import etree\n", | |
"        >>> XML = b'''<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", | |
"        ... <magnitude>\n", | |
"        ...   <mag><value>3.2</value></mag>\n", | |
"        ... </magnitude>'''\n", | |
"        >>> xml_doc = etree.fromstring(XML)\n", | |
"        >>> unpickler = Unpickler(xml_doc)\n", | |
"        >>> magnitude = unpickler._magnitude(xml_doc)\n", | |
"        >>> print(magnitude.mag)\n", | |
"        3.2\n", | |
"        \"\"\"\n", | |
"        obj = Magnitude(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.mag, obj.mag_errors = self._float_value(element, 'mag')\n", | |
"        # optional parameter\n", | |
"        obj.magnitude_type = element.get('type')\n", | |
"        obj.origin_id = element.get('originID')\n", | |
"        obj.method_id = element.get('methodID')\n", | |
"        obj.station_count = self._attribute_to_value(\n", | |
"            'stationCount', element, int)\n", | |
"        obj.azimuthal_gap = self._attribute_to_value(\n", | |
"            'azimuthalGap', element, float)\n", | |
"        self._set_enum('evaluationMode', element, obj, 'evaluation_mode')\n", | |
"        self._set_enum('evaluationStatus', element, obj, 'evaluation_status')\n", | |
"        obj.creation_info = self._creation_info(element)\n", | |
"        obj.station_magnitude_contributions = \\\n", | |
"            self._station_magnitude_contributions(element)\n", | |
"        obj.comments = self._comments(element)\n", | |
"        obj.resource_id = element.get('publicID')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
"    def _station_magnitude(self, element):\n", | |
"        \"\"\"\n", | |
"        Converts a dict (from _make_dict_from_tree) into a\n", | |
"        StationMagnitude object.\n", | |
"        :type element: dict\n", | |
"        :rtype: :class:`~obspy.core.event.StationMagnitude`\n", | |
"        .. note:: The example below is stale - it predates the dict-based\n", | |
"            rewrite and references the etree-based ``Unpickler``, which is\n", | |
"            not defined in this notebook.\n", | |
"        .. rubric:: Example\n", | |
"        >>> from lxml import etree\n", | |
"        >>> XML = b'''<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", | |
"        ... <stationMagnitude>\n", | |
"        ...   <mag><value>3.2</value></mag>\n", | |
"        ... </stationMagnitude>'''\n", | |
"        >>> xml_doc = etree.fromstring(XML)\n", | |
"        >>> unpickler = Unpickler(xml_doc)\n", | |
"        >>> station_mag = unpickler._station_magnitude(xml_doc)\n", | |
"        >>> print(station_mag.mag)\n", | |
"        3.2\n", | |
"        \"\"\"\n", | |
"        obj = StationMagnitude(force_resource_id=False)\n", | |
"        # required parameter\n", | |
"        obj.origin_id = element.get('originID', '')\n", | |
"        obj.mag, obj.mag_errors = self._float_value(element, 'mag')\n", | |
"        # optional parameter\n", | |
"        obj.station_magnitude_type = element.get('type')\n", | |
"        obj.amplitude_id = element.get('amplitudeID')\n", | |
"        obj.method_id = element.get('methodID')\n", | |
"        obj.waveform_id = self._waveform_id(element)\n", | |
"        obj.creation_info = self._creation_info(element)\n", | |
"        obj.comments = self._comments(element)\n", | |
"        obj.resource_id = element.get('publicID')\n", | |
"        self._extra(element, obj)\n", | |
"        return obj\n", | |
"\n", | |
" def _axis(self, parent, name):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into an Axis object.\n", | |
" :type parent: etree.Element\n", | |
" :type name: str\n", | |
" :param name: tag name of axis\n", | |
" :rtype: :class:`~obspy.core.event.Axis`\n", | |
" \"\"\"\n", | |
" obj = Axis()\n", | |
" try:\n", | |
" sub_el = self._value_to_list(name, parent)[0]\n", | |
" except IndexError:\n", | |
" return obj\n", | |
" # required parameter\n", | |
" obj.azimuth, obj.azimuth_errors = self._float_value(sub_el, 'azimuth')\n", | |
" obj.plunge, obj.plunge_errors = self._float_value(sub_el, 'plunge')\n", | |
" obj.length, obj.length_errors = self._float_value(sub_el, 'length')\n", | |
" self._extra(sub_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _principal_axes(self, parent):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a PrincipalAxes object.\n", | |
" :type parent: etree.Element\n", | |
" :rtype: :class:`~obspy.core.event.PrincipalAxes`\n", | |
" \"\"\"\n", | |
" try:\n", | |
" sub_el = self._value_to_list('principalAxes', parent)[0]\n", | |
" except IndexError:\n", | |
" return None\n", | |
" obj = PrincipalAxes()\n", | |
" # required parameter\n", | |
" obj.t_axis = self._axis(sub_el, 'tAxis')\n", | |
" obj.p_axis = self._axis(sub_el, 'pAxis')\n", | |
" # optional parameter\n", | |
" obj.n_axis = self._axis(sub_el, 'nAxis')\n", | |
" self._extra(sub_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _nodal_plane(self, parent, name):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a NodalPlane object.\n", | |
" :type parent: etree.Element\n", | |
" :type name: str\n", | |
" :param name: tag name of sub nodal plane\n", | |
" :rtype: :class:`~obspy.core.event.NodalPlane`\n", | |
" \"\"\"\n", | |
" try:\n", | |
" sub_el = self._value_to_list(name, parent)[0]\n", | |
" except IndexError:\n", | |
" return None\n", | |
" obj = NodalPlane()\n", | |
" # required parameter\n", | |
" obj.strike, obj.strike_errors = self._float_value(sub_el, 'strike')\n", | |
" obj.dip, obj.dip_errors = self._float_value(sub_el, 'dip')\n", | |
" obj.rake, obj.rake_errors = self._float_value(sub_el, 'rake')\n", | |
" self._extra(sub_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _nodal_planes(self, parent):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a NodalPlanes object.\n", | |
" :type parent: etree.Element\n", | |
" :rtype: :class:`~obspy.core.event.NodalPlanes`\n", | |
" \"\"\"\n", | |
" try:\n", | |
" sub_el = self._value_to_list('nodalPlanes', parent)[0]\n", | |
" except IndexError:\n", | |
" return None\n", | |
" obj = NodalPlanes()\n", | |
" # optional parameter\n", | |
" obj.nodal_plane_1 = self._nodal_plane(sub_el, 'nodalPlane1')\n", | |
" obj.nodal_plane_2 = self._nodal_plane(sub_el, 'nodalPlane2')\n", | |
" # optional attribute\n", | |
" try:\n", | |
" obj.preferred_plane = int(sub_el.get('preferredPlane'))\n", | |
" except Exception:\n", | |
" obj.preferred_plane = None\n", | |
" self._extra(sub_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _source_time_function(self, parent):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a SourceTimeFunction object.\n", | |
" :type parent: etree.Element\n", | |
" :rtype: :class:`~obspy.core.event.SourceTimeFunction`\n", | |
" \"\"\"\n", | |
" try:\n", | |
" sub_el = self._value_to_list('sourceTimeFunction', parent)[0]\n", | |
" except IndexError:\n", | |
" return None\n", | |
" obj = SourceTimeFunction()\n", | |
" # required parameters\n", | |
" self._set_enum('type', sub_el, obj, 'type')\n", | |
" obj.duration = self._attribute_to_value('duration', sub_el, float)\n", | |
" # optional parameter\n", | |
" obj.rise_time = self._attribute_to_value('riseTime', sub_el, float)\n", | |
" obj.decay_time = self._attribute_to_value('decayTime', sub_el, float)\n", | |
" self._extra(sub_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _tensor(self, parent):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a Tensor object.\n", | |
" :type parent: etree.Element\n", | |
" :rtype: :class:`~obspy.core.event.Tensor`\n", | |
" \"\"\"\n", | |
" try:\n", | |
" sub_el = self._value_to_list('tensor', parent)[0]\n", | |
" except IndexError:\n", | |
" return None\n", | |
" obj = Tensor()\n", | |
" # required parameters\n", | |
" obj.m_rr, obj.m_rr_errors = self._float_value(sub_el, 'Mrr')\n", | |
" obj.m_tt, obj.m_tt_errors = self._float_value(sub_el, 'Mtt')\n", | |
" obj.m_pp, obj.m_pp_errors = self._float_value(sub_el, 'Mpp')\n", | |
" obj.m_rt, obj.m_rt_errors = self._float_value(sub_el, 'Mrt')\n", | |
" obj.m_rp, obj.m_rp_errors = self._float_value(sub_el, 'Mrp')\n", | |
" obj.m_tp, obj.m_tp_errors = self._float_value(sub_el, 'Mtp')\n", | |
" self._extra(sub_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _data_used(self, parent):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a list of DataUsed objects.\n", | |
" :type parent: etree.Element\n", | |
" :rtype: list of :class:`~obspy.core.event.DataUsed`\n", | |
" \"\"\"\n", | |
" obj = []\n", | |
" for el in self._value_to_list('dataUsed', parent):\n", | |
" data_used = DataUsed()\n", | |
" # required parameters\n", | |
" self._set_enum('waveType', el, data_used, 'wave_type')\n", | |
" # optional parameter\n", | |
" data_used.station_count = \\\n", | |
" self._attribute_to_value('stationCount', el, int)\n", | |
" data_used.component_count = \\\n", | |
" self._attribute_to_value('componentCount', el, int)\n", | |
" data_used.shortest_period = \\\n", | |
" self._attribute_to_value('shortestPeriod', el, float)\n", | |
" data_used.longest_period = \\\n", | |
" self._attribute_to_value('longestPeriod', el, float)\n", | |
"\n", | |
" self._extra(el, data_used)\n", | |
" obj.append(data_used)\n", | |
" return obj\n", | |
"\n", | |
" def _moment_tensor(self, parent):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a MomentTensor object.\n", | |
" :type parent: etree.Element\n", | |
" :rtype: :class:`~obspy.core.event.MomentTensor`\n", | |
" \"\"\"\n", | |
" try:\n", | |
" mt_el = self._value_to_list('momentTensor', parent)[0]\n", | |
" except IndexError:\n", | |
" return None\n", | |
" obj = MomentTensor(force_resource_id=False)\n", | |
" # required parameters\n", | |
" obj.derived_origin_id = mt_el.get('derivedOriginID')\n", | |
" # optional parameter\n", | |
" obj.moment_magnitude_id = mt_el.get('momentMagnitudeID')\n", | |
" obj.scalar_moment, obj.scalar_moment_errors = \\\n", | |
" self._float_value(mt_el, 'scalarMoment')\n", | |
" obj.tensor = self._tensor(mt_el)\n", | |
" obj.variance = self._attribute_to_value('variance', mt_el, float)\n", | |
" obj.variance_reduction = \\\n", | |
" self._attribute_to_value('varianceReduction', mt_el, float)\n", | |
" obj.double_couple = self._attribute_to_value('doubleCouple', mt_el, float)\n", | |
" obj.clvd = self._attribute_to_value('clvd', mt_el, float)\n", | |
" obj.iso = self._attribute_to_value('iso', mt_el, float)\n", | |
" obj.greens_function_id = mt_el.get('greensFunctionID')\n", | |
" obj.filter_id = mt_el.get('filterID')\n", | |
" obj.source_time_function = self._source_time_function(mt_el)\n", | |
" obj.data_used = self._data_used(mt_el)\n", | |
" obj.method_id = mt_el.get('methodID')\n", | |
" self._set_enum('category', mt_el, obj, 'category')\n", | |
" self._set_enum('inversionType', mt_el, obj, 'inversion_type')\n", | |
" obj.creation_info = self._creation_info(mt_el)\n", | |
" obj.comments = self._comments(mt_el)\n", | |
" obj.resource_id = mt_el.get('publicID')\n", | |
" self._extra(mt_el, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _focal_mechanism(self, element):\n", | |
" \"\"\"\n", | |
" Converts an etree.Element into a FocalMechanism object.\n", | |
" :type element: etree.Element\n", | |
" :rtype: :class:`~obspy.core.event.FocalMechanism`\n", | |
" .. rubric:: Example\n", | |
" >>> from lxml import etree\n", | |
" >>> XML = b'''<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", | |
" ... <focalMechanism>\n", | |
" ... <methodID>smi:ISC/methodID=Best_double_couple</methodID>\n", | |
" ... </focalMechanism>'''\n", | |
" >>> xml_doc = etree.fromstring(XML)\n", | |
" >>> unpickler = Unpickler(xml_doc)\n", | |
" >>> fm = unpickler._focal_mechanism(xml_doc)\n", | |
" >>> print(fm.method_id)\n", | |
" smi:ISC/methodID=Best_double_couple\n", | |
" \"\"\"\n", | |
" obj = FocalMechanism(force_resource_id=False)\n", | |
" # required parameter\n", | |
" # optional parameter\n", | |
" obj.waveform_id = self._waveform_ids(element)\n", | |
" obj.triggering_origin_id = element.get('triggeringOriginID')\n", | |
" obj.azimuthal_gap = self._attribute_to_value('azimuthalGap', element, float)\n", | |
" obj.station_polarity_count = \\\n", | |
" self._attribute_to_value('stationPolarityCount', element, int)\n", | |
" obj.misfit = self._attribute_to_value('misfit', element, float)\n", | |
" obj.station_distribution_ratio = \\\n", | |
" self._attribute_to_value('stationDistributionRatio', element, float)\n", | |
" obj.method_id = element.get('methodID')\n", | |
" obj.moment_tensor = self._moment_tensor(element)\n", | |
" obj.nodal_planes = self._nodal_planes(element)\n", | |
" obj.principal_axes = self._principal_axes(element)\n", | |
" self._set_enum('evaluationMode', element, obj, 'evaluation_mode')\n", | |
" self._set_enum('evaluationStatus', element, obj, 'evaluation_status')\n", | |
" obj.creation_info = self._creation_info(element)\n", | |
" obj.comments = self._comments(element)\n", | |
" obj.resource_id = element.get('publicID')\n", | |
" self._extra(element, obj)\n", | |
" return obj\n", | |
"\n", | |
" def _extra(self, element, obj):\n", | |
" \"\"\"\n", | |
" Add information stored in custom tags/attributes in obj.extra.\n", | |
" \"\"\"\n", | |
" pass\n", | |
" # # search all namespaces in current scope\n", | |
" # for ns in self._value_to_list('namespace', element):\n", | |
" # # skip the two top-level quakeml namespaces,\n", | |
" # # we're not interested in quakeml defined tags here\n", | |
" # if ns in self._quakeml_namespaces:\n", | |
" # continue\n", | |
" # # process all elements of this custom namespace, if any\n", | |
" # for el in element.iterfind(\"{%s}*\" % ns):\n", | |
" # # remove namespace from tag name\n", | |
" # _, name = el.tag.split(\"}\")\n", | |
" # # check if element has children (nested tags)\n", | |
" # if len(el):\n", | |
" # sub_obj = AttribDict()\n", | |
" # self._extra(el, sub_obj)\n", | |
" # value = sub_obj.extra\n", | |
" # else:\n", | |
" # value = el.text\n", | |
" # try:\n", | |
" # extra = obj.setdefault(\"extra\", AttribDict())\n", | |
" # # Catalog object is not based on AttribDict..\n", | |
" # except AttributeError:\n", | |
" # if not isinstance(obj, Catalog):\n", | |
" # raise\n", | |
" # if hasattr(obj, \"extra\"):\n", | |
" # extra = obj.extra\n", | |
" # else:\n", | |
" # extra = AttribDict()\n", | |
" # obj.extra = extra\n", | |
" # extra[name] = {'value': value,\n", | |
" # 'namespace': '%s' % ns}\n", | |
" # if el.attrib:\n", | |
" # extra[name]['attrib'] = el.attrib\n", | |
" # # process all attributes of custom namespaces, if any\n", | |
" # for key, value in element.attrib.items():\n", | |
" # # no custom namespace\n", | |
" # if \"}\" not in key:\n", | |
" # continue\n", | |
" # # separate namespace from tag name\n", | |
" # ns, name = key.lstrip(\"{\").split(\"}\")\n", | |
" # try:\n", | |
" # extra = obj.setdefault(\"extra\", AttribDict())\n", | |
" # # Catalog object is not based on AttribDict..\n", | |
" # except AttributeError:\n", | |
" # if not isinstance(obj, Catalog):\n", | |
" # raise\n", | |
" # if hasattr(obj, \"extra\"):\n", | |
" # extra = obj.extra\n", | |
" # else:\n", | |
" # extra = AttribDict()\n", | |
" # obj.extra = extra\n", | |
" # extra[name] = {'value': str(value),\n", | |
" # 'namespace': '%s' % ns,\n", | |
" # 'type': 'attribute'}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Set-up tests" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1040 Event(s) in Catalog:\n", | |
"2014-01-07T03:34:22.525036Z | -40.141, +174.228 | 2.653115 M | automatic\n", | |
"2014-01-07T15:33:21.032438Z | -43.656, +172.382 | 2.5531339 M | automatic\n", | |
"...\n", | |
"2014-01-19T09:46:11.766661Z | -30.624, -178.559 | 4.3402718 M | manual\n", | |
"2014-01-19T22:48:40.033184Z | -31.856, +178.373 | 4.5172253 M | manual\n", | |
"To see all events call 'print(CatalogObject.__str__(print_all=True))'\n" | |
] | |
} | |
], | |
"source": [ | |
"from obspy import UTCDateTime\n", | |
"from obspy.clients.fdsn import Client\n", | |
"from obspy.io.quakeml.core import Unpickler\n", | |
"\n", | |
"client = Client(\"GEONET\")\n", | |
"catalog = client.get_events(\n", | |
" starttime=UTCDateTime(2014, 1, 7), endtime=UTCDateTime(2014, 1, 21))\n", | |
"print(catalog)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Initial tests on sub-catalogs - all take a while..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Testing for 1 events\n", | |
"Testing Obspy\n", | |
"165 ms ± 23.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", | |
"Testing DictUnpickler\n", | |
"68.1 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", | |
"Testing raw pickle\n", | |
"8.71 ms ± 89.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", | |
"Testing for 20 events\n", | |
"Testing Obspy\n", | |
"1.74 s ± 17.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing DictUnpickler\n", | |
"613 ms ± 5.34 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing raw pickle\n", | |
"112 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", | |
"Testing for 100 events\n", | |
"Testing Obspy\n", | |
"12.6 s ± 421 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing DictUnpickler\n", | |
"4.15 s ± 245 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing raw pickle\n", | |
"873 ms ± 136 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing for 200 events\n", | |
"Testing Obspy\n", | |
"19.3 s ± 487 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing DictUnpickler\n", | |
"8.11 s ± 32.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"Testing raw pickle\n", | |
"1.67 s ± 86.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Test against a raw pickle\n", | |
"import pickle\n", | |
"\n", | |
"def unpickle_cat(path):\n", | |
" with open(path, 'rb') as fi:\n", | |
" return pickle.load(fi)\n", | |
"\n", | |
"def pickle_cat(cat, path):\n", | |
" with open(path, 'wb') as fi:\n", | |
" pickle.dump(cat, fi)\n", | |
"\n", | |
"# Write some files\n", | |
"for n_events in [1, 20, 100, 200]:\n", | |
" print(\"Testing for {0} events\".format(n_events))\n", | |
" file_out = \"test_catalog_{0}.xml\".format(n_events)\n", | |
" pkl_out = \"test_catalog_{0}.pkl\".format(n_events)\n", | |
" catalog[0:n_events].write(file_out, format=\"QUAKEML\")\n", | |
" pickle_cat(catalog[0:n_events], pkl_out)\n", | |
" print(\"Testing Obspy\")\n", | |
" %timeit Unpickler().load(file_out)\n", | |
" print(\"Testing DictUnpickler\")\n", | |
" %timeit DictUnpickler().load(file_out)\n", | |
" print(\"Testing raw pickle\")\n", | |
" %timeit unpickle_cat(pkl_out)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This trend continues for larger catalogs. Obspy is slowest, pickle is fastest, but not portable (AFAIK).\n", | |
"\n", | |
"Lion wanted to know where the bottleneck is now - profiling using [snakeviz](https://jiffyclub.github.io/snakeviz/)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" \n", | |
"*** Profile stats marshalled to file '/tmp/tmpfu2z5cvz'. \n" | |
] | |
} | |
], | |
"source": [ | |
"%load_ext snakeviz\n", | |
"\n", | |
"%snakeviz DictUnpickler().load(\"test_catalog_100.xml\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# SNAKEVIZ results\n", | |
"\n", | |
"I do not know how to include the snakeviz results here, but if you run the above cell you will get an interactive diagram.\n", | |
"\n", | |
"- `_make_dict_from_tree` is ~13% of the time\n", | |
"- `_deserialize` is ~77% of the time\n", | |
"- `_xml_doc_from_anything` is ~5% of the time\n", | |
"\n", | |
"@markcwill noted the [xmltodict](https://github.com/martinblech/xmltodict) package - this could replace `_make_dict_from_tree` and `_xml_doc_from_anything`. We should see if that is faster." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"xmltodict\n", | |
"1.36 s ± 7.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"_make_dict_from_tree\n", | |
"804 ms ± 46.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"import xmltodict\n", | |
"\n", | |
"print(\"xmltodict\")\n", | |
"%timeit xmltodict.parse(open(\"test_catalog_100.xml\").read())\n", | |
"\n", | |
"print(\"_make_dict_from_tree\")\n", | |
"%timeit _make_dict_from_tree(_xml_doc_from_anything(\"test_catalog_100.xml\"))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Conclusions\n", | |
"\n", | |
"Reading to dict first seems like the best option. The custom version of reading from xml to dictionary implemented here seems to be faster than `xmltodict`, but that is only one test, and I don't think my implementation is likely to be very stable (see custom hacks for [WaveformStreamID](https://github.com/obspy/obspy/blob/Quakeml-dictionary-reading/obspy/io/quakeml/core.py#L112))\n", | |
"\n", | |
"I'm sure we could speed up the deserialization as well (seeing as raw unpickling is so much faster)..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment