Skip to content

Instantly share code, notes, and snippets.

@Kronuz
Created September 12, 2012 20:48
Show Gist options
  • Save Kronuz/3709808 to your computer and use it in GitHub Desktop.
Save Kronuz/3709808 to your computer and use it in GitHub Desktop.
Geospatial support backported from 1.3.0 (only python bindings tested/work atm)
diff --git a/xapian-bindings/xapian.i b/xapian-bindings/xapian.i
index 7b608e4..25ea850 100644
--- a/xapian-bindings/xapian.i
+++ b/xapian-bindings/xapian.i
@@ -202,6 +202,7 @@ class ValueIterator {
%ignore Xapian::PostingSource::clone;
%ignore Xapian::PostingSource::serialise;
%ignore Xapian::PostingSource::unserialise;
+%ignore Xapian::PostingSource::unserialise_with_registry;
%ignore Xapian::PostingSource::register_matcher_;
%include <xapian/postingsource.h>
@@ -805,6 +806,15 @@ class Remote {
%feature("director") Xapian::Compactor;
%include <xapian/compactor.h>
+#if defined SWIGPYTHON
+// from xapian/geospatial.h
+%feature("director") Xapian::LatLongMetric;
+%ignore Xapian::LatLongMetric::clone;
+%ignore Xapian::LatLongMetric::unserialise;
+%ignore Xapian::LatLongCoordsIterator::operator++;
+%include <xapian/geospatial.h>
+#endif
+
namespace Xapian {
#if defined SWIGPYTHON || defined SWIGRUBY
diff --git a/xapian-core/Makefile.am b/xapian-core/Makefile.am
index 3beedc3..00fd696 100644
--- a/xapian-core/Makefile.am
+++ b/xapian-core/Makefile.am
@@ -148,6 +148,7 @@ include backends/Makefile.mk
include common/Makefile.mk
include examples/Makefile.mk
include expand/Makefile.mk
+include geospatial/Makefile.mk
include include/Makefile.mk
include languages/Makefile.mk
include matcher/Makefile.mk
diff --git a/xapian-core/api/postingsource.cc b/xapian-core/api/postingsource.cc
index 518f73a..01eaffc 100644
--- a/xapian-core/api/postingsource.cc
+++ b/xapian-core/api/postingsource.cc
@@ -102,6 +102,13 @@ PostingSource::unserialise(const string &) const
throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
}
+PostingSource *
+PostingSource::unserialise_with_registry(const std::string &s,
+ const Registry &) const
+{
+ return unserialise(s);
+}
+
string
PostingSource::get_description() const
{
diff --git a/xapian-core/api/registry.cc b/xapian-core/api/registry.cc
index 0d900ec..cbefb55 100644
--- a/xapian-core/api/registry.cc
+++ b/xapian-core/api/registry.cc
@@ -24,6 +24,7 @@
#include "xapian/registry.h"
#include "xapian/error.h"
+#include "xapian/geospatial.h"
#include "xapian/matchspy.h"
#include "xapian/postingsource.h"
#include "xapian/weight.h"
@@ -154,11 +155,25 @@ Registry::get_match_spy(const string & name) const
RETURN(lookup_object(internal->matchspies, name));
}
+void
+Registry::register_lat_long_metric(const Xapian::LatLongMetric &metric)
+{
+ LOGCALL_VOID(API, "Xapian::Registry::register_lat_long_metric", metric.name());
+ register_object(internal->lat_long_metrics, metric);
+}
+
+const Xapian::LatLongMetric *
+Registry::get_lat_long_metric(const string & name) const
+{
+ LOGCALL(API, const Xapian::MatchSpy *, "Xapian::Registry::get_lat_long_metric", name);
+ RETURN(lookup_object(internal->lat_long_metrics, name));
+}
Registry::Internal::Internal()
: Xapian::Internal::RefCntBase(),
wtschemes(),
- postingsources()
+ postingsources(),
+ lat_long_metrics()
{
add_defaults();
}
@@ -168,6 +183,7 @@ Registry::Internal::~Internal()
clear_weighting_schemes();
clear_posting_sources();
clear_match_spies();
+ clear_lat_long_metrics();
}
void
@@ -190,10 +206,18 @@ Registry::Internal::add_defaults()
postingsources[source->name()] = source;
source = new Xapian::FixedWeightPostingSource(0.0);
postingsources[source->name()] = source;
+ source = new Xapian::LatLongDistancePostingSource(0,
+ Xapian::LatLongCoords(),
+ Xapian::GreatCircleMetric());
+ postingsources[source->name()] = source;
Xapian::MatchSpy * spy;
spy = new Xapian::ValueCountMatchSpy();
matchspies[spy->name()] = spy;
+
+ Xapian::LatLongMetric * metric;
+ metric = new Xapian::GreatCircleMetric();
+ lat_long_metrics[metric->name()] = metric;
}
void
@@ -223,4 +247,13 @@ Registry::Internal::clear_match_spies()
}
}
+void
+Registry::Internal::clear_lat_long_metrics()
+{
+ map<string, Xapian::LatLongMetric *>::const_iterator i;
+ for (i = lat_long_metrics.begin(); i != lat_long_metrics.end(); ++i) {
+ delete i->second;
+ }
+}
+
}
diff --git a/xapian-core/common/output.h b/xapian-core/common/output.h
index 802403a..575c28c 100644
--- a/xapian-core/common/output.h
+++ b/xapian-core/common/output.h
@@ -6,6 +6,7 @@
* Copyright 2002 Ananova Ltd
* Copyright 2002,2003,2004,2007,2009,2011 Olly Betts
* Copyright 2007 Lemur Consulting Ltd
+ * Copyright 2010 Richard Boulton
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -54,6 +55,10 @@ XAPIAN_OUTPUT_FUNCTION(Xapian::ESetIterator)
XAPIAN_OUTPUT_FUNCTION(Xapian::ESet)
XAPIAN_OUTPUT_FUNCTION(Xapian::Enquire)
+#include <xapian/geospatial.h>
+XAPIAN_OUTPUT_FUNCTION(Xapian::LatLongCoord)
+XAPIAN_OUTPUT_FUNCTION(Xapian::LatLongCoords)
+
#include <xapian/stem.h>
XAPIAN_OUTPUT_FUNCTION(Xapian::Stem)
diff --git a/xapian-core/common/registryinternal.h b/xapian-core/common/registryinternal.h
index bfcb6e4..1347350 100644
--- a/xapian-core/common/registryinternal.h
+++ b/xapian-core/common/registryinternal.h
@@ -32,6 +32,7 @@ namespace Xapian {
class Weight;
class PostingSource;
class MatchSpy;
+ class LatLongMetric;
}
class Xapian::Registry::Internal : public Xapian::Internal::RefCntBase {
@@ -46,6 +47,9 @@ class Xapian::Registry::Internal : public Xapian::Internal::RefCntBase {
/// Registered match spies.
std::map<std::string, Xapian::MatchSpy *> matchspies;
+ /// Registered lat-long metrics.
+ std::map<std::string, Xapian::LatLongMetric *> lat_long_metrics;
+
/// Add the standard subclasses provided in the API.
void add_defaults();
@@ -58,6 +62,9 @@ class Xapian::Registry::Internal : public Xapian::Internal::RefCntBase {
/// Clear all registered match spies.
void clear_match_spies();
+ /// Clear all registered lat-long metrics.
+ void clear_lat_long_metrics();
+
public:
Internal();
~Internal();
diff --git a/xapian-core/docs/geospatial.rst b/xapian-core/docs/geospatial.rst
new file mode 100644
index 0000000..a640fcf
--- /dev/null
+++ b/xapian-core/docs/geospatial.rst
@@ -0,0 +1,216 @@
+.. Copyright (C) 2008 Lemur Consulting Ltd
+
+================================
+Geospatial searching with Xapian
+================================
+
+.. contents:: Table of contents
+
+Introduction
+============
+
+This document describes a set of features present in Xapian which are designed
+to allow geospatial searches to be supported. Currently, the geospatial
+support allows sets of locations to be stored associated with each document, as
+latitude/longitude coordinates, and allows searches to be restricted or
+reordered on the basis of distance from a second set of locations.
+
+Three types of geospatial searches are supported:
+
+ - Returning a list of documents in order of distance from a query location.
+ This may be used in conjunction with any Xapian query.
+
+ - Returning a list of documents within a given distance of a query location.
+ This may be used in conjunction with any other Xapian query, and with any
+ Xapian sort order.
+
+ - Returning a set of documents in a combined order based on distance from a
+ query location, and relevance.
+
+Locations are stored in value slots, allowing multiple independent locations to
+be used for a single document. It is also possible to store multiple
+coordinates in a single value slot, in which case the closest coordinate will
+be used for distance calculations.
+
+Metrics
+=======
+
+A metric is a function which calculates the distance between two points.
+
+Calculating the exact distance between two geographical points is an involved
+subject. In fact, even defining the meaning of a geographical point is very
+hard to do precisely - not only do you need to define a mathematical projection
+used to calculate the coordinates, you also need to choose a model of the shape
+of the Earth, and identify a few sample points to identify the coordinates of
+particular locations. Since the Earth is constantly changing shape, these
+coordinates also need to be defined at a particular date.
+
+There are a few standard datums which define all these - a very common datum is
+the WGS84 datum, which is the datum used by the GPS system. Unless you have a
+good reason not to, we recommend using the WGS84 datum, since this will ensure
+that preset parameters of the functions built in to Xapian will have the
+correct values (currently, the only such parameter is the Earth radius used by
+the GreatCircleMetric, but more may be added in future).
+
+Since there are lots of ways of calculating distances between two points, using
+different assumptions about the approximations which are valid, Xapian allows
+user-implemented metrics. These are subclasses of the Xapian::LatLongMetric
+class; see the API documentation for details on how to implement the various
+required methods.
+
+There is currently only one built-in metric - the GreatCircleMetric. As the
+name suggests, this calculates the distance between a latitude and longitude
+based on the assumption that the world is a perfect sphere. The radius of the
+world can be specified as a constructor parameter, but defaults to a reasonable
+approximation of the radius of the Earth. The calculation uses the Haversine
+formula, which is accurate for points which are close together, but can have
+significant error for coordinates which are on opposite sides of the sphere: on
+the other hand, such points are likely to be at the end of a ranked list of
+search results, so this probably doesn't matter.
+
+Indexing
+========
+
+To index a set of documents with location, you need to store serialised
+latitude-longitude coordinates in a value slot in your documents. To do this,
+use the LatLongCoord class. For example, this is how you might store a
+latitude and longitude corresponding to "London" in value slot 0::
+
+ Xapian::Document doc;
+ doc.add_value(0, Xapian::LatLongCoord(51.53, 0.08).serialise());
+
+Of course, often a location is a bit more complicated than a single point - for
+example, postcode regions in the UK can cover a fairly wide area. If a search
+were to treat such a location as a single point, the distances returned could
+be incorrect by as much as a couple of miles. Xapian therefore allows you to
+store a set of points in a single slot - the distance calculation will return
+the distance to the closest of these points. This is often a good enough work
+around for this problem - if you require greater accuracy, you will need to
+filter the results after they are returned from Xapian.
+
+To store multiple coordinates in a single slot, use the LatLongCoords class::
+
+ Xapian::Document doc;
+ Xapian::LatLongCoords coords;
+ coords.insert(Xapian::LatLongCoord(51.53, 0.08));
+ coords.insert(Xapian::LatLongCoord(51.51, 0.07));
+ coords.insert(Xapian::LatLongCoord(51.52, 0.09));
+ doc.add_value(0, coords.serialise());
+
+(Note that the serialised form of a LatLongCoords object containing a single
+coordinate is exactly the same as the serialised form of the corresponding
+LatLongCoord object.)
+
+Searching
+=========
+
+Sorting results by distance
+---------------------------
+
+If you simply want your results to be returned in order of distance, you can
+use the LatLongDistanceKeyMaker class to calculate sort keys. For example, to
+return results in order of distance from the coordinate (51.00, 0.50), based on
+the values stored in slot 0, and using the great-circle distance::
+
+ Xapian::Database db("my_database");
+ Xapian::Enquire enq(db);
+ enq.set_query(Xapian::Query("my_query"));
+ GreatCircleMetric metric;
+ LatLongCoord centre(51.00, 0.50);
+ Xapian::LatLongDistanceKeyMaker keymaker(0, centre, metric);
+ enq.set_sort_by_key(keymaker, False);
+
+Filtering results by distance
+-----------------------------
+
+To return only those results within a given distance, you can use the
+LatLongDistancePostingSource. For example, to return only those results within
+5 miles of coordinate (51.00, 0.50), based on the values stored in slot 0, and
+using the great-circle distance::
+
+ Xapian::Database db("my_database");
+ Xapian::Enquire enq(db);
+ Xapian::Query q("my_query");
+ GreatCircleMetric metric;
+ LatLongCoord centre(51.00, 0.50);
+ double max_range = Xapian::miles_to_metres(5);
+ Xapian::LatLongDistancePostingSource ps(0, centre, metric, max_range)
+ q = Xapian::Query(Xapian::Query::OP_FILTER, q, Xapian::Query(ps));
+ enq.set_query(q);
+
+Ranking results on a combination of distance and relevance
+----------------------------------------------------------
+
+To return results ranked by a combination of their relevance and their
+distance, you can also use the LatLongDistancePostingSource. Beware that
+getting the right balance of weights is tricky: there is little solid
+theoretical basis for this, so the best approach is often to try various
+different parameters, evalutate the results, and settle on the best. The
+LatLongDistancePostingSource returns a weight of 1.0 for a document which is at
+the specified location, and a lower, but always positive, weight for points
+further away. It has two parameters, k1 and k2, which control how fast the
+weight decays, which can be specified to the constructor (but aren't in this
+example) - see the API documentation for details of these parameters.::
+
+ Xapian::Database db("my_database");
+ Xapian::Enquire enq(db);
+ Xapian::Query q("my_query");
+ GreatCircleMetric metric;
+ LatLongCoord centre(51.00, 0.50);
+ double max_range = Xapian::miles_to_metres(5);
+ Xapian::LatLongDistancePostingSource ps(0, centre, metric, max_range)
+ q = Xapian::Query(Xapian::Query::AND, q, Xapian::Query(ps));
+ enq.set_query(q);
+
+
+Performance
+===========
+
+The location information associated with each document is stored in a document
+value. This allows it to be looked up quickly at search time, so that the
+exact distance from the query location can be calculated. However, this method
+requires that the distance of each potential match is checked, which can be
+expensive.
+
+To gain a performance boost, it is possible to store additional terms in
+documents to identify regions at various scales. There are various ways to
+generate such terms (for example, the O-QTM algorithm referenced below).
+However, the encoding for coodinates that Xapian uses has some nice properties
+which help here. Specifically, the standard encoded form for a coordinate used
+is a 6 byte representation, which identifies a point on the surface of the
+earth to an accuracy of 1/16 of a second (ie, at worst slightly less than 2
+meter accuracy). However, this representation can be truncated to 2 bytes to
+represent a bounding box 1 degree on a side, or to 3, 4 or 5 bytes to get
+successively more accurate bounding boxes.
+
+It would therefore be possible to gain considerable efficiency for range
+restricted searches by storing terms holding each of these successively more
+accurate representations, and to construct a query combining an appropriate set
+of these terms to ensure that only documents which are potentially in a range
+of interest are considered.
+
+It is entirely possible that a more efficient implementation could be performed
+using "R trees" or "KD trees" (or one of the many other tree structures used
+for geospatial indexing - see http://en.wikipedia.org/wiki/Spatial_index for a
+list of some of these). However, using the QTM approach will require minimal
+effort and make use of the existing, and well tested, Xapian database.
+Additionally, by simply generating special terms to restrict the search, the
+existing optimisations of the Xapian query parser are taken advantage of.
+
+References
+==========
+
+The following may be of interest.
+
+The O-QTM algorithm is described in "Dutton, G. (1996). Encoding and handling
+geospatial data with hierarchical triangular meshes. In Kraak, M.J. and
+Molenaar, M. (eds.) Advances in GIS Research II. London: Taylor & Francis,
+505-518." , a copy of which is available from
+http://www.spatial-effects.com/papers/conf/GDutton_SDH96.pdf
+
+Some of the geometry needed to calculate the correct set of QTM IDs to cover a
+particular region is detailed in
+ftp://ftp.research.microsoft.com/pub/tr/tr-2005-123.pdf
+
+Also, see:
+http://www.sdss.jhu.edu/htm/doc/c++/htmInterface.html
diff --git a/xapian-core/geospatial/.gitignore b/xapian-core/geospatial/.gitignore
new file mode 100644
index 0000000..1e2a9f5
--- /dev/null
+++ b/xapian-core/geospatial/.gitignore
@@ -0,0 +1,6 @@
+/.*.sw?
+/*.lo
+/*.obj
+/.deps
+/.libs
+/.dirstamp
diff --git a/xapian-core/geospatial/Makefile b/xapian-core/geospatial/Makefile
new file mode 100644
index 0000000..312b949
--- /dev/null
+++ b/xapian-core/geospatial/Makefile
@@ -0,0 +1,9 @@
+# Makefile for use in directories built by non-recursive make.
+
+SHELL = /bin/sh
+
+all check:
+ cd .. && $(MAKE) $@
+
+clean:
+ rm -f *.o *.obj *.lo
diff --git a/xapian-core/geospatial/Makefile.mk b/xapian-core/geospatial/Makefile.mk
new file mode 100644
index 0000000..07e619d
--- /dev/null
+++ b/xapian-core/geospatial/Makefile.mk
@@ -0,0 +1,13 @@
+EXTRA_DIST += \
+ geospatial/dir_contents \
+ geospatial/Makefile
+
+noinst_HEADERS +=\
+ geospatial/geoencode.h
+
+lib_src += \
+ geospatial/geoencode.cc \
+ geospatial/latlongcoord.cc \
+ geospatial/latlong_distance_keymaker.cc \
+ geospatial/latlong_metrics.cc \
+ geospatial/latlong_posting_source.cc
diff --git a/xapian-core/geospatial/dir_contents b/xapian-core/geospatial/dir_contents
new file mode 100644
index 0000000..8baeff7
--- /dev/null
+++ b/xapian-core/geospatial/dir_contents
@@ -0,0 +1,5 @@
+<Directory>geospatial</Directory>
+
+<Description>
+Support for geospatial matching, and parsing of locations.
+</Description>
diff --git a/xapian-core/geospatial/geoencode.cc b/xapian-core/geospatial/geoencode.cc
new file mode 100644
index 0000000..4eaabb9
--- /dev/null
+++ b/xapian-core/geospatial/geoencode.cc
@@ -0,0 +1,268 @@
+/** @file geoencode.cc
+ * @brief Encodings for geospatial coordinates.
+ */
+/* Copyright (C) 2011 Richard Boulton
+ * Based closely on a python version, copyright (C) 2010 Olly Betts
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <config.h>
+#include "geoencode.h"
+
+#include <cmath>
+
+using namespace std;
+
+/** Angles, split into degrees, minutes and seconds.
+ *
+ * Only designed to work with positive angles.
+ */
+struct DegreesMinutesSeconds {
+ /** Number of degrees.
+ *
+ * Range 0 <= degrees <= 180 for latitude, 0 <= degrees < 360 for
+ * longitude.
+ */
+ int degrees;
+
+ /** Number of minutes: 0 to 59 */
+ int minutes;
+
+ /** Number of seconds: 0 to 59 */
+ int seconds;
+
+ /** Number of 16ths of a second: 0 to 15 */
+ int sec16ths;
+
+ /** Initialise with a (positive) angle, as an integer representing the
+ * number of 16ths of a second, rounding to nearest.
+ *
+ * The range of valid angles is assumed to be 0 <= angle in degrees < 360,
+ * so range of angle_16th_secs is 0..20735999, which fits easily into a 32
+ * bit int. (Latitudes are represented in the range 0 <= angle <= 180,
+ * where 0 is the south pole.)
+ */
+ DegreesMinutesSeconds(int angle_16th_secs) {
+ degrees = angle_16th_secs / (3600 * 16);
+ angle_16th_secs = angle_16th_secs % (3600 * 16);
+ minutes = angle_16th_secs / (60 * 16);
+ angle_16th_secs = angle_16th_secs % (60 * 16);
+ seconds = angle_16th_secs / 16;
+ sec16ths = angle_16th_secs % 16;
+ }
+};
+
+bool
+GeoEncode::encode(double lat, double lon, string & result)
+{
+ // Check range of latitude.
+ if (rare(lat < -90.0 || lat > 90.0)) {
+ return false;
+ }
+
+ // Wrap longitude to range [0,360).
+ lon = fmod(lon, 360.0);
+ if (lon < 0) {
+ lon += 360;
+ }
+
+ int lat_16ths, lon_16ths;
+ lat_16ths = round((lat + 90.0) * 57600.0);
+ if (lat_16ths == 0 || lat_16ths == 57600 * 180) {
+ lon_16ths = 0;
+ } else {
+ lon_16ths = round(lon * 57600.0);
+ if (lon_16ths == 57600 * 360) {
+ lon_16ths = 0;
+ }
+ }
+
+ DegreesMinutesSeconds lat_dms(lat_16ths);
+ DegreesMinutesSeconds lon_dms(lon_16ths);
+
+ size_t old_len = result.size();
+ result.resize(old_len + 6);
+
+ // Add degrees parts as first two bytes.
+ unsigned dd = lat_dms.degrees + lon_dms.degrees * 181;
+ // dd is in range 0..180*360+359 = 0..65159
+ result[old_len] = char(dd >> 8);
+ result[old_len + 1] = char(dd & 0xff);
+
+ // Add minutes next; 4 bits from each in the first byte.
+ result[old_len + 2] = char(((lat_dms.minutes / 4) << 4) |
+ (lon_dms.minutes / 4)
+ );
+
+ result[old_len + 3] = char(
+ ((lat_dms.minutes % 4) << 6) |
+ ((lon_dms.minutes % 4) << 4) |
+ ((lat_dms.seconds / 15) << 2) |
+ (lon_dms.seconds / 15)
+ );
+
+ result[old_len + 4] = char(
+ ((lat_dms.seconds % 15) << 4) |
+ (lon_dms.seconds % 15)
+ );
+
+ result[old_len + 5] = char(
+ (lat_dms.sec16ths << 4) |
+ lon_dms.sec16ths
+ );
+
+ return true;
+}
+
+void
+GeoEncode::decode(const char * value, size_t len,
+ double & lat_ref, double & lon_ref)
+{
+ const unsigned char * ptr
+ = reinterpret_cast<const unsigned char *>(value);
+ unsigned tmp = (ptr[0] & 0xff) << 8 | (ptr[1] & 0xff);
+ lat_ref = tmp % 181;
+ lon_ref = tmp / 181;
+ if (len > 2) {
+ tmp = ptr[2];
+ double lat_m = (tmp >> 4) * 4;
+ double lon_m = (tmp & 0xf) * 4;
+
+ if (len > 3) {
+ tmp = ptr[3];
+ lat_m += (tmp >> 6) & 3;
+ lon_m += (tmp >> 4) & 3;
+ double lat_s = ((tmp >> 2) & 3) * 15;
+ double lon_s = (tmp & 3) * 15;
+
+ if (len > 4) {
+ tmp = ptr[4];
+ lat_s += (tmp >> 4) & 0xf;
+ lon_s += tmp & 0xf;
+
+ if (len > 5) {
+ tmp = ptr[5];
+ lat_s += ((tmp >> 4) / 16.0);
+ lon_s += ((tmp & 0xf) / 16.0);
+ }
+ }
+
+ lat_m += lat_s / 60.0;
+ lon_m += lon_s / 60.0;
+ }
+
+ lat_ref += lat_m / 60.0;
+ lon_ref += lon_m / 60.0;
+ }
+
+ lat_ref -= 90.0;
+}
+
+/// Calc latitude and longitude in integral number of 16ths of a second
+static void
+calc_latlon_16ths(double lat, double lon, int & lat_16ths, int & lon_16ths)
+{
+ lat_16ths = round((lat + 90.0) * 57600.0);
+ lon_16ths = round(lon * 57600.0);
+ if (lon_16ths == 57600 * 360) {
+ lon_16ths = 0;
+ }
+}
+
+GeoEncode::DecoderWithBoundingBox::DecoderWithBoundingBox
+(double lat1, double lon1_, double lat2, double lon2_)
+ : lon1(lon1_), lon2(lon2_),
+ min_lat(lat1), max_lat(lat2),
+ include_poles(false)
+{
+ // Wrap longitudes to range [0,360).
+ lon1 = fmod(lon1, 360.0);
+ if (lon1 < 0) {
+ lon1 += 360;
+ }
+
+ lon2 = fmod(lon2, 360.0);
+ if (lon2 < 0) {
+ lon2 += 360;
+ }
+
+ // Calculate start1
+ int lat_16ths, lon_16ths;
+ calc_latlon_16ths(lat1, lon1, lat_16ths, lon_16ths);
+ if (lat_16ths == 0 || lat_16ths == 57600 * 180) {
+ include_poles = true;
+ }
+ unsigned dd = lat_16ths / (3600 * 16) + (lon_16ths / (3600 * 16)) * 181;
+ start1 = char(dd >> 8);
+
+ calc_latlon_16ths(lat2, lon2, lat_16ths, lon_16ths);
+ if (lat_16ths == 0 || lat_16ths == 57600 * 180) {
+ include_poles = true;
+ }
+ dd = lat_16ths / (3600 * 16) + (lon_16ths / (3600 * 16)) * 181;
+ start2 = char(dd >> 8);
+
+ discontinuous_longitude_range = (lon1 > lon2);
+}
+
+bool
+GeoEncode::DecoderWithBoundingBox::decode(const std::string & value,
+ double & lat_ref,
+ double & lon_ref) const
+{
+ unsigned char start = value[0];
+ if (discontinuous_longitude_range) {
+ // start must be outside range of (start2..start1)
+ // (start2 will be > start1)
+ if (start2 < start && start < start1) {
+ if (!(include_poles && start == 0))
+ return false;
+ }
+ } else {
+ // start must be inside range of [start1..start2] (inclusive of ends).
+ if (start < start1 || start2 < start) {
+ if (!(include_poles && start == 0))
+ return false;
+ }
+ }
+ double lat, lon;
+ GeoEncode::decode(value, lat, lon);
+ if (lat < min_lat || lat > max_lat) {
+ return false;
+ }
+ if (lat == -90 || lat == 90) {
+ // It's a pole, so the longitude isn't meaningful (will be zero)
+ // and we've already checked that the latitude is in range.
+ lat_ref = lat;
+ lon_ref = 0;
+ return true;
+ }
+ if (discontinuous_longitude_range) {
+ if (lon2 < lon && lon < lon1)
+ return false;
+ } else {
+ if (lon < lon1 || lon2 < lon)
+ return false;
+ }
+
+ lat_ref = lat;
+ lon_ref = lon;
+ return true;
+}
diff --git a/xapian-core/geospatial/geoencode.h b/xapian-core/geospatial/geoencode.h
new file mode 100644
index 0000000..3d0308a
--- /dev/null
+++ b/xapian-core/geospatial/geoencode.h
@@ -0,0 +1,159 @@
+/** @file geoencode.h
+ * @brief Encodings for geospatial coordinates.
+ */
+/* Copyright (C) 2011 Richard Boulton
+ * Based closely on a python version, copyright (C) 2010 Olly Betts
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef GEOENCODE_INCLUDED_H
+#define GEOENCODE_INCLUDED_H
+
+#include <string>
+
+namespace GeoEncode {
+
+/** Encode a coordinate and append it to a string.
+ *
+ * @param lat The latitude coordinate in degrees (ranging from -90 to +90)
+ * @param lon The longitude coordinate in degrees (any range is valid -
+ * longitudes will be wrapped). Note that decoding will return
+ * longitudes in the range 0 <= value < 360.
+ * @param result The string to append the result to.
+ *
+ * @returns true if the encoding was successful, false if there was an error.
+ * If there was an error, the result value will be unmodified. The only cause
+ * of error is out-of-range latitudes. If there was no error, the string will
+ * have been extended by 6 bytes.
+ */
+extern bool
+encode(double lat, double lon, std::string & result);
+
+/** Decode a coordinate from a buffer.
+ *
+ * @param value A pointer to the start of the buffer to decode.
+ * @param len The length of the buffer in bytes. The buffer must be at least 2
+ * bytes long (this constraint is not checked).
+ * @param lat_ref A reference to a value to return the latitude in.
+ * @param lon_ref A reference to a value to return the longitude in.
+ *
+ * @returns The decoded coordinate.
+ *
+ * No errors will be returned; any junk at the end of the value (ie, after the
+ * first 6 bytes) will be ignored, and it is possible for invalid inputs to
+ * result in out-of-range longitudes.
+ */
+extern void
+decode(const char * value, size_t len, double & lat_ref, double & lon_ref);
+
+/** Decode a coordinate from a string.
+ *
+ * @param value The string to decode. This must be at least 2 bytes long (this
+ * constraint is not checked).
+ * @param lat_ref A reference to a value to return the latitude in.
+ * @param lon_ref A reference to a value to return the longitude in.
+ *
+ * @returns The decoded coordinate.
+ *
+ * No errors will be returned; any junk at the end of the value (ie, after the
+ * first 6 bytes) will be ignored, and it is possible for invalid inputs to
+ * result in out-of-range longitudes.
+ */
+inline void
+decode(const std::string & value, double & lat_ref, double & lon_ref)
+{
+ return GeoEncode::decode(value.data(), value.size(), lat_ref, lon_ref);
+}
+
+/** A class for decoding coordinates within a bounding box.
+ *
+ * This class aborts decoding if it is easily able to determine that the
+ * encoded coordinate supplied is outside the bounding box, avoiding some
+ * unnecessary work.
+ */
+class DecoderWithBoundingBox {
+ /** Longitude at western edge of bounding box.
+ */
+ double lon1;
+
+ /** Longitude at eastern edge of bounding box.
+ */
+ double lon2;
+
+ /** Minimum latitude in bounding box.
+ */
+ double min_lat;
+
+ /** Maximum latitude in bounding box.
+ */
+ double max_lat;
+
+ /** First byte of encoded form of coordinates with lon1.
+ */
+ unsigned char start1;
+
+ /** First byte of encoded form of coordinates with lon2.
+ */
+ unsigned char start2;
+
+ /** True if either of the poles are included in the range.
+ */
+ bool include_poles;
+
+ /** Flag; true if the longitude range is discontinuous (ie, goes over the
+ * boundary at which longitudes wrap from 360 to 0).
+ */
+ bool discontinuous_longitude_range;
+
+ public:
+ /** Create a decoder with a bounding box.
+ *
+ * The decoder will decode any encoded coordinates which lie inside the
+ * bounding box, and return false for any which lie outside the bounding
+ * box.
+ *
+ * @param lat1 The latitude of the southern edge of the bounding box.
+ * @param lon1 The longitude of the western edge of the bounding box.
+ * @param lat2 The latitude of the northern edge of the bounding box.
+ * @param lon2 The longitude of the eastern edge of the bounding box.
+ */
+ DecoderWithBoundingBox(double lat1, double lon1, double lat2, double lon2);
+
+ /** Decode a coordinate.
+ *
+ * @param value The coordinate to decode.
+ * @param lat_ref A reference to a value to return the latitude in.
+ * @param lon_ref A reference to a value to return the longitude in.
+ *
+ * @returns true if the coordinate was in the bounding box (in which case,
+ * @a result will have been updated to contain the coordinate),
+ * or false if the coordinate is outside the bounding box.
+ *
+ * Note; if this returns false, the values of @a lat_ref and @a lon_ref
+ * may not have been updated, or may have been updated to incorrect
+ * values, due to aborting decoding of the coordinate part-way through.
+ */
+ bool decode(const std::string & value,
+ double & lat_ref, double & lon_ref) const;
+};
+
+}
+
+#endif /* GEOENCODE_INCLUDED_H */
diff --git a/xapian-core/geospatial/latlong_distance_keymaker.cc b/xapian-core/geospatial/latlong_distance_keymaker.cc
new file mode 100644
index 0000000..1a27fd8
--- /dev/null
+++ b/xapian-core/geospatial/latlong_distance_keymaker.cc
@@ -0,0 +1,49 @@
+/** @file latlong_distance_keymaker.cc
+ * @brief LatLongDistanceKeyMaker implementation.
+ */
+/* Copyright 2008 Lemur Consulting Ltd
+ * Copyright 2011 Richard Boulton
+ * Copyright 2012 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#include <config.h>
+
+#include "xapian/geospatial.h"
+#include "xapian/document.h"
+#include "xapian/queryparser.h" // For sortable_serialise.
+
+using namespace Xapian;
+using namespace std;
+
+string
+LatLongDistanceKeyMaker::operator()(const Document &doc) const
+{
+ string val(doc.get_value(slot));
+ if (val.empty()) {
+ return defkey;
+ }
+ LatLongCoords doccoords;
+ doccoords.unserialise(val);
+ double distance = (*metric)(centre, doccoords);
+ return sortable_serialise(distance);
+}
+
+LatLongDistanceKeyMaker::~LatLongDistanceKeyMaker()
+{
+ delete metric;
+}
diff --git a/xapian-core/geospatial/latlong_metrics.cc b/xapian-core/geospatial/latlong_metrics.cc
new file mode 100644
index 0000000..9078e74
--- /dev/null
+++ b/xapian-core/geospatial/latlong_metrics.cc
@@ -0,0 +1,166 @@
+/** @file latlong_metrics.cc
+ * @brief Geospatial distance metrics.
+ */
+/* Copyright 2008 Lemur Consulting Ltd
+ * Copyright 2011 Richard Boulton
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#include <config.h>
+
+#include "xapian/geospatial.h"
+#include "xapian/error.h"
+#include "serialise-double.h"
+
+#include <cmath>
+
+using namespace Xapian;
+using namespace std;
+
+/** Quadratic mean radius of the Earth in metres.
+ */
+#define QUAD_EARTH_RADIUS_METRES 6372797.6
+
+/** Set M_PI if it's not already set.
+ */
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+LatLongMetric::~LatLongMetric()
+{
+}
+
+double
+LatLongMetric::operator()(const LatLongCoords & a,
+ const LatLongCoords &b) const
+{
+ if (a.empty() || b.empty()) {
+ throw InvalidArgumentError("Empty coordinate list supplied to LatLongMetric::operator()().");
+ }
+ double min_dist = 0.0;
+ bool have_min = false;
+ for (LatLongCoordsIterator a_iter = a.begin();
+ a_iter != a.end();
+ ++a_iter)
+ {
+ for (LatLongCoordsIterator b_iter = b.begin();
+ b_iter != b.end();
+ ++b_iter)
+ {
+ double dist = pointwise_distance(*a_iter, *b_iter);
+ if (!have_min) {
+ min_dist = dist;
+ have_min = true;
+ } else if (dist < min_dist) {
+ min_dist = dist;
+ }
+ }
+ }
+ return min_dist;
+}
+
+double
+LatLongMetric::operator()(const LatLongCoords & a,
+ const char * b_ptr, size_t b_len) const
+{
+ if (a.empty() || b_len == 0) {
+ throw InvalidArgumentError("Empty coordinate list supplied to LatLongMetric::operator()().");
+ }
+ double min_dist = 0.0;
+ bool have_min = false;
+ LatLongCoord b;
+ const char * b_end = b_ptr + b_len;
+ while (b_ptr != b_end) {
+ b.unserialise(&b_ptr, b_end);
+ for (LatLongCoordsIterator a_iter = a.begin();
+ a_iter != a.end();
+ ++a_iter)
+ {
+ double dist = pointwise_distance(*a_iter, b);
+ if (!have_min) {
+ min_dist = dist;
+ have_min = true;
+ } else if (dist < min_dist) {
+ min_dist = dist;
+ }
+ }
+ }
+ return min_dist;
+}
+
+
+GreatCircleMetric::GreatCircleMetric()
+ : radius(QUAD_EARTH_RADIUS_METRES)
+{}
+
+GreatCircleMetric::GreatCircleMetric(double radius_)
+ : radius(radius_)
+{}
+
+double
+GreatCircleMetric::pointwise_distance(const LatLongCoord & a,
+ const LatLongCoord & b) const
+{
+ double lata = a.latitude * (M_PI / 180.0);
+ double latb = b.latitude * (M_PI / 180.0);
+
+ double latdiff = lata - latb;
+ double longdiff = (a.longitude - b.longitude) * (M_PI / 180.0);
+
+ double sin_half_lat = sin(latdiff / 2);
+ double sin_half_long = sin(longdiff / 2);
+ double h = sin_half_lat * sin_half_lat +
+ sin_half_long * sin_half_long * cos(lata) * cos(latb);
+ if (rare(h > 1.0)) {
+ // Clamp to 1.0, asin(1.0) = M_PI / 2.0.
+ return radius * M_PI;
+ }
+ return 2 * radius * asin(sqrt(h));
+}
+
+LatLongMetric *
+GreatCircleMetric::clone() const
+{
+ return new GreatCircleMetric(radius);
+}
+
+string
+GreatCircleMetric::name() const
+{
+ return "Xapian::GreatCircleMetric";
+}
+
+string
+GreatCircleMetric::serialise() const
+{
+ return serialise_double(radius);
+}
+
+LatLongMetric *
+GreatCircleMetric::unserialise(const string & s) const
+{
+ const char * p = s.data();
+ const char * end = p + s.size();
+
+ double new_radius = unserialise_double(&p, end);
+ if (p != end) {
+ throw Xapian::NetworkError("Bad serialised GreatCircleMetric - junk at end");
+ }
+
+ return new GreatCircleMetric(new_radius);
+}
diff --git a/xapian-core/geospatial/latlong_posting_source.cc b/xapian-core/geospatial/latlong_posting_source.cc
new file mode 100644
index 0000000..97583de
--- /dev/null
+++ b/xapian-core/geospatial/latlong_posting_source.cc
@@ -0,0 +1,260 @@
+/** @file latlong_posting_source.cc
+ * @brief LatLongPostingSource implementation.
+ */
+/* Copyright 2008 Lemur Consulting Ltd
+ * Copyright 2010,2011 Richard Boulton
+ * Copyright 2012 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#include <config.h>
+
+#include "xapian/geospatial.h"
+
+#include "xapian/error.h"
+#include "xapian/registry.h"
+
+#include "common/serialise.h"
+#include "serialise-double.h"
+#include "str.h"
+
+#include <cmath>
+
+using namespace Xapian;
+using namespace std;
+
+static double
+weight_from_distance(double dist, double k1, double k2)
+{
+ return k1 * pow(dist + k1, -k2);
+}
+
+void
+LatLongDistancePostingSource::calc_distance()
+{
+ dist = (*metric)(centre, *value_it);
+}
+
+/// Validate the parameters supplied to LatLongDistancePostingSource.
+static void
+validate_postingsource_params(double k1, double k2) {
+ if (k1 <= 0) {
+ string msg("k1 parameter to LatLongDistancePostingSource must be "
+ "greater than 0; was ");
+ msg += str(k1);
+ throw InvalidArgumentError(msg);
+ }
+ if (k2 <= 0) {
+ string msg("k2 parameter to LatLongDistancePostingSource must be "
+ "greater than 0; was ");
+ msg += str(k2);
+ throw InvalidArgumentError(msg);
+ }
+}
+
+LatLongDistancePostingSource::LatLongDistancePostingSource(
+ valueno slot_,
+ const LatLongCoords & centre_,
+ const LatLongMetric * metric_,
+ double max_range_,
+ double k1_,
+ double k2_)
+ : ValuePostingSource(slot_),
+ centre(centre_),
+ metric(metric_),
+ max_range(max_range_),
+ k1(k1_),
+ k2(k2_)
+{
+ validate_postingsource_params(k1, k2);
+ set_maxweight(weight_from_distance(0, k1, k2));
+}
+
+LatLongDistancePostingSource::LatLongDistancePostingSource(
+ valueno slot_,
+ const LatLongCoords & centre_,
+ const LatLongMetric & metric_,
+ double max_range_,
+ double k1_,
+ double k2_)
+ : ValuePostingSource(slot_),
+ centre(centre_),
+ metric(metric_.clone()),
+ max_range(max_range_),
+ k1(k1_),
+ k2(k2_)
+{
+ validate_postingsource_params(k1, k2);
+ set_maxweight(weight_from_distance(0, k1, k2));
+}
+
+LatLongDistancePostingSource::~LatLongDistancePostingSource()
+{
+ delete metric;
+}
+
+void
+LatLongDistancePostingSource::next(double min_wt)
+{
+ ValuePostingSource::next(min_wt);
+
+ while (value_it != db.valuestream_end(slot)) {
+ calc_distance();
+ if (max_range == 0 || dist <= max_range)
+ break;
+ ++value_it;
+ }
+}
+
+void
+LatLongDistancePostingSource::skip_to(docid min_docid,
+ double min_wt)
+{
+ ValuePostingSource::skip_to(min_docid, min_wt);
+
+ while (value_it != db.valuestream_end(slot)) {
+ calc_distance();
+ if (max_range == 0 || dist <= max_range)
+ break;
+ ++value_it;
+ }
+}
+
+bool
+LatLongDistancePostingSource::check(docid min_docid,
+ double min_wt)
+{
+ if (!ValuePostingSource::check(min_docid, min_wt)) {
+ // check returned false, so we know the document is not in the source.
+ return false;
+ }
+ if (value_it == db.valuestream_end(slot)) {
+ // return true, since we're definitely at the end of the list.
+ return true;
+ }
+
+ calc_distance();
+ if (max_range > 0 && dist > max_range) {
+ return false;
+ }
+ return true;
+}
+
+double
+LatLongDistancePostingSource::get_weight() const
+{
+ return weight_from_distance(dist, k1, k2);
+}
+
+LatLongDistancePostingSource *
+LatLongDistancePostingSource::clone() const
+{
+ return new LatLongDistancePostingSource(slot, centre,
+ metric->clone(),
+ max_range, k1, k2);
+}
+
+string
+LatLongDistancePostingSource::name() const
+{
+ return "Xapian::LatLongDistancePostingSource";
+}
+
+string
+LatLongDistancePostingSource::serialise() const
+{
+ string serialised_centre = centre.serialise();
+ string metric_name = metric->name();
+ string serialised_metric = metric->serialise();
+
+ string result = encode_length(slot);
+ result += encode_length(serialised_centre.size());
+ result += serialised_centre;
+ result += encode_length(metric_name.size());
+ result += metric_name;
+ result += encode_length(serialised_metric.size());
+ result += serialised_metric;
+ result += serialise_double(max_range);
+ result += serialise_double(k1);
+ result += serialise_double(k2);
+ return result;
+}
+
+LatLongDistancePostingSource *
+LatLongDistancePostingSource::unserialise_with_registry(const string &s,
+ const Registry & registry) const
+{
+ const char * p = s.data();
+ const char * end = p + s.size();
+
+ valueno new_slot = decode_length(&p, end, false);
+ size_t len = decode_length(&p, end, true);
+ string new_serialised_centre(p, len);
+ p += len;
+ len = decode_length(&p, end, true);
+ string new_metric_name(p, len);
+ p += len;
+ len = decode_length(&p, end, true);
+ string new_serialised_metric(p, len);
+ p += len;
+ double new_max_range = unserialise_double(&p, end);
+ double new_k1 = unserialise_double(&p, end);
+ double new_k2 = unserialise_double(&p, end);
+ if (p != end) {
+ throw NetworkError("Bad serialised LatLongDistancePostingSource - junk at end");
+ }
+
+ LatLongCoords new_centre;
+ new_centre.unserialise(new_serialised_centre);
+
+ const Xapian::LatLongMetric * metric_type =
+ registry.get_lat_long_metric(new_metric_name);
+ if (metric_type == NULL) {
+ string msg("LatLongMetric ");
+ msg += new_metric_name;
+ msg += " not registered";
+ throw InvalidArgumentError(msg);
+ }
+ LatLongMetric * new_metric =
+ metric_type->unserialise(new_serialised_metric);
+
+ return new LatLongDistancePostingSource(new_slot, new_centre,
+ new_metric,
+ new_max_range, new_k1, new_k2);
+}
+
+void
+LatLongDistancePostingSource::init(const Database & db_)
+{
+ ValuePostingSource::init(db_);
+ if (max_range > 0.0) {
+ // Possible that no documents are in range.
+ termfreq_min = 0;
+ // Note - would be good to improve termfreq_est here, too, but
+ // I can't think of anything we can do with the information
+ // available.
+ }
+}
+
+string
+LatLongDistancePostingSource::get_description() const
+{
+ string result("Xapian::LatLongDistancePostingSource(slot=");
+ result += str(slot);
+ result += ")";
+ return result;
+}
diff --git a/xapian-core/geospatial/latlongcoord.cc b/xapian-core/geospatial/latlongcoord.cc
new file mode 100644
index 0000000..9504b27
--- /dev/null
+++ b/xapian-core/geospatial/latlongcoord.cc
@@ -0,0 +1,140 @@
+/** @file latlongcoord.cc
+ * @brief Latitude and longitude representations.
+ */
+/* Copyright 2008 Lemur Consulting Ltd
+ * Copyright 2011 Richard Boulton
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#include <config.h>
+
+#include "xapian/geospatial.h"
+#include "xapian/error.h"
+
+#include "geoencode.h"
+#include "common/serialise.h"
+#include "serialise-double.h"
+#include "str.h"
+
+#include <cmath>
+
+using namespace Xapian;
+using namespace std;
+
+LatLongCoord::LatLongCoord(double latitude_, double longitude_)
+ : latitude(latitude_),
+ longitude(longitude_)
+{
+ if (latitude < -90.0 || latitude > 90.0)
+ throw InvalidArgumentError("Latitude out-of-range");
+ longitude = fmod(longitude_, 360);
+ if (longitude < 0) longitude += 360;
+}
+
+void
+LatLongCoord::unserialise(const string & serialised)
+{
+ const char * ptr = serialised.data();
+ const char * end = ptr + serialised.size();
+ unserialise(&ptr, end);
+ if (ptr != end)
+ throw SerialisationError(
+ "Junk found at end of serialised LatLongCoord");
+}
+
+void
+LatLongCoord::unserialise(const char ** ptr, const char * end)
+{
+ size_t len = end - *ptr;
+ if (len < 2) {
+ latitude = 0;
+ longitude = 0;
+ return;
+ }
+ GeoEncode::decode(*ptr, end - *ptr, latitude, longitude);
+ if (len < 6) {
+ *ptr = end;
+ } else {
+ *ptr += 6;
+ }
+}
+
+string
+LatLongCoord::serialise() const
+{
+ string result;
+ GeoEncode::encode(latitude, longitude, result);
+ return result;
+}
+
+string
+LatLongCoord::get_description() const
+{
+ string res("Xapian::LatLongCoord(");
+ res += str(latitude);
+ res += ", ";
+ res += str(longitude);
+ res += ")";
+ return res;
+}
+
+void
+LatLongCoords::unserialise(const string & serialised)
+{
+ const char * ptr = serialised.data();
+ const char * end_ptr = ptr + serialised.size();
+ coords.clear();
+ while (ptr != end_ptr) {
+ coords.push_back(LatLongCoord());
+ coords.back().unserialise(&ptr, end_ptr);
+ }
+ if (ptr != end_ptr) {
+ throw SerialisationError("Junk found at end of serialised "
+ "LatLongCoords");
+ }
+}
+
+string
+LatLongCoords::serialise() const
+{
+ string result;
+ vector<LatLongCoord>::const_iterator coord;
+ for (coord = coords.begin(); coord != coords.end(); ++coord)
+ {
+ GeoEncode::encode(coord->latitude, coord->longitude, result);
+ }
+ return result;
+}
+
+string
+LatLongCoords::get_description() const
+{
+ string res("Xapian::LatLongCoords(");
+ vector<LatLongCoord>::const_iterator coord;
+ for (coord = coords.begin(); coord != coords.end(); ++coord) {
+ if (coord != coords.begin()) {
+ res += ", ";
+ }
+ res += "(";
+ res += str(coord->latitude);
+ res += ", ";
+ res += str(coord->longitude);
+ res += ")";
+ }
+ res += ")";
+ return res;
+}
diff --git a/xapian-core/include/Makefile.mk b/xapian-core/include/Makefile.mk
index 987ceb5..4037521 100644
--- a/xapian-core/include/Makefile.mk
+++ b/xapian-core/include/Makefile.mk
@@ -35,6 +35,7 @@ xapianinclude_HEADERS =\
include/xapian/unicode.h\
include/xapian/valueiterator.h\
include/xapian/valuesetmatchdecider.h\
+ include/xapian/geospatial.h\
include/xapian/visibility.h\
include/xapian/weight.h
diff --git a/xapian-core/include/xapian.h b/xapian-core/include/xapian.h
index 3d2a529..d59cac7 100644
--- a/xapian-core/include/xapian.h
+++ b/xapian-core/include/xapian.h
@@ -72,6 +72,9 @@
// Unicode support
#include <xapian/unicode.h>
+// Geospatial
+#include <xapian/geospatial.h>
+
// Database compaction and merging
#include <xapian/compactor.h>
diff --git a/xapian-core/include/xapian/geospatial.h b/xapian-core/include/xapian/geospatial.h
new file mode 100644
index 0000000..c8a5ec8
--- /dev/null
+++ b/xapian-core/include/xapian/geospatial.h
@@ -0,0 +1,565 @@
+/** @file geospatial.h
+ * @brief Geospatial search support routines.
+ */
+/* Copyright 2008,2009 Lemur Consulting Ltd
+ * Copyright 2010,2011 Richard Boulton
+ * Copyright 2012 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#ifndef XAPIAN_INCLUDED_GEOSPATIAL_H
+#define XAPIAN_INCLUDED_GEOSPATIAL_H
+
+#include <iterator>
+#include <vector>
+#include <string>
+
+#include <xapian/derefwrapper.h>
+#include <xapian/keymaker.h>
+#include <xapian/postingsource.h>
+#include <xapian/queryparser.h> // For sortable_serialise
+#include <xapian/visibility.h>
+
+namespace Xapian {
+
+class Registry;
+
+/** Convert from miles to metres.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ */
+inline double
+miles_to_metres(double miles)
+{
+ return 1609.344 * miles;
+}
+
+/** Convert from metres to miles.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ */
+inline double
+metres_to_miles(double metres)
+{
+ return metres * (1.0 / 1609.344);
+}
+
+/** A latitude-longitude coordinate.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ *
+ * Note that latitude-longitude coordinates are only precisely meaningful if
+ * the datum used to define them is specified. This class ignores this
+ * issue - it is up to the caller to ensure that the datum used for each
+ * coordinate in a system is consistent.
+ */
+struct XAPIAN_VISIBILITY_DEFAULT LatLongCoord {
+ /** A latitude, as decimal degrees.
+ *
+ * Should be in the range -90 <= latitude <= 90
+ *
+ * Positive latitudes represent the northern hemisphere.
+ */
+ double latitude;
+
+ /** A longitude, as decimal degrees.
+ *
+ * Will be wrapped around, so for example, -150 is equal to 210. When
+ * obtained from a serialised form, will be in the range 0 <= longitude <
+ * 360.
+ *
+ * Longitudes increase as coordinates move eastwards.
+ */
+ double longitude;
+
+ /** Construct an uninitialised coordinate.
+ */
+ LatLongCoord() {}
+
+ /** Construct a coordinate.
+ *
+ * If the supplied longitude is out of the standard range, it will be
+ * normalised to the range 0 <= longitude < 360.
+ *
+ * If you want to avoid the checks (for example, you know that your values
+ * are already in range), you can use the alternate constructor to
+ * construct an uninitialised coordinate, and then set the latitude and
+ * longitude directly.
+ *
+ * @exception InvalidArgumentError the supplied latitude is out of range.
+ */
+ LatLongCoord(double latitude_, double longitude_);
+
+ /** Unserialise a string and set this object to its coordinate.
+ *
+ * @param serialised the string to unserialise the coordinate from.
+ *
+ * @exception Xapian::SerialisationError if the string does not contain
+ * a valid serialised latitude-longitude pair, or contains extra data at
+ * the end of it.
+ */
+ void unserialise(const std::string & serialised);
+
+ /** Unserialise a buffer and set this object to its coordinate.
+ *
+ * The buffer may contain further data after that for the coordinate.
+ *
+ * @param ptr A pointer to the start of the string. This will be updated
+ * to point to the end of the data representing the coordinate.
+ * @param end A pointer to the end of the string.
+ *
+ * @exception Xapian::SerialisationError if the string does not start with
+ * a valid serialised latitude-longitude pair.
+ */
+ void unserialise(const char ** ptr, const char * end);
+
+ /** Return a serialised representation of the coordinate.
+ */
+ std::string serialise() const;
+
+ /** Compare with another LatLongCoord.
+ */
+ bool operator<(const LatLongCoord & other) const
+ {
+ if (latitude < other.latitude) return true;
+ if (latitude > other.latitude) return false;
+ return (longitude < other.longitude);
+ }
+
+ /// Return a string describing this object.
+ std::string get_description() const;
+};
+
+/** An iterator across the values in a LatLongCoords object.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ */
+class XAPIAN_VISIBILITY_DEFAULT LatLongCoordsIterator {
+ /// Friend class which needs to be able to construct us.
+ friend class LatLongCoords;
+
+ /// The current position of the iterator.
+ std::vector<LatLongCoord>::const_iterator iter;
+
+ /// Constructor used by LatLongCoords.
+ LatLongCoordsIterator(std::vector<LatLongCoord>::const_iterator iter_)
+ : iter(iter_) {}
+
+ public:
+ /// Default constructor. Produces an uninitialised iterator.
+ LatLongCoordsIterator() {}
+
+ const LatLongCoord & operator *() const {
+ return *iter;
+ }
+
+ LatLongCoordsIterator & operator++() {
+ ++iter;
+ return *this;
+ }
+
+ DerefWrapper_<LatLongCoord> operator++(int) {
+ const LatLongCoord & tmp = **this;
+ ++iter;
+ return DerefWrapper_<LatLongCoord>(tmp);
+ }
+
+ bool operator==(const LatLongCoordsIterator &other) const
+ {
+ return iter == other.iter;
+ }
+
+ // Allow use as an STL iterator
+ typedef std::input_iterator_tag iterator_category;
+ typedef LatLongCoord value_type;
+ typedef size_t difference_type;
+ typedef LatLongCoord * pointer;
+ typedef LatLongCoord & reference;
+};
+
+/** A sequence of latitude-longitude coordinates.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ */
+class XAPIAN_VISIBILITY_DEFAULT LatLongCoords {
+ /// The coordinates.
+ std::vector<LatLongCoord> coords;
+
+ public:
+ /// Get a begin iterator for the coordinates.
+ LatLongCoordsIterator begin() const {
+ return LatLongCoordsIterator(coords.begin());
+ }
+
+ /// Get an end iterator for the coordinates.
+ LatLongCoordsIterator end() const {
+ return LatLongCoordsIterator(coords.end());
+ }
+
+ /// Get the number of coordinates in the container.
+ size_t size() const
+ {
+ return coords.size();
+ }
+
+ /// Return true if and only if there are no coordinates in the container.
+ bool empty() const
+ {
+ return coords.empty();
+ }
+
+ /// Append a coordinate to the end of the sequence.
+ void append(const LatLongCoord & coord)
+ {
+ coords.push_back(coord);
+ }
+
+ /// Construct an empty container.
+ LatLongCoords() : coords() {}
+
+ /// Construct a container holding one coordinate.
+ LatLongCoords(const LatLongCoord & coord) : coords()
+ {
+ coords.push_back(coord);
+ }
+
+ /** Unserialise a string and set this object to the coordinates in it.
+ *
+ * @param serialised the string to unserialise the coordinates from.
+ *
+ * @exception Xapian::SerialisationError if the string does not contain
+ * a valid serialised latitude-longitude pair, or contains junk at the end
+ * of it.
+ */
+ void unserialise(const std::string & serialised);
+
+ /** Return a serialised form of the coordinate list.
+ */
+ std::string serialise() const;
+
+ /// Return a string describing this object.
+ std::string get_description() const;
+};
+
+/// Inequality test for LatLongCoordsIterator objects.
+inline bool
+operator!=(const LatLongCoordsIterator &a, const LatLongCoordsIterator &b)
+{
+ return !(a == b);
+}
+
+/** Base class for calculating distances between two lat/long coordinates.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ */
+class XAPIAN_VISIBILITY_DEFAULT LatLongMetric {
+ public:
+ /// Destructor.
+ virtual ~LatLongMetric();
+
+ /** Return the distance between two coordinates, in metres.
+ */
+ virtual double pointwise_distance(const LatLongCoord & a,
+ const LatLongCoord & b) const = 0;
+
+ /** Return the distance between two coordinate lists, in metres.
+ *
+ * The distance between the coordinate lists is defined to be the minimum
+ * pairwise distance between coordinates in the lists.
+ *
+ * @exception InvalidArgumentError either of the lists is empty.
+ *
+ * @param a The first coordinate list.
+ * @param b The second coordinate list.
+ */
+ double operator()(const LatLongCoords & a, const LatLongCoords & b) const;
+
+ /** Return the distance between two coordinate lists, in metres.
+ *
+ * One of the coordinate lists is supplied in serialised form.
+ *
+ * The distance between the coordinate lists is defined to be the minimum
+ * pairwise distance between coordinates in the lists.
+ *
+ * @exception InvalidArgumentError either of the lists is empty.
+ *
+ * @param a The first coordinate list.
+ * @param b The second coordinate list, in serialised form.
+ */
+ double operator()(const LatLongCoords & a, const std::string & b) const
+ {
+ return (*this)(a, b.data(), b.size());
+ }
+
+ /** Return the distance between two coordinate lists, in metres.
+ *
+ * One of the coordinate lists is supplied in serialised form.
+ *
+ * The distance between the coordinate lists is defined to be the minimum
+ * pairwise distance between coordinates in the lists.
+ *
+ * @exception InvalidArgumentError either of the lists is empty.
+ *
+ * @param a The first coordinate list.
+ * @param b_ptr The start of the serialised form of the second coordinate
+ * list.
+ * @param b_len The length of the serialised form of the second coordinate
+ * list.
+ */
+ double operator()(const LatLongCoords & a,
+ const char * b_ptr, size_t b_len) const;
+
+ /** Clone the metric. */
+ virtual LatLongMetric * clone() const = 0;
+
+ /** Return the full name of the metric.
+ *
+ * This is used when serialising and unserialising metrics; for example,
+ * for performing remote searches.
+ *
+ * If the subclass is in a C++ namespace, the namespace should be included
+ * in the name, using "::" as a separator. For example, for a
+ * LatLongMetric subclass called "FooLatLongMetric" in the "Xapian"
+ * namespace the result of this call should be "Xapian::FooLatLongMetric".
+ */
+ virtual std::string name() const = 0;
+
+ /** Serialise object parameters into a string.
+ *
+ * The serialised parameters should represent the configuration of the
+ * metric.
+ */
+ virtual std::string serialise() const = 0;
+
+ /** Create object given string serialisation returned by serialise().
+ *
+ * @param s A serialised instance of this LatLongMetric subclass.
+ */
+ virtual LatLongMetric * unserialise(const std::string & s) const = 0;
+};
+
+/** Calculate the great-circle distance between two coordinates on a sphere.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ *
+ * This uses the haversine formula to calculate the distance. Note that this
+ * formula is subject to inaccuracy due to numerical errors for coordinates on
+ * the opposite side of the sphere.
+ *
+ * See http://en.wikipedia.org/wiki/Haversine_formula
+ */
+class XAPIAN_VISIBILITY_DEFAULT GreatCircleMetric : public LatLongMetric {
+ /** The radius of the sphere in metres.
+ */
+ double radius;
+
+ public:
+ /** Construct a GreatCircleMetric.
+ *
+ * The (quadratic mean) radius of the Earth will be used by this
+ * calculator.
+ */
+ GreatCircleMetric();
+
+ /** Construct a GreatCircleMetric using a specified radius.
+ *
+ * This is useful for data sets in which the points are not on Earth (eg,
+ * a database of features on Mars).
+ *
+ * @param radius_ The radius of the sphere to use, in metres.
+ */
+ explicit GreatCircleMetric(double radius_);
+
+ /** Return the great-circle distance between points on the sphere.
+ */
+ double pointwise_distance(const LatLongCoord & a,
+ const LatLongCoord &b) const;
+
+ LatLongMetric * clone() const;
+ std::string name() const;
+ std::string serialise() const;
+ LatLongMetric * unserialise(const std::string & s) const;
+};
+
+/** Posting source which returns a weight based on geospatial distance.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ *
+ * Results are weighted by the distance from a fixed point, or list of points,
+ * calculated according to the metric supplied. If multiple points are
+ * supplied (either in the constructor, or in the coordinates stored in a
+ * document), the closest pointwise distance is used.
+ *
+ * Documents further away than a specified maximum range (or with no location
+ * stored in the specified slot) will not be returned.
+ *
+ * The weight returned is computed from the distance using the formula:
+ *
+ * k1 * pow(distance + k1, -k2)
+ *
+ * (Where k1 and k2 are (strictly) positive, floating point constants, which
+ * default to 1000 and 1, respectively. Distance is measured in metres, so
+ * this means that something at the centre gets a weight of 1.0, something 1km
+ * away gets a weight of 0.5, and something 3km away gets a weight of 0.25,
+ * etc)
+ */
+class XAPIAN_VISIBILITY_DEFAULT LatLongDistancePostingSource : public ValuePostingSource
+{
+ /// Current distance from centre.
+ double dist;
+
+ /// Centre, to compute distance from.
+ LatLongCoords centre;
+
+ /// Metric to compute the distance with.
+ const LatLongMetric * metric;
+
+ /// Maximum range to allow. If set to 0, there is no maximum range.
+ double max_range;
+
+ /// Constant used in weighting function.
+ double k1;
+
+ /// Constant used in weighting function.
+ double k2;
+
+ /// Calculate the distance for the current document.
+ void calc_distance();
+
+ /// Internal constructor; used by clone() and serialise().
+ LatLongDistancePostingSource(Xapian::valueno slot_,
+ const LatLongCoords & centre_,
+ const LatLongMetric * metric_,
+ double max_range_,
+ double k1_,
+ double k2_);
+
+ public:
+ /** Construct a new match decider which returns only documents within
+ * range of one of the central coordinates.
+ *
+ * @param db_ The database to read values from.
+ * @param slot_ The value slot to read values from.
+ * @param centre_ The centre point to use for distance calculations.
+ * @param metric_ The metric to use for distance calculations.
+ * @param max_range_ The maximum distance for documents which are returned.
+ * @param k1_ The k1 constant to use in the weighting function.
+ * @param k2_ The k2 constant to use in the weighting function.
+ */
+ LatLongDistancePostingSource(Xapian::valueno slot_,
+ const LatLongCoords & centre_,
+ const LatLongMetric & metric_,
+ double max_range_ = 0.0,
+ double k1_ = 1000.0,
+ double k2_ = 1.0);
+ ~LatLongDistancePostingSource();
+
+ void next(double min_wt);
+ void skip_to(Xapian::docid min_docid, double min_wt);
+ bool check(Xapian::docid min_docid, double min_wt);
+
+ double get_weight() const;
+ LatLongDistancePostingSource * clone() const;
+ std::string name() const;
+ std::string serialise() const;
+ LatLongDistancePostingSource *
+ unserialise_with_registry(const std::string &s,
+ const Registry & registry) const;
+ void init(const Database & db_);
+
+ std::string get_description() const;
+};
+
+/** KeyMaker subclass which sorts by distance from a latitude/longitude.
+ *
+ * Experimental - see http://xapian.org/docs/deprecation#experimental-features
+ *
+ * Results are ordered by the distance from a fixed point, or list of points,
+ * calculated according to the metric supplied. If multiple points are
+ * supplied (either in the constructor, or in the coordinates stored in a
+ * document), the closest pointwise distance is used.
+ *
+ * If a document contains no coordinate stored in the specified slot, a
+ * special value for the distance will be used. This defaults to a large
+ * number, so that such results get a low rank, but may be specified by a
+ * constructor parameter.
+ */
+class XAPIAN_VISIBILITY_DEFAULT LatLongDistanceKeyMaker : public KeyMaker {
+
+ /// The value slot to read.
+ Xapian::valueno slot;
+
+ /// The centre point (or points) for distance calculation.
+ LatLongCoords centre;
+
+ /// The metric to use when calculating distances.
+ const LatLongMetric * metric;
+
+ /// The default key to return, for documents with no value stored.
+ std::string defkey;
+
+ public:
+ LatLongDistanceKeyMaker(Xapian::valueno slot_,
+ const LatLongCoords & centre_,
+ const LatLongMetric & metric_,
+ double defdistance)
+ : slot(slot_),
+ centre(centre_),
+ metric(metric_.clone()),
+ defkey(sortable_serialise(defdistance))
+ {}
+
+ LatLongDistanceKeyMaker(Xapian::valueno slot_,
+ const LatLongCoords & centre_,
+ const LatLongMetric & metric_)
+ : slot(slot_),
+ centre(centre_),
+ metric(metric_.clone()),
+ defkey(9, '\xff')
+ {}
+
+ LatLongDistanceKeyMaker(Xapian::valueno slot_,
+ const LatLongCoord & centre_,
+ const LatLongMetric & metric_,
+ double defdistance)
+ : slot(slot_),
+ centre(),
+ metric(metric_.clone()),
+ defkey(sortable_serialise(defdistance))
+ {
+ centre.append(centre_);
+ }
+
+ LatLongDistanceKeyMaker(Xapian::valueno slot_,
+ const LatLongCoord & centre_,
+ const LatLongMetric & metric_)
+ : slot(slot_),
+ centre(),
+ metric(metric_.clone()),
+ defkey(9, '\xff')
+ {
+ centre.append(centre_);
+ }
+
+ ~LatLongDistanceKeyMaker();
+
+ std::string operator()(const Xapian::Document & doc) const;
+};
+
+}
+
+#endif /* XAPIAN_INCLUDED_GEOSPATIAL_H */
diff --git a/xapian-core/include/xapian/postingsource.h b/xapian-core/include/xapian/postingsource.h
index 8dc7e85..c75f618 100644
--- a/xapian-core/include/xapian/postingsource.h
+++ b/xapian-core/include/xapian/postingsource.h
@@ -31,6 +31,8 @@
namespace Xapian {
+class Registry;
+
/** Base class which provides an "external" source of postings.
*/
class XAPIAN_VISIBILITY_DEFAULT PostingSource {
@@ -297,6 +299,26 @@ class XAPIAN_VISIBILITY_DEFAULT PostingSource {
*/
virtual PostingSource * unserialise(const std::string &s) const;
+ /** Create object given string serialisation returned by serialise().
+ *
+ * Note that the returned object will be deallocated by Xapian after use
+ * with "delete". If you want to handle the deletion in a special way
+ * (for example when wrapping the Xapian API for use from another
+ * language) then you can define a static <code>operator delete</code>
+ * method in your subclass as shown here:
+ * http://trac.xapian.org/ticket/554#comment:1
+ *
+ * This method is supplied with a Registry object, which can be used when
+ * unserialising objects contained within the posting source. The default
+ * implementation simply calls unserialise() which doesn't take the
+ * Registry object, so you do not need to implement this method unless you
+ * want to take advantage of the Registry object when unserialising.
+ *
+ * @param s A serialised instance of this PostingSource subclass.
+ */
+ virtual PostingSource * unserialise_with_registry(const std::string &s,
+ const Registry & registry) const;
+
/** Set this PostingSource to the start of the list of postings.
*
* This is called automatically by the matcher prior to each query being
diff --git a/xapian-core/include/xapian/registry.h b/xapian-core/include/xapian/registry.h
index 17ac67b..080e79d 100644
--- a/xapian-core/include/xapian/registry.h
+++ b/xapian-core/include/xapian/registry.h
@@ -30,6 +30,7 @@
namespace Xapian {
// Forward declarations.
+class LatLongMetric;
class MatchSpy;
class PostingSource;
class Weight;
@@ -124,6 +125,19 @@ class XAPIAN_VISIBILITY_DEFAULT Registry {
*/
const Xapian::MatchSpy *
get_match_spy(const std::string & name) const;
+
+ /// Register a user-defined lat-long metric class.
+ void register_lat_long_metric(const Xapian::LatLongMetric &metric);
+
+ /** Get a lat-long metric given a name.
+ *
+ * The returned metric is owned by the registry object.
+ *
+ * Returns NULL if the metric could not be found.
+ */
+ const Xapian::LatLongMetric *
+ get_lat_long_metric(const std::string & name) const;
+
};
}
diff --git a/xapian-core/tests/Makefile.am b/xapian-core/tests/Makefile.am
index f5ed1a9..b73e816 100644
--- a/xapian-core/tests/Makefile.am
+++ b/xapian-core/tests/Makefile.am
@@ -138,6 +138,7 @@ collated_apitest_sources = \
api_compact.cc \
api_db.cc \
api_generated.cc \
+ api_geospatial.cc \
api_matchspy.cc \
api_metadata.cc \
api_nodb.cc \
diff --git a/xapian-core/tests/api_geospatial.cc b/xapian-core/tests/api_geospatial.cc
new file mode 100644
index 0000000..a36b977
--- /dev/null
+++ b/xapian-core/tests/api_geospatial.cc
@@ -0,0 +1,333 @@
+/** @file api_geospatial.cc
+ * @brief Tests of geospatial functionality.
+ */
+/* Copyright 2008 Lemur Consulting Ltd
+ * Copyright 2010,2011 Richard Boulton
+ * Copyright 2012 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#include <config.h>
+#include "api_geospatial.h"
+#include <xapian.h>
+
+#include "apitest.h"
+#include "testsuite.h"
+#include "testutils.h"
+
+using namespace std;
+using namespace Xapian;
+
+// #######################################################################
+// # Tests start here
+
+static void
+builddb_coords1(Xapian::WritableDatabase &db, const string &)
+{
+ Xapian::LatLongCoord coord1(10, 10);
+ Xapian::LatLongCoord coord2(20, 10);
+ Xapian::LatLongCoord coord3(30, 10);
+
+ Xapian::Document doc;
+ doc.add_value(0, coord1.serialise());
+ db.add_document(doc);
+
+ doc = Xapian::Document();
+ doc.add_value(0, coord2.serialise());
+ db.add_document(doc);
+
+ doc = Xapian::Document();
+ doc.add_value(0, coord3.serialise());
+ db.add_document(doc);
+}
+
+/// Test behaviour of the LatLongPostingSource
+DEFINE_TESTCASE(latlongpostingsource1, backend && writable && !remote && !inmemory) {
+ Xapian::Database db = get_database("coords1", builddb_coords1, "");
+ Xapian::LatLongCoord coord1(10, 10);
+ Xapian::LatLongCoord coord2(20, 10);
+ Xapian::LatLongCoord coord3(30, 10);
+
+ Xapian::GreatCircleMetric metric;
+ Xapian::LatLongCoords centre;
+ centre.append(coord1);
+ double coorddist = metric(coord1, coord2);
+ TEST_EQUAL_DOUBLE(coorddist, metric(coord2, coord3));
+
+ // Test a search with no range restriction.
+ {
+ Xapian::LatLongDistancePostingSource ps(0, coord1, metric);
+ ps.init(db);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1.0);
+ TEST_EQUAL(ps.get_docid(), 1);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1000.0 / (1000.0 + coorddist));
+ TEST_EQUAL(ps.get_docid(), 2);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1000.0 / (1000.0 + coorddist * 2));
+ TEST_EQUAL(ps.get_docid(), 3);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), true);
+ }
+
+ // Test a search with a tight range restriction
+ {
+ Xapian::LatLongDistancePostingSource ps(0, centre, metric, coorddist * 0.5);
+ ps.init(db);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1.0);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), true);
+ }
+
+ // Test a search with a looser range restriction
+ {
+ Xapian::LatLongDistancePostingSource ps(0, centre, metric, coorddist);
+ ps.init(db);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1.0);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1000.0 / (1000.0 + coorddist));
+ TEST_EQUAL(ps.get_docid(), 2);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), true);
+ }
+
+ // Test a search with a looser range restriction, but not enough to return
+ // the next document.
+ {
+ Xapian::LatLongDistancePostingSource ps(0, centre, metric, coorddist * 1.5);
+ ps.init(db);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1.0);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1000.0 / (1000.0 + coorddist));
+ TEST_EQUAL(ps.get_docid(), 2);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), true);
+ }
+
+ // Test a search with a loose enough range restriction that all docs should
+ // be returned.
+ {
+ Xapian::LatLongDistancePostingSource ps(0, centre, metric, coorddist * 2.5);
+ ps.init(db);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1.0);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1000.0 / (1000.0 + coorddist));
+ TEST_EQUAL(ps.get_docid(), 2);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), false);
+ TEST_EQUAL_DOUBLE(ps.get_weight(), 1000.0 / (1000.0 + coorddist * 2));
+ TEST_EQUAL(ps.get_docid(), 3);
+
+ ps.next(0.0);
+ TEST_EQUAL(ps.at_end(), true);
+ }
+
+ return true;
+}
+
+// Test various methods of LatLongCoord and LatLongCoords
+DEFINE_TESTCASE(latlongcoords1, !backend) {
+ LatLongCoord c1(0, 0);
+ LatLongCoord c2(1, 0);
+ LatLongCoord c3(1, 0);
+ LatLongCoord c4(0, 1);
+
+ // Test comparison
+ TEST_NOT_EQUAL(c1.get_description(), c2.get_description());
+ // Exactly one of these inequalities should be true.
+ TEST((c1 < c2) ^ (c2 < c1));
+ TEST_EQUAL(c2.get_description(), c3.get_description());
+ TEST(!(c2 < c3) && !(c3 < c2));
+ TEST_NOT_EQUAL(c3.get_description(), c4.get_description());
+ // Exactly one of these inequalities should be true. This is a regression
+ // test for bug found prior to 1.3.0.
+ TEST((c3 < c4) ^ (c4 < c3));
+
+ // Test serialisation
+ std::string s1 = c1.serialise();
+ LatLongCoord c5;
+ c4.unserialise(s1);
+ TEST(!(c1 < c4 || c4 < c1));
+ const char * ptr = s1.data();
+ const char * end = ptr + s1.size();
+ c5.unserialise(&ptr, end);
+ TEST_EQUAL(c1.get_description(), c4.get_description());
+ TEST_EQUAL(c1.get_description(), "Xapian::LatLongCoord(0, 0)");
+ TEST_EQUAL(ptr, end);
+
+ // Test uninitialised iterator constructor
+ LatLongCoordsIterator i1;
+
+ // Test building a set of LatLongCoords
+ LatLongCoords g1(c1);
+ TEST(!g1.empty());
+ TEST_EQUAL(g1.size(), 1);
+ TEST_EQUAL(g1.get_description(), "Xapian::LatLongCoords((0, 0))");
+ g1.append(c2);
+ TEST_EQUAL(g1.size(), 2);
+ TEST_EQUAL(g1.get_description(), "Xapian::LatLongCoords((0, 0), (1, 0))");
+
+ // Test iterating through a set of LatLongCoords
+ i1 = g1.begin();
+ TEST(i1 != g1.end());
+ TEST_EQUAL((*i1).serialise(), c1.serialise());
+ TEST_EQUAL((*(i1++)).serialise(), c1.serialise());
+ TEST(i1 != g1.end());
+ TEST_EQUAL((*i1).serialise(), c2.serialise());
+ i1 = g1.begin();
+ TEST_EQUAL((*(++i1)).serialise(), c2.serialise());
+ TEST(i1 != g1.end());
+ ++i1;
+ TEST(i1 == g1.end());
+
+ // Test that duplicates are allowed in the list of coordinates, now.
+ g1.append(c3);
+ TEST_EQUAL(g1.size(), 3);
+ TEST_EQUAL(g1.get_description(), "Xapian::LatLongCoords((0, 0), (1, 0), (1, 0))");
+
+ // Test building an empty LatLongCoords
+ LatLongCoords g2;
+ TEST(g2.empty());
+ TEST_EQUAL(g2.size(), 0);
+ TEST_EQUAL(g2.get_description(), "Xapian::LatLongCoords()");
+ TEST(g2.begin() == g2.end());
+
+ return true;
+}
+
+// Test various methods of LatLongMetric
+DEFINE_TESTCASE(latlongmetric1, !backend) {
+ LatLongCoord c1(0, 0);
+ LatLongCoord c2(1, 0);
+ Xapian::GreatCircleMetric m1;
+ double d1 = m1(c1, c2);
+ TEST_REL(d1, >, 111226.0);
+ TEST_REL(d1, <, 111227.0);
+
+ // Let's make another metric, this time using the radius of mars, so
+ // distances should be quite a bit smaller.
+ Xapian::GreatCircleMetric m2(3310000);
+ double d2 = m2(c1, c2);
+ TEST_REL(d2, >, 57770.0);
+ TEST_REL(d2, <, 57771.0);
+
+ // Check serialise and unserialise.
+ Xapian::Registry registry;
+ std::string s1 = m2.serialise();
+ const Xapian::LatLongMetric * m3;
+ m3 = registry.get_lat_long_metric(m2.name());
+ TEST(m3 != NULL);
+ m3 = m3->unserialise(s1);
+ double d3 = (*m3)(c1, c2);
+ TEST_EQUAL_DOUBLE(d2, d3);
+
+ delete m3;
+
+ return true;
+}
+
+// Test LatLongMetric on lists of coords.
+DEFINE_TESTCASE(latlongmetric2, !backend) {
+ LatLongCoord c1(0, 0);
+ LatLongCoord c2(1, 0);
+ LatLongCoords cl1(c1);
+ LatLongCoords cl2(c2);
+ string c2_str = c2.serialise();
+ string cl2_str = cl2.serialise();
+ TEST_EQUAL(c2_str, cl2_str);
+
+ LatLongCoord c2_check(5, 5);
+ c2_check.unserialise(c2_str);
+ TEST_EQUAL(c2_check.latitude, c2.latitude);
+ TEST_EQUAL(c2_check.longitude, c2.longitude);
+
+ Xapian::GreatCircleMetric m1;
+ double d1 = m1(c1, c2);
+ double dl1 = m1(cl1, cl2);
+ TEST_EQUAL(d1, dl1);
+ double d1_str = m1(cl1, c2_str);
+ TEST_EQUAL(d1, d1_str);
+
+ return true;
+}
+
+// Test a LatLongDistanceKeyMaker directly.
+DEFINE_TESTCASE(latlongkeymaker1, !backend) {
+ Xapian::GreatCircleMetric m1(3310000);
+ LatLongCoord c1(0, 0);
+ LatLongCoord c2(1, 0);
+ LatLongCoord c3(2, 0);
+ LatLongCoord c4(3, 0);
+
+ LatLongCoords g1(c1);
+ g1.append(c2);
+
+ LatLongDistanceKeyMaker keymaker(0, g1, m1);
+ Xapian::Document doc1;
+ doc1.add_value(0, g1.serialise());
+ Xapian::Document doc2;
+ doc2.add_value(0, c3.serialise());
+ Xapian::Document doc3;
+ doc3.add_value(0, c4.serialise());
+ Xapian::Document doc4;
+
+ std::string k1 = keymaker(doc1);
+ std::string k2 = keymaker(doc2);
+ std::string k3 = keymaker(doc3);
+ std::string k4 = keymaker(doc4);
+ TEST_REL(k1, <, k2);
+ TEST_REL(k2, <, k3);
+ TEST_REL(k3, <, k4);
+
+ LatLongDistanceKeyMaker keymaker2(0, g1, m1, 0);
+ std::string k3b = keymaker2(doc3);
+ std::string k4b = keymaker2(doc4);
+ TEST_EQUAL(k3, k3b);
+ TEST_REL(k3b, >, k4b);
+
+ return true;
+}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment