Skip to content

Instantly share code, notes, and snippets.

@whelmed
Created April 3, 2018 18:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save whelmed/d3064011309c757c0433a359ac142e9a to your computer and use it in GitHub Desktop.
Save whelmed/d3064011309c757c0433a359ac142e9a to your computer and use it in GitHub Desktop.
CloudSearch PoC
#!/bin/python
''' Target: python 3.6
This is a very simplistic CloudSearch query builder.
The intent is that this will ensure values are encoded and have a valid syntax.
Combine and nest the operators along with a query type, a field, or fields, or a value:
Query types: Term, Matchall, Prefix, Phrase, Near, and Range.
Operators: And, Or, Not
Conditionals: If
Values: Field, Fields, Value
published_after = datetime.now()
q = Or(
Fields('topic', 'big_data', 'storage'),
If(
published_after is not None,
Range(field='pub_date', start=published_after, is_start_inclusive=True)
)
)
print(q)
Something missing from the syntax?
This is a simple string builder, so you can always use raw strings.
q = And(
'(awesome field={} ultra={})'.format(Value('Greetings!'), 5),
Fields('topic', 'big_data', 'storage')
)
'''
from datetime import datetime
def _escape_control_characters(s):
return s.replace("'", '%27').replace('"', '%22').replace("\\", '%5C').replace('&', '%26')
def _str(value):
    ''' Return the escaped form of value wrapped in single quotes.
    '''
    escaped = _escape_control_characters(value)
    return "'{}'".format(escaped)
def _date(value):
return "'{}'".format(value.strftime('%Y-%m-%dT%H:%M:%SZ'))
def _parse(value):
if isinstance(value, str):
return _str(value)
if isinstance(value, datetime):
return _date(value)
if isinstance(value, (int, float)):
return value
raise TypeError(
'Expected: str, datetime, int, float, Got: {}'.format(type(value)))
def Value(v):
    ''' Public entry point for encoding a single value.

    Delegates to _parse so the argument is rendered according to its type.
    '''
    parsed = _parse(v)
    return parsed
def Field(name, value):
    ''' Build a single name=value pair, with the value encoded by _parse.
    '''
    encoded = _parse(value)
    return '{}={}'.format(name, encoded)
def Fields(name, *fields):
    ''' Build one name=value pair per given value, separated by spaces.

    Every pair uses the same name; no values yields an empty string.
    '''
    pairs = (Field(name, item) for item in fields)
    return ' '.join(pairs)
def _named_query(query_name, field, value, boost=None, distance=None, suppress_parse=False):
b = Field('boost', boost) if boost is not None else ''
d = Field('distance', boost) if boost is not None else ''
v = value if suppress_parse else _parse(value)
return '({} field={} {} {} {})'.format(query_name, field, d, b, v)
def Matchall():
    ''' Return the matchall operator, which matches every document in the domain.
    Syntax: matchall.
    '''
    operator = 'matchall'
    return operator
def Term(field, value, boost=None):
    ''' Build a term expression for the given field and value.
    Example of the target syntax: (and (term field=title 'star')(term field=year 1977)).
    Syntax: (term field=FIELD boost=N 'STRING'|VALUE).
    '''
    return _named_query('term', field, value, boost=boost)
def Prefix(field, value, boost=None):
    ''' Build a prefix expression for the given field and value.
    Example of the target syntax: (prefix field='title' 'wait').
    An optional boost is forwarded to the expression.
    Syntax: (prefix field=FIELD boost=N 'STRING').
    '''
    return _named_query('prefix', field, value, boost=boost)
def Phrase(field, value, boost=None):
    ''' Build a phrase expression for the given field and value.
    Example of the target syntax: (phrase field="title" 'teenage mutant ninja').
    An optional boost is forwarded to the expression.
    Syntax: (phrase field=FIELD boost=N 'STRING').
    '''
    return _named_query('phrase', field, value, boost=boost)
def Near(field, value, boost=None, distance=None):
    ''' Build a near (sloppy phrase) expression for the given field and value.

    The distance value specifies the maximum number of words that can
    separate the words in the phrase; for example,
    (near field='plot' distance=4 'naval mutiny demonstration').
    Syntax: (near field=FIELD distance=N boost=N 'STRING').

    field: name of the field to search.
    value: the phrase to search for.
    boost: optional boost forwarded to the expression.
    distance: optional maximum word distance forwarded to the expression.
    '''
    # The operator name must be 'near'; emitting 'phrase' would turn this
    # into an exact-phrase query.
    return _named_query('near', field, value, boost=boost, distance=distance)
def Range(field=None, start=None, end=None, is_start_inclusive=False, is_end_inclusive=False):
    ''' Build a range expression over a field.

    The bounds are separated by a comma and wrapped in brackets or braces:
    a square bracket, [ or ], includes the bound, while a curly brace,
    { or }, excludes it. A bound left as None is rendered as empty.
    Example of the target syntax: (range field=year [2013,})

    Raises ValueError when no field is given.
    '''
    if field is None:
        raise ValueError("A field must be set in a range query")

    lower = _parse(start) if start is not None else ''
    upper = _parse(end) if end is not None else ''
    opening = '[' if is_start_inclusive else '{'
    closing = ']' if is_end_inclusive else '}'

    bounds = '{}{},{}{}'.format(opening, lower, upper, closing)
    # The bounds were encoded above, so tell _named_query not to encode again.
    return _named_query('range', field, bounds, suppress_parse=True)
def And(*expressions):
    ''' Combine the expressions with an and operator.

    The expressions are joined with single spaces and wrapped as
    (and (...)).
    Example:
    And("topic='big_data'", "difficulty='beginner'")
    Results:
    (and (topic='big_data' difficulty='beginner'))
    '''
    joined = ' '.join(expressions)
    return '(and ({}))'.format(joined)
def Or(*expressions):
    ''' Combine the expressions with an or operator.

    The expressions are joined with single spaces and wrapped as
    (or (...)).
    Example:
    Or("topic='big_data'", "topic='storage'")
    Results:
    (or (topic='big_data' topic='storage'))
    '''
    joined = ' '.join(expressions)
    return '(or ({}))'.format(joined)
def Not(*expressions):
    ''' Exclude results with the not operator.

    The expressions are joined with single spaces and wrapped as
    (not (...)).
    Example:
    Not("title='Boring'")
    Results:
    (not (title='Boring'))
    '''
    joined = ' '.join(expressions)
    return '(not ({}))'.format(joined)
def If(condition, *expressions):
    ''' Include the expressions only when the condition is truthy.

    When the condition holds, the expressions are joined with single spaces.
    Otherwise an empty string is returned, so the enclosing operator simply
    receives nothing for this slot.
    Example:
    If(False, "topic='storage'")
    Result:
    an empty string
    '''
    return ' '.join(expressions) if condition else ''
if __name__ == '__main__':
    # Running the module as a script only prints a usage example;
    # no query is built here.
    print(
        # The adjacent string literals below are concatenated into one message.
        'Target Python 3.6\n'
        'Example usage: '
        '''
>>> published_after = datetime.now()
>>> q = Or(
Fields('topic', 'big_data', 'storage'),
If(
published_after is not None,
Range(field='pub_date', start=published_after, is_start_inclusive=True)
)
)
>>> (or (topic='big_data' topic='storage' (range field=pub_date ['2018-03-31T19:34:17Z',})))
'''
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment