Skip to content

Instantly share code, notes, and snippets.

@lemon24
Last active May 5, 2020 15:33
Show Gist options
  • Save lemon24/a5d9d8f26ca365ec7d2aca46501a7a48 to your computer and use it in GitHub Desktop.
Save lemon24/a5d9d8f26ca365ec7d2aca46501a7a48 to your computer and use it in GitHub Desktop.
import collections
import textwrap
import functools
class Query(collections.OrderedDict):
indent_prefix = ' '
default_separators = dict(WHERE='AND', HAVING='AND')
_compound_keywords = ['UNION', 'UNION ALL', 'INTERSECT', 'EXCEPT']
_keyword_order = [
'',
'COMPOUND',
'WITH',
'SELECT',
'FROM',
'JOIN',
'WHERE',
'GROUP BY',
'HAVING',
'ORDER BY',
'LIMIT',
]
def _keyword_key(self, keyword):
if 'JOIN' in keyword:
keyword = 'JOIN'
elif keyword in self._compound_keywords:
keyword = 'COMPOUND'
try:
return self._keyword_order.index(keyword)
except ValueError:
return float('inf')
_keyword_formats = {
1: collections.defaultdict(lambda: '{thing}'),
2: dict(SELECT='{thing} AS {name}', WITH='{name} AS (\n{indented_thing}\n)',),
}
def __getattr__(self, name):
keyword = name.replace('_', ' ').upper()
if keyword in self._compound_keywords:
self = type(self)()._add('', self)
return functools.partial(self._add, keyword)
def _add(self, keyword, *things):
target = self.setdefault(keyword, [])
for thing in things:
if not isinstance(thing, (tuple, list)):
thing = (thing,)
target.append([self._clean_up(t) for t in thing])
return self
def _clean_up(self, thing):
if not isinstance(thing, str):
return thing
return textwrap.dedent(thing.rstrip()).strip()
def __str__(self, complete=True, end=';\n'):
text = ''.join(self._lines())
if complete:
return text + end
return text.rstrip()
def _lines(self):
pairs = sorted(self.items(), key=lambda p: self._keyword_key(p[0]))
for keyword, things in pairs:
if keyword:
yield keyword + '\n'
for i, thing in enumerate(things, 1):
fmt = self._keyword_formats[len(thing)][keyword]
name, thing = (None, *thing) if len(thing) == 1 else thing
if keyword and keyword not in self._compound_keywords:
if isinstance(thing, Query):
thing = thing.__str__(complete=False)
yield self._indent(fmt.format_map(dict(
name=name,
thing=thing,
indented_thing=self._indent(thing),
)))
if i < len(things):
yield self._get_separator(keyword)
yield '\n'
else:
if isinstance(thing, Query):
yield from thing._lines()
else:
yield fmt.format_map(dict(name=name, thing=thing))
yield '\n'
if i < len(things):
yield keyword
yield '\n'
def _indent(self, text):
return textwrap.indent(text, self.indent_prefix)
def _get_separator(self, keyword):
if 'JOIN' in keyword:
return '\n' + keyword
separator = self.default_separators.get(keyword)
if separator:
return ' ' + separator
return ','
class ScrollingWindow:
def __init__(self, query, *things, desc=False, keyword='WHERE'):
self._query = query
self._things = things = self._query._clean_up(things)
self._desc = desc
self._keyword = keyword
order = 'DESC' if desc else 'ASC'
self._query.ORDER_BY(*(f'{thing} {order}' for thing in things))
_make_label = 'last_{}'.format
def LIMIT(self, *things, last):
self._query.LIMIT(things)
if not last:
return
op = '<' if self._desc else '>'
labels = (':' + self._make_label(i) for i in range(len(self._things)))
getattr(self._query, self._keyword)(
Query()._add('(', *self._things)._add(f') {op} (', *labels).__str__(end=')')
)
def extract_last(self, result):
names = [t[0] for t in self._query['SELECT']]
return [
(self._make_label(i), result[names.index(thing)])
for i, thing in enumerate(self._things)
]
def apply_filter_options(query, filter_options, keyword='WHERE'):
add = getattr(query, keyword)
feed_url, entry_id, read, important, has_enclosures = filter_options
if feed_url:
add("feeds.url = :feed_url")
if entry_id:
add("entries.id = :entry_id")
if read is not None:
add(f"{'' if read else 'NOT'} entries.read")
if important is not None:
add(f"{'' if important else 'NOT'} entries.important")
if has_enclosures is not None:
add(
f"""
{'NOT' if has_enclosures else ''}
(json_array_length(entries.enclosures) IS NULL
OR json_array_length(entries.enclosures) = 0)
"""
)
def make_get_entries_query(filter_options, sort, chunk_size=None, last=None):
query = (
Query()
.SELECT(
*"""
feeds.url
feeds.updated
feeds.title
feeds.link
feeds.author
feeds.user_title
entries.id
entries.updated
entries.title
entries.link
entries.author
entries.published
entries.summary
entries.content
entries.enclosures
entries.read
entries.important
""".split()
)
.FROM("entries")
.JOIN("feeds ON feeds.url = entries.feed")
)
# noop scrolling window
scrolling_window = ScrollingWindow(query)
apply_filter_options(query, filter_options)
if sort == 'recent':
query.SELECT(
"entries.last_updated",
(
"kinda_first_updated",
"""
coalesce (
CASE
WHEN
coalesce(entries.published, entries.updated)
>= :recent_threshold
THEN entries.first_updated_epoch
END,
entries.published, entries.updated
)
""",
),
("kinda_published", "coalesce(entries.published, entries.updated)"),
("negative_feed_order", "- entries.feed_order"),
)
scrolling_window = ScrollingWindow(
query,
*"""
kinda_first_updated
kinda_published
feeds.url
entries.last_updated
negative_feed_order
entries.id
""".split(),
desc=True,
)
if chunk_size:
scrolling_window.LIMIT(":chunk_size", last=last)
elif sort == 'random':
query.ORDER_BY("random()")
if chunk_size:
query.LIMIT(":chunk_size")
return query, scrolling_window
'''
# without scrolling_window, the "if sort == 'recent' ... scrolling_window = ..." block would be written as:
query.ORDER_BY(
*"""
kinda_first_updated DESC
kinda_published DESC
feeds.url DESC
entries.last_updated DESC
negative_feed_order DESC
entries.id DESC
""".strip().splitlines()
)
if chunk_size:
query.LIMIT(":chunk_size")
if last:
query.WHERE(
"""
(
kinda_first_updated,
kinda_published,
feeds.url,
entries.last_updated,
negative_feed_order,
entries.id
) < (
:last_entry_first_updated,
:last_entry_updated,
:last_feed_url,
:last_entry_last_updated,
:last_negative_feed_order,
:last_entry_id
)
"""
)
'''
def make_search_entries_query(filter_options, chunk_size=None, last=None):
# we could have used the original query as-is;
# this is to show you can pass a query
search = (
Query()
.SELECT(
# as long as we don't use this with scrolling_window,
# we can pass everything in a single string
"""
_id,
_feed,
rank,
snippet(
entries_search, 0, :before_mark, :after_mark, '...',
:snippet_tokens
) AS title,
snippet(
entries_search, 2, :before_mark, :after_mark, '...',
:snippet_tokens
) AS feed,
_is_feed_user_title AS is_feed_user_title,
json_object(
'path', _content_path,
'value', snippet(
entries_search, 1,
:before_mark, :after_mark, '...', :snippet_tokens
),
'rank', rank
) AS content
"""
)
.FROM("entries_search")
.WHERE("entries_search MATCH :query")
.ORDER_BY("rank")
.LIMIT("-1 OFFSET 0")
)
query = (
Query()
.WITH(("search", search))
.SELECT(
"entries.id",
"entries.feed",
("rank", "min(search.rank)"),
"search.title",
"search.feed",
"search.is_feed_user_title",
"json_group_array(json(search.content))",
)
.FROM("entries")
.JOIN("search ON (entries.id, entries.feed) = (search._id, search._feed)")
.GROUP_BY("entries.id", "entries.feed")
)
apply_filter_options(query, filter_options, 'HAVING')
scrolling_window = ScrollingWindow(
query, *"rank entries.id entries.feed".split(), keyword='HAVING'
)
if chunk_size:
scrolling_window.LIMIT(":chunk_size", last=last)
return query, scrolling_window
from reader._types import EntryFilterOptions
filter_options = EntryFilterOptions.from_args(feed='feed')
def print_query(chunk_size, factory, args, result):
print(
f'--- {factory.__name__}, chunk size {chunk_size}', *args, sep='\n', end='\n\n'
)
query, scrolling_window = factory(*args, chunk_size=chunk_size, last=None)
print(query, end='\n\n')
if not chunk_size:
return
last = scrolling_window.extract_last(result)
print(dict(last), end='\n\n')
query, _ = factory(*args, chunk_size=chunk_size, last=last)
print(query, end='\n\n')
things = [
(
make_get_entries_query,
(EntryFilterOptions.from_args(feed='feed'), 'recent',),
"""
feeds.url
feeds.updated
feeds.title
feeds.link
feeds.author
feeds.user_title
entries.id
entries.updated
entries.title
entries.link
entries.author
entries.published
entries.summary
entries.content
entries.enclosures
entries.read
entries.important
entries.last_updated
kinda_first_updated
kinda_published
negative_feed_order
""".split(),
),
(
make_get_entries_query,
(EntryFilterOptions.from_args(), 'random',),
"""
feeds.url
feeds.updated
feeds.title
feeds.link
feeds.author
feeds.user_title
entries.id
entries.updated
entries.title
entries.link
entries.author
entries.published
entries.summary
entries.content
entries.enclosures
entries.read
entries.important
""".split(),
),
(
make_search_entries_query,
(EntryFilterOptions.from_args(feed='feed'),),
"""
entries.id
entries.feed
rank
search.title
search.feed
search.is_feed_user_title
content
""".split(),
),
]
for factory, args, result in things:
for chunk_size in None, 2:
print_query(chunk_size, factory, args, result)
union_queries = [
(
'simple UNION',
Query().SELECT('*').FROM('one').UNION().SELECT('another').FROM('two'),
),
(
'Query UNION args',
Query()
.SELECT('* FROM one')
.UNION(
Query().SELECT('* FROM two'),
Query().SELECT('xxx').UNION().SELECT('yyy'),
'SELECT * FROM STRING',
)
.UNION()
.SELECT('* FROM three'),
),
(
'Search._update -like UNION',
Query()
.WITH('cte1')
.WITH(('cte2-name', 'cte2'))
.WITH(
('cte3', Query().SELECT('*').FROM('cte1').UNION().SELECT('*').FROM('cte2'))
)
.SELECT('whatever')
.FROM('cte3')
.JOIN('some other thing')
.JOIN('yet another one'),
),
]
for name, query in union_queries:
print('---', name)
print()
print(query)
print()
--- make_get_entries_query, chunk size None
EntryFilterOptions(feed_url='feed', entry_id=None, read=None, important=None, has_enclosures=None)
recent
SELECT
feeds.url,
feeds.updated,
feeds.title,
feeds.link,
feeds.author,
feeds.user_title,
entries.id,
entries.updated,
entries.title,
entries.link,
entries.author,
entries.published,
entries.summary,
entries.content,
entries.enclosures,
entries.read,
entries.important,
entries.last_updated,
coalesce (
CASE
WHEN
coalesce(entries.published, entries.updated)
>= :recent_threshold
THEN entries.first_updated_epoch
END,
entries.published, entries.updated
) AS kinda_first_updated,
coalesce(entries.published, entries.updated) AS kinda_published,
- entries.feed_order AS negative_feed_order
FROM
entries
JOIN
feeds ON feeds.url = entries.feed
WHERE
feeds.url = :feed_url
ORDER BY
kinda_first_updated DESC,
kinda_published DESC,
feeds.url DESC,
entries.last_updated DESC,
negative_feed_order DESC,
entries.id DESC
;
--- make_get_entries_query, chunk size 2
EntryFilterOptions(feed_url='feed', entry_id=None, read=None, important=None, has_enclosures=None)
recent
SELECT
feeds.url,
feeds.updated,
feeds.title,
feeds.link,
feeds.author,
feeds.user_title,
entries.id,
entries.updated,
entries.title,
entries.link,
entries.author,
entries.published,
entries.summary,
entries.content,
entries.enclosures,
entries.read,
entries.important,
entries.last_updated,
coalesce (
CASE
WHEN
coalesce(entries.published, entries.updated)
>= :recent_threshold
THEN entries.first_updated_epoch
END,
entries.published, entries.updated
) AS kinda_first_updated,
coalesce(entries.published, entries.updated) AS kinda_published,
- entries.feed_order AS negative_feed_order
FROM
entries
JOIN
feeds ON feeds.url = entries.feed
WHERE
feeds.url = :feed_url
ORDER BY
kinda_first_updated DESC,
kinda_published DESC,
feeds.url DESC,
entries.last_updated DESC,
negative_feed_order DESC,
entries.id DESC
LIMIT
:chunk_size
;
{'last_0': 'kinda_first_updated', 'last_1': 'kinda_published', 'last_2': 'feeds.url', 'last_3': 'entries.last_updated', 'last_4': 'negative_feed_order', 'last_5': 'entries.id'}
SELECT
feeds.url,
feeds.updated,
feeds.title,
feeds.link,
feeds.author,
feeds.user_title,
entries.id,
entries.updated,
entries.title,
entries.link,
entries.author,
entries.published,
entries.summary,
entries.content,
entries.enclosures,
entries.read,
entries.important,
entries.last_updated,
coalesce (
CASE
WHEN
coalesce(entries.published, entries.updated)
>= :recent_threshold
THEN entries.first_updated_epoch
END,
entries.published, entries.updated
) AS kinda_first_updated,
coalesce(entries.published, entries.updated) AS kinda_published,
- entries.feed_order AS negative_feed_order
FROM
entries
JOIN
feeds ON feeds.url = entries.feed
WHERE
feeds.url = :feed_url AND
(
kinda_first_updated,
kinda_published,
feeds.url,
entries.last_updated,
negative_feed_order,
entries.id
) < (
:last_0,
:last_1,
:last_2,
:last_3,
:last_4,
:last_5
)
ORDER BY
kinda_first_updated DESC,
kinda_published DESC,
feeds.url DESC,
entries.last_updated DESC,
negative_feed_order DESC,
entries.id DESC
LIMIT
:chunk_size
;
--- make_get_entries_query, chunk size None
EntryFilterOptions(feed_url=None, entry_id=None, read=None, important=None, has_enclosures=None)
random
SELECT
feeds.url,
feeds.updated,
feeds.title,
feeds.link,
feeds.author,
feeds.user_title,
entries.id,
entries.updated,
entries.title,
entries.link,
entries.author,
entries.published,
entries.summary,
entries.content,
entries.enclosures,
entries.read,
entries.important
FROM
entries
JOIN
feeds ON feeds.url = entries.feed
ORDER BY
random()
;
--- make_get_entries_query, chunk size 2
EntryFilterOptions(feed_url=None, entry_id=None, read=None, important=None, has_enclosures=None)
random
SELECT
feeds.url,
feeds.updated,
feeds.title,
feeds.link,
feeds.author,
feeds.user_title,
entries.id,
entries.updated,
entries.title,
entries.link,
entries.author,
entries.published,
entries.summary,
entries.content,
entries.enclosures,
entries.read,
entries.important
FROM
entries
JOIN
feeds ON feeds.url = entries.feed
ORDER BY
random()
LIMIT
:chunk_size
;
{}
SELECT
feeds.url,
feeds.updated,
feeds.title,
feeds.link,
feeds.author,
feeds.user_title,
entries.id,
entries.updated,
entries.title,
entries.link,
entries.author,
entries.published,
entries.summary,
entries.content,
entries.enclosures,
entries.read,
entries.important
FROM
entries
JOIN
feeds ON feeds.url = entries.feed
ORDER BY
random()
LIMIT
:chunk_size
;
--- make_search_entries_query, chunk size None
EntryFilterOptions(feed_url='feed', entry_id=None, read=None, important=None, has_enclosures=None)
WITH
search AS (
SELECT
_id,
_feed,
rank,
snippet(
entries_search, 0, :before_mark, :after_mark, '...',
:snippet_tokens
) AS title,
snippet(
entries_search, 2, :before_mark, :after_mark, '...',
:snippet_tokens
) AS feed,
_is_feed_user_title AS is_feed_user_title,
json_object(
'path', _content_path,
'value', snippet(
entries_search, 1,
:before_mark, :after_mark, '...', :snippet_tokens
),
'rank', rank
) AS content
FROM
entries_search
WHERE
entries_search MATCH :query
ORDER BY
rank
LIMIT
-1 OFFSET 0
)
SELECT
entries.id,
entries.feed,
min(search.rank) AS rank,
search.title,
search.feed,
search.is_feed_user_title,
json_group_array(json(search.content))
FROM
entries
JOIN
search ON (entries.id, entries.feed) = (search._id, search._feed)
GROUP BY
entries.id,
entries.feed
HAVING
feeds.url = :feed_url
ORDER BY
rank ASC,
entries.id ASC,
entries.feed ASC
;
--- make_search_entries_query, chunk size 2
EntryFilterOptions(feed_url='feed', entry_id=None, read=None, important=None, has_enclosures=None)
WITH
search AS (
SELECT
_id,
_feed,
rank,
snippet(
entries_search, 0, :before_mark, :after_mark, '...',
:snippet_tokens
) AS title,
snippet(
entries_search, 2, :before_mark, :after_mark, '...',
:snippet_tokens
) AS feed,
_is_feed_user_title AS is_feed_user_title,
json_object(
'path', _content_path,
'value', snippet(
entries_search, 1,
:before_mark, :after_mark, '...', :snippet_tokens
),
'rank', rank
) AS content
FROM
entries_search
WHERE
entries_search MATCH :query
ORDER BY
rank
LIMIT
-1 OFFSET 0
)
SELECT
entries.id,
entries.feed,
min(search.rank) AS rank,
search.title,
search.feed,
search.is_feed_user_title,
json_group_array(json(search.content))
FROM
entries
JOIN
search ON (entries.id, entries.feed) = (search._id, search._feed)
GROUP BY
entries.id,
entries.feed
HAVING
feeds.url = :feed_url
ORDER BY
rank ASC,
entries.id ASC,
entries.feed ASC
LIMIT
:chunk_size
;
{'last_0': 'rank', 'last_1': 'entries.id', 'last_2': 'entries.feed'}
WITH
search AS (
SELECT
_id,
_feed,
rank,
snippet(
entries_search, 0, :before_mark, :after_mark, '...',
:snippet_tokens
) AS title,
snippet(
entries_search, 2, :before_mark, :after_mark, '...',
:snippet_tokens
) AS feed,
_is_feed_user_title AS is_feed_user_title,
json_object(
'path', _content_path,
'value', snippet(
entries_search, 1,
:before_mark, :after_mark, '...', :snippet_tokens
),
'rank', rank
) AS content
FROM
entries_search
WHERE
entries_search MATCH :query
ORDER BY
rank
LIMIT
-1 OFFSET 0
)
SELECT
entries.id,
entries.feed,
min(search.rank) AS rank,
search.title,
search.feed,
search.is_feed_user_title,
json_group_array(json(search.content))
FROM
entries
JOIN
search ON (entries.id, entries.feed) = (search._id, search._feed)
GROUP BY
entries.id,
entries.feed
HAVING
feeds.url = :feed_url AND
(
rank,
entries.id,
entries.feed
) > (
:last_0,
:last_1,
:last_2
)
ORDER BY
rank ASC,
entries.id ASC,
entries.feed ASC
LIMIT
:chunk_size
;
--- simple UNION
SELECT
*
FROM
one
UNION
SELECT
another
FROM
two
;
--- Query UNION args
SELECT
* FROM one
UNION
SELECT
* FROM two
UNION
SELECT
xxx
UNION
SELECT
yyy
UNION
SELECT * FROM STRING
UNION
SELECT
* FROM three
;
--- Search._update -like UNION
WITH
cte1,
cte2-name AS (
cte2
),
cte3 AS (
SELECT
*
FROM
cte1
UNION
SELECT
*
FROM
cte2
)
SELECT
whatever
FROM
cte3
JOIN
some other thing
JOIN
yet another one
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment