Skip to content

Instantly share code, notes, and snippets.

@lemon24
Last active July 7, 2020 19:58
Show Gist options
  • Save lemon24/2f7a4b29266ddce73f2deecd72ff84c4 to your computer and use it in GitHub Desktop.
import os
import os.path
import time
import sys
import random
import itertools
from datetime import datetime
from subprocess import run
from threading import Thread
import threading
from collections import Counter
from queue import Queue, Empty
from reader import make_reader, ReaderError
import sqlite3
import pickle
import traceback
# make the repository's tests/ directory importable so fakeparser can be found
root_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(root_dir, 'tests'))
from fakeparser import Parser
# every snapshot recorded by trace() is appended here; dumped to a pickle at exit
trace_q = Queue()
# holds one lazily-created sqlite connection per tracing thread
trace_local = threading.local()
def trace(stmt):
    """sqlite trace callback: snapshot the three tables for every traced statement.

    Each snapshot is a 7-tuple pushed onto the module-level trace_q; on any
    failure a 1-tuple with the error description is pushed instead.
    """
    if not hasattr(trace_local, 'db'):
        # one connection per thread, created on first use
        trace_local.db = sqlite3.connect('db.sqlite')
    query = trace_local.db.execute
    try:
        snapshot = (
            datetime.now(),
            threading.current_thread().name,
            traceback.extract_stack(),
            stmt,
            list(query("select id, title from entries order by id")),
            list(query("select _id, title from entries_search order by _id")),
            list(query("select id, to_update from entries_search_sync_state order by id")),
        )
        trace_q.put(snapshot)
    except Exception as e:
        # recording must never break the traced program; record the error instead
        trace_q.put((f"{type(e).__module__}.{type(e).__qualname__}: {e}",))
def my_make_reader():
    """Open the shared database and attach the trace() callback to its connection."""
    rv = make_reader('db.sqlite')
    rv._storage.db.set_trace_callback(trace)
    return rv
def update_entries(done_q):
    """Keep rewriting random entry titles until anything arrives on done_q."""
    rdr = my_make_reader()
    title_numbers = itertools.count()
    while True:
        try:
            done_q.get(False)
        except Empty:
            pass
        else:
            break
        rows = list(rdr._storage.db.execute("select feed, id from entries;"))
        # random sample (with repeats collapsed by set()) of entries to touch
        picked = set(random.choices(rows, k=random.randrange(len(rows))))
        # each row is (feed, id); the update matches (feed, id) = (?, ?)
        args = [
            (f"title {n}", feed_url, entry_id)
            for (feed_url, entry_id), n in zip(picked, title_numbers)
        ]
        with rdr._storage.db as db:
            db.executemany(
                "update entries set title = ? where (feed, id) = (?, ?);",
                args,
            )
def update_search(done_q):
    """Run reader.update_search() in a loop until anything arrives on done_q."""
    rdr = my_make_reader()
    while True:
        try:
            done_q.get(False)
        except Empty:
            rdr.update_search()
        else:
            break
# start from a clean database (the glob also removes -wal/-shm files)
run("rm db.sqlite*", shell=True)

reader = make_reader('db.sqlite')
reader._parser = parser = Parser()

# one feed with five entries, fully indexed for search
reader.add_feed(parser.feed(0, datetime(2010, 1, 1)))
for n in range(5):
    parser.entry(0, n, datetime(2010, 1, 1))
reader.update_feeds()
reader.enable_search()
reader.update_search()

stop_q = Queue()
workers = [Thread(target=update_entries, args=(stop_q,))]
workers.extend(Thread(target=update_search, args=(stop_q,)) for _ in range(1))
for worker in workers:
    worker.start()

# poll until the bug manifests: more search rows than actual entries
try:
    while True:
        time.sleep(.1)
        (count,), = reader._storage.db.execute("select count(*) from entries_search;")
        if count > len(parser.entries[0]):
            break
except KeyboardInterrupt:
    pass

for worker in workers:
    stop_q.put(None)
for worker in workers:
    worker.join()

# dump the recorded trace; the output filename encodes the dominant tuple length
counts = Counter(len(t) for t in trace_q.queue)
print(counts)
with open(f'{sys.argv[1]}.{counts.most_common(1)[0][1]}', 'wb') as f:
    pickle.dump(trace_q.queue, f)
def print_query(query):
    """Print the query text followed by each of its result rows, one per line."""
    print(query, *reader._storage.db.execute(query), sep='\n', end='\n\n')


print_query("select id, title from entries order by id")
print_query("select _id, title from entries_search order by _id")
import pickle
import typing
import textwrap
import sys
import re
from itertools import zip_longest
class Trace(typing.NamedTuple):
    """One snapshot recorded by the tracing script, in recorded field order."""

    ts: 'datetime'    # when the statement was traced
    thread: str       # name of the thread that executed it
    tb: list          # traceback.extract_stack() captured at trace time
    stmt: str         # the traced SQL statement
    entries: list     # rows of the entries table
    search: list      # rows of the entries_search table
    sync_state: list  # rows of entries_search_sync_state
# load the recorded snapshots and put them in timestamp order
data = sorted(map(Trace._make, pickle.load(open(sys.argv[1], 'rb'))))

# skip internal stuff FTS5 does
data = filter(lambda t: "'main'" not in t.stmt, data)

for i, trace in enumerate(data, 1):
    # flag snapshots where the search table disagrees with the entries table
    if len(trace.search) < len(trace.entries):
        flag = '<<<'
    elif len(trace.search) > len(trace.entries):
        flag = '>>>'
    else:
        flag = '==='

    # whatever was before trace()
    frame = trace.tb[-2]
    filename = frame.filename.split('/')[-1]
    print(flag, str(i).zfill(4), trace.thread, trace.ts, filename, frame.name)
    print()

    def repr_entry(t):
        # "('0', '0, 2')"-style ids shortened to their last component
        return f"{t[0].rpartition(', ')[2]}: {t[1]}"

    entries = ['entries', *map(repr_entry, trace.entries)]
    search = ['search', *map(repr_entry, trace.search)]
    sync_state = ['sync_state', *map(repr_entry, trace.sync_state)]

    # render the three tables side by side, padded to a common column width
    width = max(len(s) for s in entries + search + sync_state)
    for e, s, ss in zip_longest(entries, search, sync_state, fillvalue=''):
        print(f" {e:<{width}} {s:<{width}} {ss}")
    print()

    stmt = trace.stmt
    # trigger statements show up commented and without newlines; fix them
    if stmt.startswith('-- '):
        stmt = ' ' * 80 + re.sub('([^\n])( +)', r'\1\n\2', stmt[3:])
    print(textwrap.indent(textwrap.dedent(stmt).strip(), '    '))
    print()
import os
import os.path
import time
import sys
import random
import itertools
from datetime import datetime
from subprocess import run
from threading import Thread
from queue import Queue, Empty
from reader import make_reader, ReaderError
# make the repository's tests/ directory importable so fakeparser can be found
root_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(root_dir, 'tests'))
from fakeparser import Parser
def my_make_reader():
    """Open a reader on the shared on-disk database."""
    return make_reader('db.sqlite')
def update_entries(done_q):
    """Keep rewriting random entry titles until anything arrives on done_q."""
    rdr = my_make_reader()
    title_numbers = itertools.count()
    while True:
        try:
            done_q.get(False)
        except Empty:
            pass
        else:
            break
        rows = list(rdr._storage.db.execute("select feed, id from entries;"))
        # random sample (with repeats collapsed by set()) of entries to touch
        picked = set(random.choices(rows, k=random.randrange(len(rows))))
        # each row is (feed, id); the update matches (feed, id) = (?, ?)
        args = [
            (f"title {n}", feed_url, entry_id)
            for (feed_url, entry_id), n in zip(picked, title_numbers)
        ]
        with rdr._storage.db as db:
            db.executemany(
                "update entries set title = ? where (feed, id) = (?, ?);",
                args,
            )
# not used initially, found another bug after the fix for the repro
# https://github.com/lemon24/reader/commit/4a6e5ce29b425041a25457953cd8e284a0ab9a69
def add_remove_entries(done_q):
    """Randomly delete entries, then re-add them via update_feeds(), until
    anything arrives on done_q.

    Exercises the delete/re-index path of the search sync state machine.
    """
    import ast  # used to parse entry ids safely; see note below

    reader = my_make_reader()
    while True:
        try:
            done_q.get(False)
            break
        except Empty:
            pass
        entries = list(reader._storage.db.execute("select feed, id from entries;"))
        # k < len(entries), so at least one entry always survives the delete
        to_delete = set(random.choices(entries, k=random.randrange(len(entries))))
        with reader._storage.db as db:
            db.executemany(
                "delete from entries where (feed, id) = (?, ?);",
                to_delete
            )
        # re-add the deleted entries through the fake parser so update_feeds()
        # inserts them again
        reader._parser = parser = Parser()
        parser.feed(0, datetime(2010, 1, 1))
        for _, i in to_delete:
            # the id column holds a tuple literal like "0, 2"; parse it with
            # ast.literal_eval instead of eval so database content cannot
            # execute arbitrary code
            parser.entry(0, ast.literal_eval(i)[1], datetime(2010, 1, 1))
        reader.update_feeds()
def update_search(done_q):
    """Run reader.update_search() in a loop until anything arrives on done_q."""
    rdr = my_make_reader()
    while True:
        try:
            done_q.get(False)
        except Empty:
            rdr.update_search()
        else:
            break
# start from a clean database (the glob also removes -wal/-shm files)
run("rm db.sqlite*", shell=True)

reader = my_make_reader()
reader._parser = parser = Parser()

# one feed with five entries, fully indexed for search
reader.add_feed(parser.feed(0, datetime(2010, 1, 1)))
for n in range(5):
    parser.entry(0, n, datetime(2010, 1, 1))
reader.update_feeds()
reader.enable_search()
reader.update_search()

stop_q = Queue()
workers = [Thread(target=update_entries, args=(stop_q,))]
workers.extend(Thread(target=update_search, args=(stop_q,)) for _ in range(4))
for worker in workers:
    worker.start()

# poll until the bug manifests: more search rows than actual entries
try:
    while True:
        time.sleep(.1)
        (count,), = reader._storage.db.execute("select count(*) from entries_search;")
        if count > len(parser.entries[0]):
            break
except KeyboardInterrupt:
    pass

for worker in workers:
    stop_q.put(None)
for worker in workers:
    worker.join()
def print_query(query):
    """Print the query text followed by each of its result rows, one per line."""
    print(query, *reader._storage.db.execute(query), sep='\n', end='\n\n')


print_query("select id, title from entries order by id")
print_query("select _id, title from entries_search order by _id")
$ python search_update_bug_replay.py subr.pickle.425 2>/dev/null | grep '^>>> 0030' -m1 -B9999
=== 0001 Thread-1 2020-07-07 00:11:46.075849 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
select feed, id from entries;
=== 0002 Thread-2 2020-07-07 00:11:46.079275 _search.py _delete_from_search
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
BEGIN
=== 0003 Thread-2 2020-07-07 00:11:46.080374 _search.py _delete_from_search
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
DELETE FROM entries_search
WHERE (_id, _feed) IN (
SELECT esss.id, esss.feed
FROM entries_search_sync_state AS esss
JOIN entries_search ON (esss.id, esss.feed) = (_id, _feed)
WHERE to_update OR to_delete
LIMIT 256
);
=== 0004 Thread-1 2020-07-07 00:11:46.080474 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
BEGIN
=== 0005 Thread-1 2020-07-07 00:11:46.081340 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
update entries set title = 'title 0' where (feed, id) = ('0', '0, 2');
=== 0006 Thread-2 2020-07-07 00:11:46.083775 _search.py _delete_from_search
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
COMMIT
=== 0007 Thread-2 2020-07-07 00:11:46.083984 _search.py _delete_from_sync_state
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
BEGIN
=== 0008 Thread-2 2020-07-07 00:11:46.084144 _search.py _delete_from_sync_state
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
DELETE
FROM entries_search_sync_state
WHERE (id, feed) IN (
SELECT id, feed
FROM entries_search_sync_state
WHERE to_delete
LIMIT 256
);
=== 0009 Thread-1 2020-07-07 00:11:46.084303 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
TRIGGER entries_search_entries_update
=== 0010 Thread-1 2020-07-07 00:11:46.084518 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
UPDATE entries_search_sync_state
SET to_update = 1
WHERE (new.id, new.feed) = (
entries_search_sync_state.id,
entries_search_sync_state.feed
)
=== 0011 Thread-1 2020-07-07 00:11:46.084666 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: Entry #2 2: Entry #2 2: 0
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
COMMIT
=== 0012 Thread-1 2020-07-07 00:11:46.085091 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
select feed, id from entries;
=== 0013 Thread-1 2020-07-07 00:11:46.085312 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
BEGIN
=== 0014 Thread-1 2020-07-07 00:11:46.085449 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
update entries set title = 'title 1' where (feed, id) = ('0', '0, 0');
=== 0015 Thread-2 2020-07-07 00:11:46.085666 _search.py _delete_from_sync_state
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
COMMIT
=== 0016 Thread-2 2020-07-07 00:11:46.086184 _search.py _insert_into_search
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
SELECT
entries.id,
entries.feed,
entries.last_updated,
coalesce(feeds.user_title, feeds.title),
feeds.user_title IS NOT NULL,
entries.title,
entries.summary,
entries.content
FROM entries_search_sync_state AS esss
JOIN entries USING (id, feed)
JOIN feeds ON feeds.url = esss.feed
WHERE esss.to_update
LIMIT 256
=== 0017 Thread-1 2020-07-07 00:11:46.087255 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
TRIGGER entries_search_entries_update
=== 0018 Thread-2 2020-07-07 00:11:46.087323 _search.py _insert_into_search
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
BEGIN IMMEDIATE;
=== 0019 Thread-1 2020-07-07 00:11:46.087786 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
UPDATE entries_search_sync_state
SET to_update = 1
WHERE (new.id, new.feed) = (
entries_search_sync_state.id,
entries_search_sync_state.feed
)
=== 0020 Thread-1 2020-07-07 00:11:46.088050 search_update_bug_record.py update_entries
entries search sync_state
0: Entry #0 0: Entry #0 0: 0
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
COMMIT
=== 0021 Thread-1 2020-07-07 00:11:46.088664 search_update_bug_record.py update_entries
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
select feed, id from entries;
=== 0022 Thread-1 2020-07-07 00:11:46.089065 search_update_bug_record.py update_entries
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
BEGIN
=== 0023 Thread-1 2020-07-07 00:11:46.089274 search_update_bug_record.py update_entries
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
update entries set title = 'title 2' where (feed, id) = ('0', '0, 4');
=== 0024 Thread-2 2020-07-07 00:11:46.089582 _search.py _insert_into_search
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
SELECT to_update
FROM entries_search_sync_state
WHERE (id, feed) = ('0, 2', '0');
=== 0025 Thread-2 2020-07-07 00:11:46.090250 _search.py _insert_into_search
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
SELECT last_updated FROM entries WHERE (id, feed) = ('0, 2', '0');
=== 0026 Thread-2 2020-07-07 00:11:46.090733 _search.py _insert_into_search
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
INSERT INTO entries_search
VALUES (
'title 0',
NULL,
'Feed #0',
'0, 2',
'0',
NULL,
0
);
=== 0027 Thread-2 2020-07-07 00:11:46.091591 _search.py _insert_into_search
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
UPDATE entries_search_sync_state
SET to_update = 0
WHERE (id, feed) = ('0, 2', '0');
=== 0028 Thread-2 2020-07-07 00:11:46.091750 _search.py _insert_into_search
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 1
3: Entry #3 3: Entry #3 3: 0
4: Entry #4 4: Entry #4 4: 0
COMMIT
>>> 0029 Thread-2 2020-07-07 00:11:46.093020 _search.py _insert_into_search
entries search sync_state
0: title 1 0: Entry #0 0: 1
1: Entry #1 1: Entry #1 1: 0
2: title 0 2: Entry #2 2: 0
3: Entry #3 2: title 0 3: 0
4: Entry #4 3: Entry #3 4: 0
4: Entry #4
SELECT
entries.id,
entries.feed,
entries.last_updated,
coalesce(feeds.user_title, feeds.title),
feeds.user_title IS NOT NULL,
entries.title,
entries.summary,
entries.content
FROM entries_search_sync_state AS esss
JOIN entries USING (id, feed)
JOIN feeds ON feeds.url = esss.feed
WHERE esss.to_update
LIMIT 256
>>> 0030 Thread-2 2020-07-07 00:11:46.093987 _search.py _insert_into_search
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment