Skip to content

Instantly share code, notes, and snippets.

@WinslowJosiah
Created May 20, 2024 04:58
Show Gist options
  • Save WinslowJosiah/92874a5d136daafcc0f351bfc39cbe08 to your computer and use it in GitHub Desktop.
Save WinslowJosiah/92874a5d136daafcc0f351bfc39cbe08 to your computer and use it in GitHub Desktop.
from contextlib import nullcontext
from purequransearch import Concordance, VALID_CORPORA
# Get corpus from user
corpus = input("Enter corpus (default pickthall): ") or "pickthall"
# Validate explicitly: an `assert` is stripped when Python runs with -O,
# which would let an unknown corpus name through.
if corpus not in VALID_CORPORA:
    raise SystemExit(f"Unknown corpus: {corpus!r}")
concordance = Concordance(corpus)

# Get verse from user (expected form: "chapter:verse", both 1-based)
chapter, verse = map(int, input("Enter verse reference: ").split(":"))
# Reject non-positive numbers up front; otherwise `chapter - 1` or
# `verse - 1` becomes a negative index and silently selects a verse
# counted from the end instead of failing.
if chapter < 1 or verse < 1:
    raise SystemExit(
        f"Invalid verse reference {chapter}:{verse}: "
        "chapter and verse numbers start at 1."
    )
try:
    phrase = concordance.content[chapter - 1][verse - 1]
except IndexError:
    raise SystemExit(
        f"Verse {chapter}:{verse} does not exist in this corpus."
    ) from None

# Normalized words of the chosen verse (original letter case preserved);
# these are what the search below partitions into windows.
words = [
    Concordance.normalize_word(word, preserve_case=True)
    for word in Concordance.text_to_words(phrase)
]
print(f"{chapter}:{verse}\t{phrase}")

# Get other info from user
target = int(input("Enter target: "))
max_phrase_length = int(
    input("Enter max words in phrase (default 8): ") or 8
)
max_deleted_words = int(
    input("Enter max words to delete (default 4): ") or 4
)
output_path = input("Enter output file path (default none): ")
# NOTE This function performs a recursive search of every possible
# partition and combination of search types (which will be a lot!). It
# tries to cut off some branches of the search if the occurrence/verse
# totals get past the target, but if you're dealing with a long verse,
# going through every possibility will be slow.
def search_miracles(
queries: list[str] | None = None,
window_start: int = 0,
occurrences: int = 0,
verses: set[tuple[int, int]] | None = None,
deleted_words: int = 0,
):
global total_occurrences
global total_verses
# NOTE Setting these defaults here avoids some nasty bugs!
if queries is None:
queries = []
if verses is None:
verses = set()
# If occurrence/verse total is past the target, don't search deeper
if occurrences > target and len(verses) > target:
return
# If current partition uses the entire verse
if window_start >= len(words):
queries_str = ";".join(
query
for query in queries
if not query.startswith("#")
)
# Report occurrence/verse totals that hit the target
if occurrences == target:
yield f"{queries_str}\n= {occurrences} occurrence(s)"
total_occurrences += 1
if len(verses) == target:
yield f"{queries_str}\n= {len(verses)} verse(s)"
total_verses += 1
return
# For each possible "window" starting from this word
for window_end in range(
min(len(words), window_start + max_phrase_length),
window_start,
-1,
):
window = words[window_start:window_end]
# This window will correspond to a query we need to search
query = " ".join(window)
# Perform case-insensitive search
result = concordance.search(query)
occ = len(result)
vss = concordance.word_indices_to_verses(result)
# Go deeper
yield from search_miracles(
queries + [query],
window_end,
occurrences + occ,
verses | vss,
deleted_words,
)
# Perform case-sensitive search
result_case = concordance.search("^" + query)
occ_case = len(result_case)
vss_case = concordance.word_indices_to_verses(result_case)
# Only go deeper if the occurrence/verse totals are different
if not (occ_case == occ and vss_case == vss):
yield from search_miracles(
queries + ["^" + query],
window_end,
occurrences + occ_case,
verses | vss_case,
deleted_words,
)
# Perform no search; a deleted word will give no results
# Only go deeper if the last query wasn't deleted...
if (not queries or not queries[-1].startswith("#")):
# ...and we're still under the max number of deleted words
if deleted_words + len(window) <= max_deleted_words:
yield from search_miracles(
queries + ["#" + query],
window_end,
occurrences,
verses,
deleted_words + len(window),
)
print("Searching...")
# These counters are module-level because search_miracles() updates them
# through `global` declarations; they must exist before it is iterated.
total_occurrences = 0
total_verses = 0
# If an output path was specified, open a file and write to it;
# otherwise nullcontext() stands in for the file and `outfile` is None.
# The encoding is explicit so that writing does not depend on the
# platform's default encoding.
with (
    open(output_path, "w", encoding="utf-8")
    if output_path
    else nullcontext()
) as outfile:
    for miracle in search_miracles():
        print(miracle)
        if outfile is not None:
            outfile.write(miracle + "\n")
# NOTE(review): indentation was lost in the source this was recovered
# from; the blank line is assumed to separate the results from the
# summary rather than to follow every result - confirm.
print()
print("Done.")
print(
    f"Found {total_occurrences} occurrence miracle(s) and "
    f"{total_verses} verse-count miracle(s)."
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment