Skip to content

Instantly share code, notes, and snippets.

@glickmac
Last active September 9, 2022 15:07
Show Gist options
  • Save glickmac/22d395ee9300ba523da939d28340b1b9 to your computer and use it in GitHub Desktop.
Save glickmac/22d395ee9300ba523da939d28340b1b9 to your computer and use it in GitHub Desktop.
#%%timeit ## ~2.44 s ± 377
# Alice in Wonderland Starting from Chapter 8
#
# Demo script: download the Project Gutenberg text, keep the portion that lies
# after the second "CHAPTER VIII" marker and before "THE END", build the BWT
# search data once, then count exact and fuzzy occurrences of a phrase.
# NOTE(review): `generate_all` and `find` are not defined in this snippet;
# they are assumed to come from an earlier cell/module -- confirm.
import urllib3

url = 'https://www.gutenberg.org/files/11/11-0.txt'
http = urllib3.PoolManager()
response = http.request("GET", url)
# Fail loudly on an HTTP error instead of letting an error page fall through
# to the split below, where it would surface as an unrelated IndexError.
if response.status != 200:
    raise RuntimeError(f"Failed to download {url}: HTTP {response.status}")
text = response.data.decode("utf-8")

# Drop everything from the first "THE END" onwards, then take the text that
# follows the second "CHAPTER VIII" marker (index 2 of the split).
# Presumably the first occurrence is the table-of-contents entry and the
# second is the chapter heading itself -- verify against the downloaded file.
sections = text.split("THE END")[0].split("CHAPTER VIII")
if len(sections) < 3:
    raise ValueError(
        "Expected at least two 'CHAPTER VIII' markers before 'THE END', "
        f"found {len(sections) - 1}"
    )
chapters = sections[2]

## 61235 Characters | 27432 Words | 3762 Lines
## (per the original notes, measured as len(text), len(text.split(" ")) and
## len(text.split("\n")))

# Build the BWT data once and reuse it for every query below.
bwt_data = generate_all(chapters)

# (label, pattern, allowed mismatches); counts noted by the original author:
#   exact  'Off with her head' -> 3
#   exact  'off with her head' -> 0 (search is CASE SENSITIVE)
#   fuzzy  'Off with her head' with 2 mismatches -> 5 ("her"/"his" variants)
queries = [
    ("Exact", 'Off with her head', 0),
    ("Exact", 'off with her head', 0),
    ("Fuzzy", 'Off with her head', 2),
]
for label, pattern, allowed in queries:
    hits = find(pattern, chapters, mismatches=allowed, bwt_data=bwt_data)
    print(f"Number of {label} Matches: {len(hits)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment