Skip to content

Instantly share code, notes, and snippets.

@danilobellini
Created June 17, 2013 04:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danilobellini/5794639 to your computer and use it in GitHub Desktop.
Save danilobellini/5794639 to your computer and use it in GitHub Desktop.
Get some statistics from the Bible
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created on Sun Jun 16 05:19:49 2013
# Danilo de Jesus da Silva Bellini
""" Get some statistics from bible """
from __future__ import print_function, unicode_literals

import os
import random
import zipfile

try:  # Python 2
    from urllib2 import urlopen
    from string import letters as ascii_letters
except ImportError:  # Python 3
    from urllib.request import urlopen
    from string import ascii_letters
# Base URL the text archive is downloaded from (zip_name is appended to it).
bible_link = "http://printkjv.ifbweb.com/"
# Archive file name; used both as the remote file name and as the local path
# where the downloaded copy is stored and later reopened.
zip_name = "AV_txt.zip"
class DownloadError(Exception):
    """Raised when a download answers with a status code other than 200."""
def download_file(url, output_filename):
    """Download ``url`` and store the response body in ``output_filename``.

    Args:
        url: Address handed to ``urlopen``.
        output_filename: Path of the file to create or overwrite; it is
            opened in binary mode.

    Raises:
        DownloadError: If the response status code is not 200. The output
            file is not touched in that case.
    """
    response = urlopen(url)
    try:
        code = response.getcode()
        if code != 200:
            raise DownloadError("Error {}".format(code))
        with open(output_filename, "wb") as f:
            # Copy in chunks so the whole payload is never held in memory.
            for chunk in iter(lambda: response.read(64 * 1024), b""):
                f.write(chunk)
    finally:
        # Explicit close instead of ``with``: the Python 2 urllib2 response
        # object is not a context manager.
        response.close()
def get_bible_data():
    """Return the decoded text of the first member of the archive.

    The archive at ``zip_name`` is downloaded from ``bible_link`` first when
    the local file is missing or is not a valid zip file, so an existing
    good copy is reused on later runs.

    Returns:
        The first entry listed in the archive, decoded as UTF-8.
    """
    if not os.path.exists(zip_name) or not zipfile.is_zipfile(zip_name):
        download_file(bible_link + zip_name, zip_name)
    with zipfile.ZipFile(zip_name) as zf:
        # Only the first listed member is read; any other members are ignored.
        return zf.read(zf.namelist()[0]).decode("utf-8")
def new_random_word():
    """Return a random string made of distinct ASCII letters.

    The length is chosen with ``random.randrange(3, len(ascii_letters))``,
    so it is at least 3 and always shorter than the full alphabet. The word
    is a prefix of a shuffled copy of the alphabet, hence no letter repeats.
    """
    msg = list(ascii_letters)
    size = random.randrange(3, len(msg))
    random.shuffle(msg)
    return "".join(msg[:size])
if __name__ == "__main__":
    # Split the whole text on whitespace; the set keeps one copy of each item.
    words = get_bible_data().split()
    set_words = set(words)
    print("Total whitespace-separated items:", len(words))
    print("Unique (case sensitive):", len(set_words))

    # Comparison for Junior Polegato
    print()
    # Three candidates taken from the text itself plus 17 random ones.
    nw = [words[5], words[37], words[458]]
    nw.extend(new_random_word() for _ in range(17))
    print("New words:", nw)

    print()
    # Membership against the list is computed once and reused below.
    repeated = [x for x in nw if x in words]
    print("Repeated:", len(repeated))
    print("Repeated in set:", len([x for x in nw if x in set_words]))
    # NOTE(review): only candidates that already occur in the text are
    # appended, so set(final) has the same size as set_words. If the intent
    # was to add the unseen candidates, the filter would need ``not in``;
    # confirm before changing.
    final = words + repeated
    print("Final size, appending the new words:", len(final))
    print("Final size, adding to set:", len(set(final)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment