wesleywerner/Godville Crossword Solver

## Godville Crossword Solver
'''
    Godville crossword solver by Goddess Shadowed Princess

    Copyleft 2018, licensed under GPL 3

    Requirements: python, html2text (pip install html2text)

    Version 1
'''

# enable debugging: when True the script prints extra "#"-prefixed
# diagnostics (script path, database files found, number of lines loaded)
DEBUG = False

# version number shown in the startup banner
VERSION = 1

# the database file extension: downloaded pages are saved with this suffix
# and only files ending in it are picked up as database files
extension = ".gcs"

# system exit function
import sys

# for regular expression searching
import re

# for downloading web content
import urllib2

# for extracting text from html
try:
    import html2text
except ImportError:
    print("Looks like you are missing some dependencies. Please pip install html2text")
    raw_input("Press Enter to continue...")
    sys.exit()

# for listing of files
from os import listdir

# get this script's path: the directory that is scanned for database files
from os import path
scriptPath = path.dirname(path.realpath(__file__))

# database state, filled in by loadDatabase():
#   dbfiles -- names of the database files found in scriptPath
#   dbdata  -- one list of text lines per file, in the same order as dbfiles
dbfiles = None
dbdata = None

def loadDatabase():
    """
    Scan the script directory for database files and load them into memory.

    Rebinds two module-level globals:
      dbfiles -- names of the files in scriptPath ending with `extension`
      dbdata  -- one list of text lines per entry of dbfiles, same order

    While ONLY_PIPED_LINES is enabled, only lines containing a "|" are
    kept, each cut off just before its first pipe.
    """

    # Only load lines that contain a PIPE character,
    # and only the text up to the first pipe occurrence
    ONLY_PIPED_LINES = True

    global dbfiles
    global dbdata

    # list of db files
    dbfiles = [f for f in listdir(scriptPath) if f.endswith(extension)]

    if DEBUG:
        print("# List of files found:")
        for n in dbfiles:
            print("\t%s" % (n))

    # load file contents
    dbdata = []
    linecount = 0
    for dbf in dbfiles:
        # The names come from listdir(scriptPath), so they must be opened
        # relative to scriptPath; a bare name only resolves when the current
        # working directory happens to be the script directory.
        with open(path.join(scriptPath, dbf), "r") as dbptr:
            lines = dbptr.readlines()
        linecount = linecount + len(lines)
        if ONLY_PIPED_LINES:
            culledLines = [
                oneline.split("|", 1)[0]
                for oneline in lines
                if "|" in oneline
            ]
            dbdata.append(culledLines)
        else:
            # take every line in the file
            dbdata.append(lines)

    if DEBUG:
        print("# %d lines of text loaded" % (linecount))

def extractURL(url):
    """
    Download one wiki page and save its plain-text rendering as a database file.

    url -- full address of the page. The text after the last "/" is used
           as the page name and, with `extension` appended, as the file name.

    The file is written into scriptPath, the directory that loadDatabase()
    lists. Network and file errors are not handled here; they propagate to
    the caller.
    """

    # the page name is whatever follows the last slash; rsplit also copes
    # with a url that has no slash at all (the whole url is the name then)
    pagename = url.rstrip("/").rsplit("/", 1)[-1]

    print("\t* Fetching " + pagename)

    # get page content, releasing the connection even if reading fails
    response = urllib2.urlopen(url)
    try:
        webContent = response.read().decode('utf8')
    finally:
        response.close()

    # extract text
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    converter.ignore_images = True

    plaintext = converter.handle(webContent)

    # "with" closes the file even when the write fails
    with open(path.join(scriptPath, pagename + extension), "w") as fileptr:
        fileptr.write(plaintext.encode('utf8'))

def downloadPages():
    """
    Fetch every wiki list page, then reload the in-memory database.

    A page that cannot be downloaded or saved is reported and skipped, so
    one failure neither aborts the remaining downloads nor ends the program.
    """

    print("Fetching pages from the wiki, this shouldn't take long...")

    # list of pages to get
    pages = (
        "https://wiki.godvillegame.com/List_of_Artifacts",
        "https://wiki.godvillegame.com/List_of_Equipment",
        "https://wiki.godvillegame.com/List_of_Monsters",
        "https://wiki.godvillegame.com/List_of_Quests",
        "https://wiki.godvillegame.com/List_of_Skills",
        "https://wiki.godvillegame.com/Milestones"
        )

    # process each page
    for url in pages:
        try:
            extractURL(url)
        except IOError as err:
            # urllib2.URLError derives from IOError, so this covers network
            # failures as well as problems writing the file
            print("\t! Could not fetch %s: %s" % (url, err))

    loadDatabase()

def searchquery(query):
    """
    Print every loaded database line that matches a search pattern.

    query -- regular expression, searched for case-insensitively anywhere
             in a line; "." stands for any single character.

    A blank query is ignored and an invalid pattern is reported instead of
    raised. Matching lines are printed; nothing is returned.
    """

    # an empty pattern matches every line, which would dump the whole database
    if not query.strip():
        return

    # case insensitive match; the query is user input and may not compile
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error as err:
        print("\tInvalid search pattern: %s" % (err))
        return

    # dbfiles and dbdata are parallel lists: one list of lines per file
    for filename, lines in zip(dbfiles, dbdata):

        if DEBUG:
            print("# file %s" % (filename))

        for line in lines:
            if pattern.search(line):
                print("\t%s" % (line))

def startup():
    """
    Prepare the database and greet the user.

    Loads whatever database files are already present, downloads the wiki
    pages when none were found, then prints the program banner.
    """

    loadDatabase()

    # nothing on disk yet: fetch the lists before accepting any search
    if not dbfiles:
        downloadPages()

    # print useful words
    banner = (
        "Godville crossword solver by Goddess Shadowed Princess (version %d)" % (VERSION),
        "Copyleft 2018, licensed under GPL 3",
        "(enter HELP for help)",
    )
    for text in banner:
        print(text)

def printhelp():
    """Print the usage instructions for the search prompt."""
    helpLines = (
        "",
        "Enter the word to search for, replacing any unknown letters with \".\"",
        "Example: \"..dden..g\" will find \"hidden agenda\".",
        "\nDOWNLOAD will fetch the latest lists from the wiki.",
        "Press ^C or ^D to quit",
    )
    for text in helpLines:
        print(text)

def menu():
    """
    Run the interactive search prompt.

    HELP and DOWNLOAD (in any letter case) are commands; any other input is
    handed to searchquery() exactly as typed. The loop has no exit of its
    own: the KeyboardInterrupt or EOFError raised at the prompt is left for
    the caller to handle.
    """
    while True:
        entry = raw_input("\nSearch: ")

        # Only the command comparison is case-folded. The search text stays
        # as typed, because upper-casing a pattern turns regex classes such
        # as \d (digit) into \D (non-digit); searchquery() already matches
        # case-insensitively.
        command = entry.strip().upper()
        if command == "HELP":
            printhelp()
        elif command == "DOWNLOAD":
            downloadPages()
        else:
            searchquery(entry)

if DEBUG:
    print("# Script path is %s" % (scriptPath))

# startup() may download pages, so it sits inside the same guard as the
# prompt loop: ^C or ^D at any point says goodbye instead of dumping a
# traceback
try:
    startup()
    menu()
except (KeyboardInterrupt, EOFError):
    print("Goodbye o/")
	'''
	Godville crossword solver by Goddess Shadowed Princess

	Copyleft 2018, licensed under GPL 3

	Requirements: python, html2text (pip install html2text)

	Version 1
	'''

	# enable debugging: when True the script prints extra "#"-prefixed
	# diagnostics (script path, database files found, number of lines loaded)
	DEBUG = False

	# version number shown in the startup banner
	VERSION = 1

	# the database file extension: downloaded pages are saved with this suffix
	# and only files ending in it are picked up as database files
	extension = ".gcs"

	# system exit function
	import sys

	# for regular expression searching
	import re

	# for downloading web content
	import urllib2

	# for extracting text from html
	try:
	import html2text
	except ImportError:
	print("Looks like you are missing some dependencies. Please pip install html2text")
	raw_input("Press Enter to continue...")
	sys.exit()

	# for listing of files
	from os import listdir

	# get this script's path: the directory that is scanned for database files
	from os import path
	scriptPath = path.dirname(path.realpath(__file__))

	# database state, filled in by loadDatabase():
	#   dbfiles -- names of the database files found in scriptPath
	#   dbdata  -- one list of text lines per file, in the same order as dbfiles
	dbfiles = None
	dbdata = None

	def loadDatabase():
		"""
		Scan the script directory for database files and load them into memory.

		Rebinds two module-level globals:
		  dbfiles -- names of the files in scriptPath ending with `extension`
		  dbdata  -- one list of text lines per entry of dbfiles, same order

		While ONLY_PIPED_LINES is enabled, only lines containing a "|" are
		kept, each cut off just before its first pipe.
		"""

		# Only load lines that contain a PIPE character,
		# and only the text up to the first pipe occurrence
		ONLY_PIPED_LINES = True

		global dbfiles
		global dbdata

		# list of db files
		dbfiles = [f for f in listdir(scriptPath) if f.endswith(extension)]

		if DEBUG:
			print("# List of files found:")
			for n in dbfiles:
				print("\t%s" % (n))

		# load file contents
		dbdata = []
		linecount = 0
		for dbf in dbfiles:
			# The names come from listdir(scriptPath), so they must be opened
			# relative to scriptPath; a bare name only resolves when the current
			# working directory happens to be the script directory.
			with open(path.join(scriptPath, dbf), "r") as dbptr:
				lines = dbptr.readlines()
			linecount = linecount + len(lines)
			if ONLY_PIPED_LINES:
				# split on a plain "|"; a backslash-escaped pipe would only
				# match lines containing a literal backslash before the pipe
				culledLines = [
					oneline.split("|", 1)[0]
					for oneline in lines
					if "|" in oneline
				]
				dbdata.append(culledLines)
			else:
				# take every line in the file
				dbdata.append(lines)

		if DEBUG:
			print("# %d lines of text loaded" % (linecount))

	def extractURL(url):
		"""
		Download one wiki page and save its plain-text rendering as a database file.

		url -- full address of the page. The text after the last "/" is used
		       as the page name and, with `extension` appended, as the file name.

		The file is written into scriptPath, the directory that loadDatabase()
		lists. Network and file errors are not handled here; they propagate to
		the caller.
		"""

		# the page name is whatever follows the last slash; rsplit also copes
		# with a url that has no slash at all (the whole url is the name then)
		pagename = url.rstrip("/").rsplit("/", 1)[-1]

		print("\t* Fetching " + pagename)

		# get page content, releasing the connection even if reading fails
		response = urllib2.urlopen(url)
		try:
			webContent = response.read().decode('utf8')
		finally:
			response.close()

		# extract text
		converter = html2text.HTML2Text()
		converter.ignore_links = True
		converter.ignore_images = True

		plaintext = converter.handle(webContent)

		# "with" closes the file even when the write fails
		with open(path.join(scriptPath, pagename + extension), "w") as fileptr:
			fileptr.write(plaintext.encode('utf8'))

	def downloadPages():
		"""
		Fetch every wiki list page, then reload the in-memory database.

		A page that cannot be downloaded or saved is reported and skipped, so
		one failure neither aborts the remaining downloads nor ends the program.
		"""

		print("Fetching pages from the wiki, this shouldn't take long...")

		# list of pages to get
		pages = (
			"https://wiki.godvillegame.com/List_of_Artifacts",
			"https://wiki.godvillegame.com/List_of_Equipment",
			"https://wiki.godvillegame.com/List_of_Monsters",
			"https://wiki.godvillegame.com/List_of_Quests",
			"https://wiki.godvillegame.com/List_of_Skills",
			"https://wiki.godvillegame.com/Milestones"
			)

		# process each page
		for url in pages:
			try:
				extractURL(url)
			except IOError as err:
				# urllib2.URLError derives from IOError, so this covers network
				# failures as well as problems writing the file
				print("\t! Could not fetch %s: %s" % (url, err))

		loadDatabase()

	def searchquery(query):
		"""
		Print every loaded database line that matches a search pattern.

		query -- regular expression, searched for case-insensitively anywhere
		         in a line; "." stands for any single character.

		A blank query is ignored and an invalid pattern is reported instead of
		raised. Matching lines are printed; nothing is returned.
		"""

		# an empty pattern matches every line, which would dump the whole database
		if not query.strip():
			return

		# case insensitive match; the query is user input and may not compile
		try:
			pattern = re.compile(query, re.IGNORECASE)
		except re.error as err:
			print("\tInvalid search pattern: %s" % (err))
			return

		# dbfiles and dbdata are parallel lists: one list of lines per file
		for filename, lines in zip(dbfiles, dbdata):

			if DEBUG:
				print("# file %s" % (filename))

			for line in lines:
				if pattern.search(line):
					print("\t%s" % (line))

	def startup():
		"""
		Prepare the database and greet the user.

		Loads whatever database files are already present, downloads the wiki
		pages when none were found, then prints the program banner.
		"""

		# auto download if pages are missing
		loadDatabase()
		if not dbfiles:
			downloadPages()

		# print useful words
		print("Godville crossword solver by Goddess Shadowed Princess (version %d)" % (VERSION))
		print("Copyleft 2018, licensed under GPL 3")
		print("(enter HELP for help)")

	def printhelp():
		"""Print the usage instructions for the search prompt."""
		print("")
		print("Enter the word to search for, replacing any unknown letters with \".\"")
		print("Example: \"..dden..g\" will find \"hidden agenda\".")
		print("\nDOWNLOAD will fetch the latest lists from the wiki.")
		print("Press ^C or ^D to quit")

	def menu():
		"""
		Run the interactive search prompt.

		HELP and DOWNLOAD (in any letter case) are commands; any other input is
		handed to searchquery() exactly as typed. The loop has no exit of its
		own: the KeyboardInterrupt or EOFError raised at the prompt is left for
		the caller to handle.
		"""
		while True:
			entry = raw_input("\nSearch: ")

			# Only the command comparison is case-folded. The search text stays
			# as typed, because upper-casing a pattern turns regex classes such
			# as \d (digit) into \D (non-digit); searchquery() already matches
			# case-insensitively.
			command = entry.strip().upper()
			if command == "HELP":
				printhelp()
			elif command == "DOWNLOAD":
				downloadPages()
			else:
				searchquery(entry)

	if DEBUG:
		print("# Script path is %s" % (scriptPath))

	# startup() may download pages, so it sits inside the same guard as the
	# prompt loop: ^C or ^D at any point says goodbye instead of dumping a
	# traceback
	try:
		startup()
		menu()
	except (KeyboardInterrupt, EOFError):
		print("Goodbye o/")