christopherkullenberg/facebooksok.py

## facebooksok.py
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Import modules for CGI handling and UTF-8 handling of input/output
import cgi, cgitb
import sys
import re
import os
import sqlite3
import numpy as np
import collections
import pandas as pd
from bokeh.plotting import figure, output_file, save
from bokeh.embed import file_html
from bokeh.resources import CDN

# Fix IO and utf8
sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)

''' For regexp search to work you need to install 'apt-get install sqlite3-pcre'
and put '.load /usr/lib/sqlite3/pcre.so' into the file '~/.sqliterc'''
conn = sqlite3.connect('fs.db')
# Enable regexp in sqlite3
def regexp(expr, item):
    reg = re.compile(expr)
    return reg.search(item) is not None
conn.create_function("REGEXP", 2, regexp)

# Get data from fields
form = cgi.FieldStorage()
if form.getvalue('like_search_word'):
    form_string = form.getvalue('like_search_word')
    search_string = form_string
else:
    search_string = "Not entered"

if form.getvalue('result_limit'):
    result_limit = form.getvalue('result_limit')
else:
    result_limit = 1000

if form.getvalue('order'):
    theorder = form.getvalue('order')
    if theorder == "Stigande":
        order = "ASC"
    elif theorder == "Fallande":
        order = "DESC"
    else:
        order = "ASC"

if form.getvalue('mode'):
    themode = form.getvalue('mode')
    if themode == "like":
        mode = "like"
    elif themode == "regexp":
        mode = "regexp"
    else:
        mode = "like"
else:
    mode = "undefined"


# Queries to the database with a LIKE seach
def likesearches():
    percentsearchstring = "%" + search_string + "%"
    if order == "ASC":
        search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
                          message LIKE (?) ORDER BY thedate ASC LIMIT (?)",\
                          (percentsearchstring, result_limit, ))
    elif order == "DESC":
        search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
                          message LIKE (?) ORDER BY thedate DESC LIMIT (?)",\
                          (percentsearchstring, result_limit, ))
    else:
        print("Something went wrong")
    return(search)

# Queries to the database with a REGEXP seach
def regexpsearches():
    if order == "ASC":
        search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
                          message REGEXP (?) ORDER BY thedate ASC LIMIT (?)",\
                          (search_string, result_limit, ))
    elif order == "DESC":
        search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
                          message REGEXP (?) ORDER BY thedate DESC LIMIT (?)",\
                          (search_string, result_limit, ))
    else:
        print("Something went wrong")

    return(search)

# Select which search method
if mode == 'like':
    searchmode = likesearches()
elif mode == 'regexp':
    searchmode = regexpsearches()
else:
    print("Error")

results = []
for s in searchmode:
    results.append(s)

def totalsize():
    totaldbsize = conn.execute("SELECT count(*) FROM main")
    for t in totaldbsize:
        total = t
    return(total[0])

def graph():
    datelist = []
    for date in results:
        # [:-17] will return %Y-%m and [:-14] will return %Y-%m-%d
        thedate = date[1][:-14]
        datelist.append(thedate)

    counter = collections.Counter(datelist)
    output_file("/home/christopher/www/results/years.html", title="Resultat")
    years = []
    val   = []
    yearvaldict = {}

    for number in sorted(counter):
        years.append(number)
        value = counter[number]
        val.append(value)
        yearvaldict[number] = [value]

    #for key, value in yearvaldict.items():
    #    print(key, value)

    # Convert data into a panda DataFrame format
    data=pd.DataFrame({'year':years, 'value':val}, )

    # Create new column (yearDate) equal to the year Column but with datetime format
    data['yearDate']=pd.to_datetime(data['year'],format='%Y-%m-%d')

    # Create a line graph with datetime x axis and use datetime column(yearDate)
    # for this axis
    p = figure(width=1000, height=250, x_axis_type="datetime")
    p.logo = None
    p.toolbar_location = "right"
    p.line(x=data['yearDate'],y=data['value'], color="#9B287B", line_width=2)
    #show(p) # for debugging
    bokehhtml = file_html(p, CDN, "Resultat")
    save(p)
    return(bokehhtml)


###
print("Content-type:text/html; charset=utf-8\r\n\r\n")
print()
print(graph())

print('''<style>
    table#t01 tr:nth-child(even) {
    background-color: #eee;
    }
    table#t01 tr:nth-child(odd) {
    background-color: #fff;
    }
    table#t01 th {
    color: white;
    background-color: black;
    }
    td#d01 {
    padding: 15px;
    }
    span.highlight {
    background-color: yellow;
    }
    </style>
    ''')


print('<p>Du sökte på ordet <b>' + search_string + '</b> i ' + mode + '-läge. Databasen har \
sammanlagt <b>' + str(totalsize()) + ' </b> sparade kommentarer. Gör \
en <a href="http://localhost">ny sökning</a>.</p>')
print("<br>")


def printresults():
    resultcounter = 0
    print('<table id="t01">'    )
    for s in results:
        print("<tr>")
        print("<td id=#d01><b>" + s[0] + "</b></td>")
        print("<td id=#d01>" + s[1][:-14] + "</td>")
        print("<td>")
        #This makes each search word bold.
        for word in s[2].split():
            printbold = re.findall(form_string, word, re.IGNORECASE)
            if printbold:
                print('<span class="highlight">' + word + "</span>")
            else:
                print(word)
        print("</td>")
        #print("<td id=#d01>" + s[2] + "</td>")
        print("</tr>")
        resultcounter += 1
    print("</table>")
    return(resultcounter)

#print(graphcontrol())
print("<i>Sökningen gav " + str(printresults()) + " träffar.</i>")
print('<br>Gör en <a href="http://localhost">ny sökning</a>.')

print('''
    <br>
    </body>
    </html>
    ''')
	#!/usr/bin/env python3
	# -- coding: UTF-8 --
	# Import modules for CGI handling and UTF-8 handling of input/output
	import cgi, cgitb
	import sys
	import re
	import os
	import sqlite3
	import numpy as np
	import collections
	import pandas as pd
	from bokeh.plotting import figure, output_file, save
	from bokeh.embed import file_html
	from bokeh.resources import CDN

	# Fix IO and utf8
	sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)

	''' For regexp search to work you need to install 'apt-get install sqlite3-pcre'
	and put '.load /usr/lib/sqlite3/pcre.so' into the file '~/.sqliterc'''
	conn = sqlite3.connect('fs.db')
	# Enable regexp in sqlite3
	def regexp(expr, item):
	reg = re.compile(expr)
	return reg.search(item) is not None
	conn.create_function("REGEXP", 2, regexp)

	# Get data from fields
	form = cgi.FieldStorage()
	if form.getvalue('like_search_word'):
	form_string = form.getvalue('like_search_word')
	search_string = form_string
	else:
	search_string = "Not entered"

	if form.getvalue('result_limit'):
	result_limit = form.getvalue('result_limit')
	else:
	result_limit = 1000

	if form.getvalue('order'):
	theorder = form.getvalue('order')
	if theorder == "Stigande":
	order = "ASC"
	elif theorder == "Fallande":
	order = "DESC"
	else:
	order = "ASC"

	if form.getvalue('mode'):
	themode = form.getvalue('mode')
	if themode == "like":
	mode = "like"
	elif themode == "regexp":
	mode = "regexp"
	else:
	mode = "like"
	else:
	mode = "undefined"


	# Queries to the database with a LIKE seach
	def likesearches():
	percentsearchstring = "%" + search_string + "%"
	if order == "ASC":
	search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
	message LIKE (?) ORDER BY thedate ASC LIMIT (?)",\
	(percentsearchstring, result_limit, ))
	elif order == "DESC":
	search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
	message LIKE (?) ORDER BY thedate DESC LIMIT (?)",\
	(percentsearchstring, result_limit, ))
	else:
	print("Something went wrong")
	return(search)

	# Queries to the database with a REGEXP seach
	def regexpsearches():
	if order == "ASC":
	search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
	message REGEXP (?) ORDER BY thedate ASC LIMIT (?)",\
	(search_string, result_limit, ))
	elif order == "DESC":
	search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
	message REGEXP (?) ORDER BY thedate DESC LIMIT (?)",\
	(search_string, result_limit, ))
	else:
	print("Something went wrong")

	return(search)

	# Select which search method
	if mode == 'like':
	searchmode = likesearches()
	elif mode == 'regexp':
	searchmode = regexpsearches()
	else:
	print("Error")

	results = []
	for s in searchmode:
	results.append(s)

	def totalsize():
	totaldbsize = conn.execute("SELECT count(*) FROM main")
	for t in totaldbsize:
	total = t
	return(total[0])

	def graph():
	datelist = []
	for date in results:
	# [:-17] will return %Y-%m and [:-14] will return %Y-%m-%d
	thedate = date[1][:-14]
	datelist.append(thedate)

	counter = collections.Counter(datelist)
	output_file("/home/christopher/www/results/years.html", title="Resultat")
	years = []
	val = []
	yearvaldict = {}

	for number in sorted(counter):
	years.append(number)
	value = counter[number]
	val.append(value)
	yearvaldict[number] = [value]

	#for key, value in yearvaldict.items():
	# print(key, value)

	# Convert data into a panda DataFrame format
	data=pd.DataFrame({'year':years, 'value':val}, )

	# Create new column (yearDate) equal to the year Column but with datetime format
	data['yearDate']=pd.to_datetime(data['year'],format='%Y-%m-%d')

	# Create a line graph with datetime x axis and use datetime column(yearDate)
	# for this axis
	p = figure(width=1000, height=250, x_axis_type="datetime")
	p.logo = None
	p.toolbar_location = "right"
	p.line(x=data['yearDate'],y=data['value'], color="#9B287B", line_width=2)
	#show(p) # for debugging
	bokehhtml = file_html(p, CDN, "Resultat")
	save(p)
	return(bokehhtml)


	###
	print("Content-type:text/html; charset=utf-8\r\n\r\n")
	print()
	print(graph())

	print('''<style>
	table#t01 tr:nth-child(even) {
	background-color: #eee;
	}
	table#t01 tr:nth-child(odd) {
	background-color: #fff;
	}
	table#t01 th {
	color: white;
	background-color: black;
	}
	td#d01 {
	padding: 15px;
	}
	span.highlight {
	background-color: yellow;
	}
	</style>
	''')


	print('<p>Du sökte på ordet <b>' + search_string + '</b> i ' + mode + '-läge. Databasen har \
	sammanlagt <b>' + str(totalsize()) + ' </b> sparade kommentarer. Gör \
	en <a href="http://localhost">ny sökning</a>.</p>')
	print("<br>")


	def printresults():
	resultcounter = 0
	print('<table id="t01">' )
	for s in results:
	print("<tr>")
	print("<td id=#d01><b>" + s[0] + "</b></td>")
	print("<td id=#d01>" + s[1][:-14] + "</td>")
	print("<td>")
	#This makes each search word bold.
	for word in s[2].split():
	printbold = re.findall(form_string, word, re.IGNORECASE)
	if printbold:
	print('<span class="highlight">' + word + "</span>")
	else:
	print(word)
	print("</td>")
	#print("<td id=#d01>" + s[2] + "</td>")
	print("</tr>")
	resultcounter += 1
	print("</table>")
	return(resultcounter)

	#print(graphcontrol())
	print("<i>Sökningen gav " + str(printresults()) + " träffar.</i>")
	print('<br>Gör en <a href="http://localhost">ny sökning</a>.')

	print('''
	<br>
	</body>
	</html>
	''')