Skip to content

Instantly share code, notes, and snippets.

@christopherkullenberg
Created April 10, 2016 09:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save christopherkullenberg/0b36b6497f6beeafb6532e9fc1c5159d to your computer and use it in GitHub Desktop.
Save christopherkullenberg/0b36b6497f6beeafb6532e9fc1c5159d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Import modules for CGI handling and UTF-8 handling of input/output
import cgi, cgitb
import sys
import re
import os
import sqlite3
import numpy as np
import collections
import pandas as pd
from bokeh.plotting import figure, output_file, save
from bokeh.embed import file_html
from bokeh.resources import CDN
# Fix IO and utf8
sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
''' For regexp search to work you need to install 'apt-get install sqlite3-pcre'
and put '.load /usr/lib/sqlite3/pcre.so' into the file '~/.sqliterc'''
conn = sqlite3.connect('fs.db')
# Enable regexp in sqlite3
def regexp(expr, item):
reg = re.compile(expr)
return reg.search(item) is not None
conn.create_function("REGEXP", 2, regexp)
# Get data from fields
form = cgi.FieldStorage()
if form.getvalue('like_search_word'):
form_string = form.getvalue('like_search_word')
search_string = form_string
else:
search_string = "Not entered"
if form.getvalue('result_limit'):
result_limit = form.getvalue('result_limit')
else:
result_limit = 1000
if form.getvalue('order'):
theorder = form.getvalue('order')
if theorder == "Stigande":
order = "ASC"
elif theorder == "Fallande":
order = "DESC"
else:
order = "ASC"
if form.getvalue('mode'):
themode = form.getvalue('mode')
if themode == "like":
mode = "like"
elif themode == "regexp":
mode = "regexp"
else:
mode = "like"
else:
mode = "undefined"
# Queries to the database with a LIKE seach
def likesearches():
percentsearchstring = "%" + search_string + "%"
if order == "ASC":
search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
message LIKE (?) ORDER BY thedate ASC LIMIT (?)",\
(percentsearchstring, result_limit, ))
elif order == "DESC":
search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
message LIKE (?) ORDER BY thedate DESC LIMIT (?)",\
(percentsearchstring, result_limit, ))
else:
print("Something went wrong")
return(search)
# Queries to the database with a REGEXP seach
def regexpsearches():
if order == "ASC":
search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
message REGEXP (?) ORDER BY thedate ASC LIMIT (?)",\
(search_string, result_limit, ))
elif order == "DESC":
search = conn.execute("SELECT group_name, thedate, message FROM main WHERE \
message REGEXP (?) ORDER BY thedate DESC LIMIT (?)",\
(search_string, result_limit, ))
else:
print("Something went wrong")
return(search)
# Select which search method
if mode == 'like':
searchmode = likesearches()
elif mode == 'regexp':
searchmode = regexpsearches()
else:
print("Error")
results = []
for s in searchmode:
results.append(s)
def totalsize():
totaldbsize = conn.execute("SELECT count(*) FROM main")
for t in totaldbsize:
total = t
return(total[0])
def graph():
datelist = []
for date in results:
# [:-17] will return %Y-%m and [:-14] will return %Y-%m-%d
thedate = date[1][:-14]
datelist.append(thedate)
counter = collections.Counter(datelist)
output_file("/home/christopher/www/results/years.html", title="Resultat")
years = []
val = []
yearvaldict = {}
for number in sorted(counter):
years.append(number)
value = counter[number]
val.append(value)
yearvaldict[number] = [value]
#for key, value in yearvaldict.items():
# print(key, value)
# Convert data into a panda DataFrame format
data=pd.DataFrame({'year':years, 'value':val}, )
# Create new column (yearDate) equal to the year Column but with datetime format
data['yearDate']=pd.to_datetime(data['year'],format='%Y-%m-%d')
# Create a line graph with datetime x axis and use datetime column(yearDate)
# for this axis
p = figure(width=1000, height=250, x_axis_type="datetime")
p.logo = None
p.toolbar_location = "right"
p.line(x=data['yearDate'],y=data['value'], color="#9B287B", line_width=2)
#show(p) # for debugging
bokehhtml = file_html(p, CDN, "Resultat")
save(p)
return(bokehhtml)
###
print("Content-type:text/html; charset=utf-8\r\n\r\n")
print()
print(graph())
print('''<style>
table#t01 tr:nth-child(even) {
background-color: #eee;
}
table#t01 tr:nth-child(odd) {
background-color: #fff;
}
table#t01 th {
color: white;
background-color: black;
}
td#d01 {
padding: 15px;
}
span.highlight {
background-color: yellow;
}
</style>
''')
print('<p>Du sökte på ordet <b>' + search_string + '</b> i ' + mode + '-läge. Databasen har \
sammanlagt <b>' + str(totalsize()) + ' </b> sparade kommentarer. Gör \
en <a href="http://localhost">ny sökning</a>.</p>')
print("<br>")
def printresults():
resultcounter = 0
print('<table id="t01">' )
for s in results:
print("<tr>")
print("<td id=#d01><b>" + s[0] + "</b></td>")
print("<td id=#d01>" + s[1][:-14] + "</td>")
print("<td>")
#This makes each search word bold.
for word in s[2].split():
printbold = re.findall(form_string, word, re.IGNORECASE)
if printbold:
print('<span class="highlight">' + word + "</span>")
else:
print(word)
print("</td>")
#print("<td id=#d01>" + s[2] + "</td>")
print("</tr>")
resultcounter += 1
print("</table>")
return(resultcounter)
#print(graphcontrol())
print("<i>Sökningen gav " + str(printresults()) + " träffar.</i>")
print('<br>Gör en <a href="http://localhost">ny sökning</a>.')
print('''
<br>
</body>
</html>
''')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment