Created
November 23, 2019 21:46
-
-
Save sojohnnysaid/602bbdf69d97f155ea0e293f2a3f3fc1 to your computer and use it in GitHub Desktop.
similarities (less): helpers.py and index.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.tokenize import sent_tokenize | |
def lines(a, b):
    """Return lines in both a and b.

    Both strings are split on line boundaries (so ``\\n`` and ``\\r\\n``
    endings compare equal). Each common line is reported once, in the
    order it first appears in ``a``.
    """
    # Build the lookup set once so each membership test is O(1).
    lines_in_b = set(b.splitlines())

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(
        line for line in a.splitlines() if line in lines_in_b
    ))
def sentences(a, b):
    """Return sentences in both a and b.

    Both strings are split into sentences with ``sent_tokenize``. Each
    common sentence is reported once, in the order it first appears
    in ``a``.
    """
    # Build the lookup set once so each membership test is O(1).
    sentences_in_b = set(sent_tokenize(b))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (a plain set would yield arbitrary order).
    return list(dict.fromkeys(
        sentence for sentence in sent_tokenize(a) if sentence in sentences_in_b
    ))
def substrings(a, b, n):
    """Return substrings of length n in both a and b.

    Every window of exactly ``n`` characters in ``a`` is considered
    (windows overlap, advancing one character at a time). Each common
    substring is reported once, in the order it first appears in ``a``.
    Returns an empty list when ``n`` is not positive or is longer than
    either string.
    """
    if n <= 0:
        return []

    # range(len(s) - n + 1) is empty when s is shorter than n, so no
    # short trailing fragments are ever produced.
    windows_in_b = {b[i:i + n] for i in range(len(b) - n + 1)}
    windows_in_a = (a[i:i + n] for i in range(len(a) - n + 1))

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(
        window for window in windows_in_a if window in windows_in_b
    ))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% extends "layout.html" %}

{% block body %}

    <div class="col">
        {# Two file uploads plus an algorithm choice, posted to /compare. #}
        <form action="/compare" enctype="multipart/form-data" method="post">
            <input type="file" name="file1"><br><br>
            <input type="file" name="file2"><br><br>
            choose an algorithm:
            {# The substring-length field is only shown for the "substrings" algorithm. #}
            <select
                name="algorithm"
                onchange="if (this.value == 'substrings') {
                    document.getElementById('length').style.display = 'block'
                }else{document.getElementById('length').style.display = 'none'}
                ">
                <option selected="selected" value="lines">lines</option>
                <option value="sentences">sentences</option>
                <option value="substrings">substrings</option>
            </select><br><br>
            <p id="length" style="display: none;">
                <input
                    placeholder="substring length"
                    type="number"
                    name="length">
            </p>
            <input type="submit" value="Submit">
        </form>
    </div>

    {# reload page on back button #}
    <script type="text/javascript">
        // True when the page was reached through history (back/forward).
        function isHistoryTraversal( event ) {
            // Page restored from the back/forward cache.
            if ( event.persisted ) {
                return true;
            }
            var perf = window.performance;
            if ( !perf ) {
                return false;
            }
            // Prefer Navigation Timing Level 2 where available.
            if ( typeof perf.getEntriesByType === "function" ) {
                var entries = perf.getEntriesByType( "navigation" );
                if ( entries.length > 0 ) {
                    return entries[0].type === "back_forward";
                }
            }
            // Deprecated fallback; type 2 is TYPE_BACK_FORWARD. Guarded because
            // performance.navigation may be missing.
            return !!perf.navigation && perf.navigation.type === 2;
        }

        window.addEventListener( "pageshow", function ( event ) {
            if ( isHistoryTraversal( event ) ) {
                // Handle page restore.
                window.location.reload();
            }
        });
    </script>

{% endblock %}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment