Created
October 22, 2017 13:54
-
-
Save xorwen/64430abe21f890a721abeb0aecea30b7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict_conclusion(judicat=None):
    """Predict the ``conclusion`` label for the text in ``judicat``.

    Predictor for conclusion from model/59ec825249c4a1477200017c
    Predictive model by BigML - Machine Learning Made Easy

    The prediction is a decision tree over the presence or absence of
    individual terms in ``judicat``. Terms are matched case-insensitively
    as whole tokens (delimited by a word boundary or an underscore).

    Splits of the generated tree whose two branches led to the same label
    have been merged into guard clauses; the label returned for any given
    input is the same as in the fully expanded tree.

    NOTE(review): the term and label strings look like Czech text with the
    non-ASCII characters missing -- confirm against the original model
    export. They are reproduced here exactly as found.

    Args:
        judicat: Text to classify, or ``None``.

    Returns:
        The predicted label as a text string. When ``judicat`` is ``None``
        the default label u'odmtnuto pro zjevnou neopodstatnnost' is
        returned without inspecting any term.
    """
    import re

    tm_tokens = 'tokens_only'
    tm_full_term = 'full_terms_only'
    tm_all = 'all'

    def term_matches(text, field_name, term):
        """Count the occurrences of ``term`` and its variants in ``text``."""
        forms_list = term_forms[field_name].get(term, [term])
        options = term_analysis[field_name]
        token_mode = options.get('token_mode', tm_tokens)
        case_sensitive = options.get('case_sensitive', False)
        first_term = forms_list[0]
        if token_mode == tm_full_term:
            return full_term_match(text, first_term, case_sensitive)
        # In token_mode='all' full terms are matched using equals and
        # tokens using contains. A term counts as a "full term" when it has
        # an inner word boundary, i.e. it is made of more than one token.
        if token_mode == tm_all and len(forms_list) == 1:
            pattern = re.compile(r'^.+\b.+$', re.U)
            if re.match(pattern, first_term):
                return full_term_match(text, first_term, case_sensitive)
        return term_matches_tokens(text, forms_list, case_sensitive)

    def full_term_match(text, full_term, case_sensitive):
        """Return 1 if ``text`` equals ``full_term`` (honouring the
        ``case_sensitive`` option), else 0.
        """
        if not case_sensitive:
            text = text.lower()
            full_term = full_term.lower()
        return 1 if text == full_term else 0

    def get_tokens_flags(case_sensitive):
        """Return the regular expression flags matching the text analysis
        options.
        """
        flags = re.U
        if not case_sensitive:
            flags = (re.I | flags)
        return flags

    def term_matches_tokens(text, forms_list, case_sensitive):
        """Count the occurrences of the words in ``forms_list`` in ``text``."""
        flags = get_tokens_flags(case_sensitive)
        # Forms are literal words, not patterns: escape them so that a form
        # containing a regex metacharacter cannot corrupt the expression.
        escaped_forms = [re.escape(form) for form in forms_list]
        # A plain u'' literal with doubled backslashes is used instead of
        # the ur'' prefix, which is a syntax error on Python 3.
        expression = u'(\\b|_)%s(\\b|_)' % u'(\\b|_)|(\\b|_)'.join(
            escaped_forms)
        pattern = re.compile(expression, flags=flags)
        return len(re.findall(pattern, text))

    term_analysis = {
        "judicat": {
            "token_mode": 'all',
            "case_sensitive": False,
        },
    }
    term_forms = {
        "judicat": {
        },
    }

    # Most frequent leaf of the tree; also the answer for missing input.
    default_label = u'odmtnuto pro zjevnou neopodstatnnost'

    if judicat is None:
        return default_label

    def has(term):
        """Return True when ``term`` occurs at least once in ``judicat``."""
        return term_matches(judicat, "judicat", term) > 0

    if has("zpravodajem"):
        if has("neodstrann"):
            return u'odmtnuto pro neodstrann vady'
        if has("neppustnost"):
            return u'odmtnuto pro neppustnost'
        if has("lhty"):
            return u'odmtnuto pro nedodren lhty'
        if has("prce"):
            return u'procesn - spojen vc'
        return u'odmtnuto pro neoprvnnost navrhovatele'

    if has("vyhovno"):
        if has("vyjden") and has("zruil"):
            return u'vyhovno'
        return default_label

    if has("neodstrann"):
        if has("argumentace"):
            return default_label
        return u'odmtnuto pro neodstrann vady'

    if has("neppustnost"):
        if (has("poskytuje") and not has("podstavnho")
                and not has("odvodnil")):
            return u'odmtnuto pro neppustnost'
        return default_label

    if has("vylouen"):
        if has("pochybnost") and not has("zsad"):
            return u'procesn - vylouen soudce, asistenta, apod.'
        return default_label

    if has("dkaz"):
        return default_label

    if has("okamiku"):
        if has("vklad") or has("innost"):
            return default_label
        return u'zastaveno'

    if has("spojen"):
        if has("nadle") and has("spolu"):
            return u'procesn - spojen vc'
        return default_label

    if has("jinm"):
        return default_label

    if has("podnm") and not has("podna") and has("filipa"):
        return u'zastaveno'

    return default_label
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment