Skip to content

Instantly share code, notes, and snippets.

@xorwen
Created October 22, 2017 13:56
Show Gist options
  • Save xorwen/0b09c730aceafb10503c8ee01622e6f3 to your computer and use it in GitHub Desktop.
Save xorwen/0b09c730aceafb10503c8ee01622e6f3 to your computer and use it in GitHub Desktop.
def predict_conclusions_num(judikat=None,
                            field2_month=None,
                            field2_day_of_month=None):
    """Predict the numeric ``conclusions`` value from a fixed decision tree.

    Predictor for conclusions from model/59ec8e4aaf447f0b19000092
    Predictive model by BigML - Machine Learning Made Easy

    The tree tests whether individual terms occur as tokens in ``judikat``
    (case-insensitively, delimited by word boundaries or underscores) and,
    at a few nodes, compares the two numeric fields against thresholds.

    Args:
        judikat: Text to analyse. When ``None`` the root value ``1.10446``
            is returned without evaluating the tree.
        field2_month: Number compared against thresholds at some nodes.
            When ``None`` at such a node, that node's own value is returned.
        field2_day_of_month: Number compared against a threshold at one
            node; ``None`` is handled the same way as for ``field2_month``.

    Returns:
        The value (int or float) stored at the leaf or node reached.
        ``None`` is returned only when a numeric field satisfies neither
        ``>`` nor ``<=`` at its node (e.g. NaN).
    """
    import re

    tm_tokens = 'tokens_only'
    tm_full_term = 'full_terms_only'
    tm_all = 'all'

    # A term with a word boundary strictly inside it consists of several
    # tokens. Compiled once per prediction rather than once per term lookup.
    multi_token_term = re.compile(r'^.+\b.+$', re.U)

    def term_matches(text, field_name, term):
        """Count the occurrences of ``term`` and its variants in ``text``."""
        forms_list = term_forms[field_name].get(term, [term])
        options = term_analysis[field_name]
        token_mode = options.get('token_mode', tm_tokens)
        case_sensitive = options.get('case_sensitive', False)
        first_term = forms_list[0]
        if token_mode == tm_full_term:
            return full_term_match(text, first_term, case_sensitive)
        # In token_mode='all' we will match full terms using equals and
        # tokens using contains
        if (token_mode == tm_all and len(forms_list) == 1
                and multi_token_term.match(first_term)):
            return full_term_match(text, first_term, case_sensitive)
        return term_matches_tokens(text, forms_list, case_sensitive)

    def full_term_match(text, full_term, case_sensitive):
        """Return 1 if ``text`` equals ``full_term`` (honouring the
        ``case_sensitive`` option), otherwise 0.
        """
        if not case_sensitive:
            text = text.lower()
            full_term = full_term.lower()
        return 1 if text == full_term else 0

    def get_tokens_flags(case_sensitive):
        """Return the regular expression flags for the text analysis
        options.
        """
        flags = re.U
        if not case_sensitive:
            flags = (re.I | flags)
        return flags

    def term_matches_tokens(text, forms_list, case_sensitive):
        """Count the occurrences of the words in ``forms_list`` in ``text``.

        A word counts when it is delimited on both sides by a word boundary
        or an underscore.
        """
        flags = get_tokens_flags(case_sensitive)
        # The original used the ``ur''`` prefix, which is a syntax error on
        # Python 3. Forms are escaped so they are always matched literally.
        alternatives = '(\\b|_)|(\\b|_)'.join(
            re.escape(form) for form in forms_list)
        expression = r'(\b|_)%s(\b|_)' % alternatives
        pattern = re.compile(expression, flags=flags)
        return len(pattern.findall(text))

    term_analysis = {
        "judikat": {
            "token_mode": 'all',
            "case_sensitive": False,
        },
    }
    term_forms = {
        "judikat": {
        },
    }

    def has(term):
        """Return True when ``term`` occurs at least once in ``judikat``."""
        return term_matches(judikat, "judikat", term) > 0

    if judikat is None:
        return 1.10446

    # NOTE(review): the terms below look like Czech words whose accented
    # letters are missing (possibly lost upstream) - verify against the
    # model before relying on them. They are kept exactly as found.
    #
    # The nesting mirrors the model's tree: the ``if`` branch is "term
    # present", the ``else`` branch is "term absent". Numeric splits keep
    # explicit ``>`` / ``<=`` tests so that a value satisfying neither
    # (e.g. NaN) falls through and yields ``None``.
    if has("zsti"):
        if has("ppustnost"):
            if has("sti"):
                if has("120"):
                    return 2.43478
                else:
                    if has("odkzal"):
                        if has("smujc"):
                            return 1.89474
                        else:
                            return 2 if has("argument") else 1.07143
                    else:
                        if has("instanci"):
                            return 1 if has("nrok") else 2
                        else:
                            if has("vyjdit"):
                                return 3 if has("doruen") else 2.09091
                            else:
                                if field2_day_of_month is None:
                                    return 1.98058
                                if field2_day_of_month > 1:
                                    if has("vsledku"):
                                        return 2 if has("nmitka") else 3
                                    else:
                                        if has("pouit"):
                                            return 1 if has("dnho") else 2
                                        else:
                                            if has("moc"):
                                                return 2.5
                                            else:
                                                if has("astnkm"):
                                                    return 2 if has("postupoval") else 1
                                                else:
                                                    return 2
                                if field2_day_of_month <= 1:
                                    return 1
            else:
                if has("kon"):
                    return 3 if has("zaloit") else 2
                else:
                    if has("neppustnost"):
                        return 1 if has("zpravodajem") else 1.91667
                    else:
                        return 1.11765
        else:
            if has("neppustnost"):
                return 1.725
            else:
                if has("prostedku"):
                    return 1.93333 if has("doruen") else 1.29412
                else:
                    if has("sldeka"):
                        if has("nmu"):
                            if has("nejsou"):
                                return 2 if has("prostedek") else 1
                            else:
                                return 2.16667
                        else:
                            if has("takovm"):
                                return 2 if has("svj") else 1.16667
                            else:
                                return 1.08571
                    else:
                        if has("nejvym"):
                            return 1.85714 if has("znovu") else 1.07692
                        else:
                            if has("prce"):
                                return 2 if has("zamtnuta") else 1.09091
                            else:
                                if has("lht"):
                                    if has("napadenmu"):
                                        return 2
                                    else:
                                        if has("pop"):
                                            return 1 if has("navc") else 2
                                        else:
                                            return 1
                                else:
                                    if has("lhta"):
                                        return 1.5
                                    else:
                                        return 1.125 if has("napad") else 1
    else:
        if has("sti"):
            if has("prostedek"):
                if has("ppustnost"):
                    if has("soudcem"):
                        return 1.25
                    else:
                        if has("napad"):
                            return 2.31579
                        else:
                            if has("1964"):
                                return 2 if has("nicmn") else 1.09091
                            else:
                                if has("strnce"):
                                    return 1 if has("odstrann") else 1.75
                                else:
                                    if has("soudnictv"):
                                        return 2 if has("nlezu") else 1.16667
                                    else:
                                        return 2
                else:
                    if has("odepen"):
                        if has("osob"):
                            return 1.85714 if has("nikoliv") else 3
                        else:
                            return 1 if has("tomuto") else 1.8
                    else:
                        if has("podan"):
                            if has("eho"):
                                return 1
                            else:
                                if has("doruen"):
                                    return 2 if has("take") else 1.23077
                                else:
                                    return 1 if has("dkaz") else 1.91667
                        else:
                            if has("nemohlo"):
                                if has("relevantn"):
                                    return 1.08333
                                else:
                                    return 2 if has("porueno") else 1
                            else:
                                if has("napadenmu"):
                                    if field2_month is None:
                                        return 1.33333
                                    if field2_month > 9:
                                        return 2
                                    if field2_month <= 9:
                                        return 1
                                else:
                                    return 1
            else:
                if has("neppustnost"):
                    if has("uven"):
                        if has("elem"):
                            return 2 if has("obecnm") else 3
                        else:
                            return 1.8 if has("mst") else 1
                    else:
                        if has("avak"):
                            if has("advokta"):
                                return 2.2
                            else:
                                if has("evropskho"):
                                    return 1.8
                                else:
                                    if field2_month is None:
                                        return 1.125
                                    if field2_month > 11:
                                        return 1 if has("dajn") else 2
                                    if field2_month <= 11:
                                        return 1.03704
                        else:
                            if has("neopodstatnnou"):
                                return 2 if has("vechna") else 1.0625
                            else:
                                return 1
                else:
                    if has("smujc"):
                        if has("soudnmu"):
                            return 3 if has("pravomocnm") else 1.23529
                        else:
                            if has("smuje"):
                                if has("zvil"):
                                    return 1.83333
                                else:
                                    if has("odvodnil"):
                                        return 2 if has("zpravidla") else 1
                                    else:
                                        return 1
                            else:
                                if has("zpravodajem"):
                                    return 2
                                else:
                                    return 1.16 if has("nedostaten") else 1
                    else:
                        if has("zkonnch"):
                            if has("navrhuje"):
                                return 1 if has("nemohl") else 2
                            else:
                                if has("nezbytn"):
                                    if has("hodnotit"):
                                        return 2
                                    else:
                                        return 2 if has("roviny") else 1.11765
                                else:
                                    if field2_month is None:
                                        return 1.02128
                                    if field2_month > 11:
                                        return 1.33333
                                    if field2_month <= 11:
                                        return 1.16667 if has("lichovnk") else 1
                        else:
                            if has("kadho"):
                                if has("navrhl"):
                                    if has("nleitosti"):
                                        return 2
                                    else:
                                        return 2 if has("podat") else 1
                                else:
                                    return 1
                            else:
                                if has("dispozici"):
                                    if has("rozhodnut1"):
                                        return 2 if has("uvedenmu") else 1.08333
                                    else:
                                        return 1.02174
                                else:
                                    return 1.04651 if has("vznice") else 1
        else:
            if has("prostedek"):
                if has("neodstrann"):
                    return 1 if has("nleitosti") else 2.33333
                else:
                    if has("pmo"):
                        if has("nemohlo"):
                            return 1.1 if has("nikoliv") else 2.25
                        else:
                            if has("nejvymu"):
                                return 1 if has("jakkoliv") else 1.8
                            else:
                                if has("pedpokladu"):
                                    return 1 if has("prostedku") else 2
                                else:
                                    return 1.15385 if has("spatuje") else 1
                    else:
                        if has("anebo"):
                            return 1.14545
                        else:
                            if has("odstrann"):
                                if has("provedenm"):
                                    return 3
                                else:
                                    if has("majetku"):
                                        return 2
                                    else:
                                        if has("zakotven"):
                                            return 2
                                        else:
                                            if has("vsledku"):
                                                return 2 if has("podat") else 1
                                            else:
                                                return 1
                            else:
                                if has("pravomocnmu"):
                                    if has("smujc"):
                                        return 1 if has("prostedku") else 2
                                    else:
                                        return 1
                                else:
                                    return 1.03922 if has("ustanovenm") else 1
            else:
                if has("neppustnost"):
                    if has("vad"):
                        return 1.28571
                    else:
                        if has("osobn"):
                            return 2 if has("rozhodoval") else 1.0625
                        else:
                            return 1.125 if has("pochybnost") else 1.00422
                else:
                    if has("nebude"):
                        if has("nadto"):
                            if has("vkon"):
                                return 1 if has("jednnm") else 2
                            else:
                                return 1
                        else:
                            return 1.125 if has("lichovnk") else 1
                    else:
                        if has("prvm"):
                            if has("uhl"):
                                if has("nvrhem"):
                                    return 2
                                else:
                                    return 2 if has("pipomenout") else 1.03571
                            else:
                                return 1
                        else:
                            if has("rozhodoval"):
                                if field2_month is None:
                                    return 1.02907
                                if field2_month > 10:
                                    return 2 if has("tet") else 1.05
                                if field2_month <= 10:
                                    return 1.2 if has("okamiku") else 1
                            else:
                                return 1.00445
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment