Created
May 5, 2012 18:26
-
-
Save mishadoff/2604580 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns cups.darkus)

;; data

;; Task sentences, one per line of the input file. The file is read once, when
;; this namespace is loaded. Splitting on "\r?\n" accepts both CRLF and LF
;; line endings.
(def sentences (seq (.split (slurp "res/FPC_May2012_tsk.txt") "\r?\n")))

;; Starting score of every operation. sentence-features-value uses this map as
;; its initial accumulator, so these small distinct values only decide the
;; ranking when matched features leave operations tied
;; (:div > :mul > :add > :sub).
(def priority {:add 0.0001 :sub 0.0 :mul 0.0002 :div 0.0003})
;; Feature table: regular-expression source string -> score contribution for
;; each arithmetic operation. The keys are compiled with re-pattern by
;; sentence-contains?, so they are regex syntax, not globs: a trailing "*"
;; makes the preceding letter optional/repeatable, it is not a wildcard.
(def features
  {"поровну" {:add 0 :sub 0 :mul 0 :div 1}
   "всего\\?" {:add 1 :sub 0 :mul 1 :div 0}
   " (П|п)о \\d+" {:add 0 :sub 0 :mul 0.9 :div 0.9}
   "перегорел*" {:add 0 :sub 1 :mul 0 :div 0}
   "завял*" {:add 0 :sub 1 :mul 0 :div 0}
   "одинаков*" {:add 0 :sub 0 :mul 0 :div 1}
   "осталось\\?" {:add 0 :sub 1 :mul 0 :div 0}
   "долж(ен|на|ны)" {:add 0 :sub 1 :mul 0 :div 0}
   "лишни(х|й)" {:add 0 :sub 1 :mul 0 :div 0}
   "первоначально" {:add 0 :sub 1 :mul 0 :div 0}
   "на \\d+ больше" {:add 1 :sub 0 :mul 0 :div 0}
   "на \\d+ меньше" {:add 0 :sub 1 :mul 0 :div 0}
   "по \\d+ .* в течении" {:add 0 :sub 0 :mul 1 :div 0}
   "всего \\d+ .* в течении" {:add 0 :sub 0 :mul 0 :div 1}
   "\\d+ .*, из которых \\d+" {:add 0 :sub 0.2 :mul 0 :div 0}
   "было \\d+" {:add 0 :sub 0.5 :mul 0 :div 0}
   "назад" {:add 0 :sub 0.5 :mul 0 :div 0}
   "\\d+ .*, а .* \\d+" {:add 0.2 :sub 0 :mul 0 :div 0}
   "\\d+ .*и \\d+" {:add 0.2 :sub 0 :mul 0 :div 0}
   "кажд.{1,3} из которых" {:add 0 :sub 0 :mul 0.5 :div 0}
   "еще \\d+" {:add 0.4 :sub 0 :mul 0 :div 0}
   "стало \\d+" {:add 0 :sub 0.5 :mul 0 :div 0}
   "через \\d+" {:add 0.2 :sub 0 :mul 0 :div 0}
   "\\d+ .* (И|и)з них \\d+" {:add 0 :sub 0.3 :mul 0 :div 0}
   " обеих " {:add 0 :sub 0.2 :mul 0 :div 0}})
(defn extract-numbers
  "Returns a lazy seq of the integers written as digit runs in `sentence`,
  in order of appearance.

  Digit runs are parsed as base 10. (Parsing them with `read-string` would
  treat a leading zero as an octal prefix: \"010\" reads as 8 and \"08\"
  throws.) Values too large for a long are returned as BigInt."
  [sentence]
  (map (fn [^String digits]
         (try
           (Long/parseLong digits)
           (catch NumberFormatException _
             ;; Only reachable on long overflow: the regex guarantees digits.
             (bigint (BigInteger. digits)))))
       (re-seq #"\d+" sentence)))
(defn sentence-contains?
  "Returns true when `feat`, a regular-expression source string, matches
  `sentence` exactly once; false for zero matches and for two or more.

  NOTE(review): despite the name this is not a plain containment test - a
  feature that occurs twice in a sentence is reported as absent. Kept as is
  because the feature weights may have been tuned against this behaviour;
  confirm before switching to `re-find`."
  [sentence feat]
  (let [matches (re-seq (re-pattern feat) sentence)]
    (= 1 (count matches))))
(defn sentence-features-value
  "Scores every arithmetic operation for `sentence`.

  Starts from the `priority` map and, for each pattern in `features` that
  `sentence-contains?` accepts, adds that pattern's per-operation weights.
  Returns a map from operation keyword to accumulated score."
  [sentence]
  (reduce (fn [scores pattern]
            (if (sentence-contains? sentence pattern)
              (merge-with + scores (get features pattern))
              scores))
          priority
          ;; Iterate the keys (not the entries) so the floating-point
          ;; additions happen in the same order as (keys features).
          (keys features)))
(defn apply-fn
  "Applies the arithmetic operation named by `key` to `d1` and `d2`.

  :add  d1 + d2
  :mul  d1 * d2
  :sub  larger operand minus smaller operand (never negative)
  :div  larger operand divided by smaller operand; yields a Ratio when the
        integer division is not exact and throws ArithmeticException when
        the smaller integer operand is 0

  Returns nil for any other `key`."
  [key d1 d2]
  (let [larger-by-smaller (fn [op] (op (max d1 d2) (min d1 d2)))]
    (case key
      :add (+ d1 d2)
      :mul (* d1 d2)
      :sub (larger-by-smaller -)
      :div (larger-by-smaller /)
      ;; Explicit default: `case` would otherwise throw on an unknown key.
      nil)))
(defn sentence-value
  "Computes the answer for the word problem in `sentence`.

  Operations are ranked by their score from `sentence-features-value`
  (highest first) and the best one is applied to the first two numbers found
  in the sentence. When the best operation is :div but the larger number is
  not an exact multiple of the smaller one (or the smaller one is 0), the
  second-best operation is used instead.

  Returns nil when the sentence contains fewer than two numbers."
  [sentence]
  (let [ranked (map first (reverse (sort-by val (sentence-features-value sentence))))
        [d1 d2] (extract-numbers sentence)]
    ;; Without two operands there is nothing to compute; returning nil keeps a
    ;; single malformed sentence from aborting a whole batch.
    (when (and d1 d2)
      (let [larger (max d1 d2)
            smaller (min d1 d2)
            ;; Checking the divisor first avoids `rem` raising "Divide by zero".
            exact-division? (and (not (zero? smaller))
                                 (zero? (rem larger smaller)))
            operation (if (and (= :div (first ranked)) (not exact-division?))
                        (second ranked)
                        (first ranked))]
        (apply-fn operation d1 d2)))))
;; Elapsed time: 20113.506963 msecs
(defn output
  "Writes the computed value of every sentence in `sentences` to `file`,
  one value per line (values are joined with \"\\n\", with no trailing
  newline). Any existing content of `file` is replaced by `spit`."
  [file sentences]
  (->> sentences
       (map sentence-value)
       (interpose "\n")
       (apply str)
       (spit file)))
;; dev part
;; Gold-standard lines used by test-gold, read once when this namespace is
;; loaded. test-gold splits each line on "?" and reads the text after it as
;; the expected value. Splitting on "\r?\n" accepts both CRLF and LF files
;; without leaving a trailing "\r" on each line.
(def GOLD (seq (.split (slurp "res/FPC_gold.txt") "\r?\n")))
(defn test-gold
  "Counts how many lines of `gold` are answered correctly by `sentence-value`.

  Each line is split on \"?\": the first piece, with the \"?\" put back, is the
  sentence and the second piece is read as the expected value. Returns the
  number of lines whose computed value equals the expected one."
  [gold]
  (letfn [(correct? [^String line]
            (let [[question answer] (.split line "\\?")
                  ;; NOTE(review): read-string can evaluate code embedded in
                  ;; its input (#=); only use this on a trusted gold file.
                  expected (read-string answer)]
              (= expected (sentence-value (str question "?")))))]
    (count (filter correct? gold))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment