Skip to content

Instantly share code, notes, and snippets.

@visibletrap
Last active October 20, 2019 08:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save visibletrap/80e5a3c154b14ca246ddb954b18c31bc to your computer and use it in GitHub Desktop.
Save visibletrap/80e5a3c154b14ca246ddb954b18c31bc to your computer and use it in GitHub Desktop.
(ns correct-tags.use-parser
(:require [instaparse.core :as insta]))
; Additional dependencies
; [instaparse "1.4.10"]
; https://github.com/Engelberg/instaparse
; Instaparse parser for one '|'-delimited line of annotated tokens.
; Grammar rules, as written in the string below:
;   LINE  - one or more ('|' GROUP) pairs followed by a closing '|'.
;   GROUP - a WORD optionally followed by a TAG.
;   WORD  - a run of characters in the range ก-๙ and/or literal dots,
;           or a run of whitespace (either alternative may match empty).
;   TAG   - '(' TYPE, then zero or more '_' IOB suffixes, then ')'.
;   TYPE  - a run of lower-case ASCII letters (possibly empty).
;   IOB   - the literal 'start' or 'end'.
; NOTE(review): 'cont' is not an IOB alternative, so text produced by
; add-cont + unparse (which emits "_cont" tags) presumably cannot be fed back
; through this parser - confirm whether round-tripping is needed.
(def annot-parser
(insta/parser
"LINE = ('|' GROUP)+ '|'
GROUP = WORD TAG?
WORD = #'([ก-๙]|\\.)*' | #'\\s*'
TAG = '(' TYPE ('_' IOB)* ')'
TYPE = #'[a-z]*'
IOB = 'start' | 'end' "))
(defn add-cont
  "Walks the elements of `parsed-tree` left to right and fills in
  continuation tags between a `start` group and the next `end` group.

  Every element is appended to `:out` in order. Non-vector elements (the
  leading rule keyword, the \"|\" separators) are always copied unchanged.
  For vector elements:
    * outside a span, a group whose IOB marker (path [2 4 1]) is \"start\"
      opens a span and its TYPE (path [2 2 1]) is remembered;
    * inside a span, a group whose IOB marker is \"end\" closes the span;
    * inside a span, any other group has index 2 replaced by a
      `(<type>_cont)` TAG built from the remembered type.

  Returns the final state map with keys `:out` (vector of elements),
  `:status` (`:none` or `:start`) and `:type` (type of the open span, or
  nil)."
  [parsed-tree]
  (let [iob-marker (fn [group] (get-in group [2 4 1]))
        tag-type   (fn [group] (get-in group [2 2 1]))
        cont-tag   (fn [span-type]
                     [:TAG "(" [:TYPE span-type] "_" [:IOB "cont"] ")"])
        emit       (fn [acc item] (update acc :out conj item))
        step       (fn [acc item]
                     (let [in-span? (= :start (:status acc))
                           group?   (vector? item)]
                       (cond
                         ;; Separators and the rule keyword pass straight through.
                         (not group?)
                         (emit acc item)

                         ;; Opening marker seen while no span is open.
                         (and (not in-span?) (= "start" (iob-marker item)))
                         (-> (emit acc item)
                             (assoc :status :start
                                    :type (tag-type item)))

                         ;; Closing marker seen while a span is open.
                         (and in-span? (= "end" (iob-marker item)))
                         (-> (emit acc item)
                             (assoc :status :none
                                    :type nil))

                         ;; Any other group inside a span becomes a continuation.
                         in-span?
                         (emit acc (assoc item 2 (cont-tag (:type acc))))

                         ;; Ordinary group outside a span.
                         :else
                         (emit acc item))))]
    (reduce step
            {:out    []
             :status :none
             :type   nil}
            parsed-tree)))
(defn unparse
  "Flattens a parse tree back into text.

  A vector is treated as a node whose first element is a label: the label is
  dropped and the remaining children are unparsed recursively and
  concatenated into one string. Anything that is not a vector is returned
  unchanged."
  [tree]
  (if-not (vector? tree)
    tree
    (->> (rest tree)
         (map unparse)
         (apply str))))
(comment
  ;; REPL example: parse an annotated sample, insert the continuation tags,
  ;; then render the resulting tree back to text.
  (let [sample "|อุทยาน|แห่ง(org_start)|ชาติ|ลำน้ำ|น่าน(org_end)| |จ.|อุตรดิตถ์(loc)| |จัด|\n|บริเวณ|กอง(loc_start)|หิน(loc)|แฟนตาซี(loc_end)| |ชั่วคราว|"]
    (->> sample
         annot-parser
         add-cont
         :out
         unparse)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment