Created
March 19, 2011 01:25
-
-
Save timmc/877123 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns org.timmc.mipsiss.instructions | |
"Abstract MIPS instruction representation.") | |
;; Text formats: | |
;; - :s is RS e.g. $r9 | |
;; - :t is RT e.g. $r9 | |
;; - :d is RD e.g. $r9 | |
;; - :C is Imm e.g. 8 | |
;; - :o is offset and base (RS) e.g. -4($r9) | |
;; Static description of one MIPS instruction: its mnemonic plus the shape of
;; its machine encoding and of its assembly-text operands. A full instruction
;; instance additionally carries operand keys such as :rs, :rt, :rd (ints)
;; and :Imm (long) next to these three fields.
(defrecord ^{:doc "Instruction descriptor. A full instruction instance will
  also have operand keys such as :rs, :rt, :rd (ints) and :Imm (long)."}
  Instr
  [^{:doc "Instruction name as a keyword, e.g. :add"} name
   ^{:doc "Machine-language format: :r, :i, or :j"} mform
   ^{:doc "Text format: :dst, :tsC, :to, :stC, :C, or :s"} tform])
(def ^{:doc "Vector of Instr descriptors, one per supported instruction.
  Each entry is (Instr. name machine-format text-format)."}
  all-instr
  [;; arithmetic
   (Instr. :add :r :dst)
   (Instr. :sub :r :dst)
   (Instr. :and :r :dst)
   (Instr. :or :r :dst)
   (Instr. :nor :r :dst)
   (Instr. :slt :r :dst)
   ;; arithmetic -- immediate
   (Instr. :addi :i :tsC)
   (Instr. :andi :i :tsC)
   (Instr. :ori :i :tsC)
   (Instr. :slti :i :tsC)
   ;; branches and jumps
   ;; beq/bne take rs, rt and an immediate branch offset, which is the
   ;; I-format encoding (they were previously mislabelled as :r).
   (Instr. :beq :i :stC)
   (Instr. :bne :i :stC)
   (Instr. :jr :r :s)
   (Instr. :j :j :C)
   ;; memory
   (Instr. :lw :i :to)
   (Instr. :sw :i :to)])
(def ^{:private true
       :doc "Zero-argument function returning a map from instruction name
  (keyword) to its Instr record. The map is built from all-instr on the
  first call and cached by memoize for later calls."}
  by-name
  (memoize
   (fn []
     ;; Pair every record's name with the record itself.
     (zipmap (map (fn [^Instr instr] (.name instr)) all-instr)
             all-instr))))
(defn lookup
  "Return the Instr record registered under the keyword name-kw, or nil
  when no instruction has that name."
  [name-kw]
  (get (by-name) name-kw))
;;; For now, an instruction will be represented as a map of: | |
;;; :name - keyword like :add, :j, :lw | |
;;; :rs - int or nil | |
;;; :rt - int or nil | |
;;; :rd - int or nil | |
;;; :Imm - int or nil | |
;;; :name is sufficient to infer opcode, funct, and format, if needed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns org.timmc.mipsiss.parser | |
"Parse an input stream to MIPS instructions using regular expressions." | |
(:require [clojure.string :as str]) | |
(:require [org.timmc.mipsiss.instructions :as i]) | |
(:import [org.timmc.mipsiss.instructions Instr]) | |
(:import [java.util.regex Pattern])) | |
;;;; Regex components | |
(def ^{:tag Pattern
       :doc "Regex component: optional leading whitespace followed by a run
  of lowercase letters, which is captured as group 1."}
  instr-name
  #"\s*([a-z]+)")
;;;; Parsing | |
(def re-reg #"^\$r([0-9]+)$")

(defn parse-reg
  "Parse register text such as \"$r5\" into the single-entry map
  {name-key 5}.

  s        - the register text; must have the form $r<digits>.
  name-key - key to store the register index under (e.g. :rs, :rt, :rd).

  Throws Exception when the text does not match $r<digits>, or when the
  index lies outside 0..31."
  [^String s, name-key]
  ;; The argument must not be called `str`: that name would shadow
  ;; clojure.core/str and make the error-message calls below fail with a
  ;; ClassCastException instead of raising the intended message.
  (if-let [[_ numstr] (re-matches re-reg s)]
    (let [num (Integer/parseInt numstr 10)]
      (when-not (<= 0 num 31)
        (throw (Exception. (str "Register index out of bounds: " num))))
      {name-key num})
    (throw (Exception. (str "Cannot parse as " name-key " register: " s)))))
(def re-hex-const #"^0x([0-9a-fA-F]+)$")

(defn parse-imm
  "Parse an immediate operand into {:Imm value}.

  Text of the form 0x<hex digits> is read as hexadecimal; anything else is
  read as a (possibly signed) decimal number. Both notations are parsed with
  Long/parseLong so that they accept the same range of values.

  Throws NumberFormatException when the text is neither notation."
  [^String s]
  {:Imm (if-let [[_ hex] (re-matches re-hex-const s)]
          (Long/parseLong hex 16)
          (Long/parseLong s 10))})
(def re-offset #"^(-?[0-9]+)\((.+)\)$")

(defn parse-offset
  "Parse a base-offset operand such as \"-4($r19)\" into {:Imm -4 :rs 19}.

  The part before the parentheses is a decimal offset (optionally negative);
  the part inside them is handed to parse-reg as the :rs register.

  Throws Exception when the text is not of the form <offset>(<register>);
  errors raised by parse-reg for a bad register propagate unchanged."
  [^String s]
  ;; The argument must not be called `str`: that name would shadow
  ;; clojure.core/str, and the error branch used to call it as a function,
  ;; failing with a ClassCastException instead of the message below.
  (if-let [[_ istr rstr] (re-matches re-offset s)]
    (merge {:Imm (Integer/parseInt istr 10)}
           (parse-reg rstr :rs))
    (throw (Exception. "Expected base-offset argument."))))
(defmulti parse-args
  "Parse the remaining (operand) args of an instruction into a map.
  Dispatches on the text format (tform) of the Instr given as first
  argument; the operand strings follow as separate arguments."
  (fn [^Instr instr & _operands] (.tform instr)))

;; :dst -- three registers, in the order rd, rs, rt.
(defmethod parse-args :dst
  [_ rd-text rs-text rt-text]
  (let [rd (parse-reg rd-text :rd)
        rs (parse-reg rs-text :rs)
        rt (parse-reg rt-text :rt)]
    (merge rd rs rt)))

;; :tsC -- rt, rs, then an immediate.
(defmethod parse-args :tsC
  [_ rt-text rs-text imm-text]
  (let [rt (parse-reg rt-text :rt)
        rs (parse-reg rs-text :rs)
        imm (parse-imm imm-text)]
    (merge rt rs imm)))

;; :stC -- rs, rt, then an immediate.
(defmethod parse-args :stC
  [_ rs-text rt-text imm-text]
  (let [rs (parse-reg rs-text :rs)
        rt (parse-reg rt-text :rt)
        imm (parse-imm imm-text)]
    (merge rs rt imm)))

;; :to -- rt, then an offset(base) operand.
(defmethod parse-args :to
  [_ rt-text offset-text]
  (let [rt (parse-reg rt-text :rt)
        base+offset (parse-offset offset-text)]
    (merge rt base+offset)))

;; :C -- a lone immediate.
(defmethod parse-args :C
  [_ imm-text]
  (parse-imm imm-text))

;; :s -- a lone rs register.
(defmethod parse-args :s
  [_ rs-text]
  (parse-reg rs-text :rs))
(defn strip-line
  "Strip a line of comments and trim its leading and trailing whitespace.

  A comment starts at the first `;` and runs to the end of the line; the
  whitespace directly before it and one following newline are removed with
  it. Returns the empty string for blank or comment-only lines."
  [line]
  (-> line
      ;; Drop the comment together with the blanks that precede it.
      (str/replace #"\s*;.*\n?" "")
      ;; The regex alone never removed trailing whitespace on a line that
      ;; has no comment, so trim both ends explicitly.
      str/trim))
(defn parse-line
  "Parse one line of assembly text into an instruction.

  The line is first cleaned with strip-line. Returns nil when nothing is
  left (blank or comment-only line). Otherwise the text is split on commas;
  the first piece holds the mnemonic and, after whitespace, the first
  operand. The result is the Instr record found for the mnemonic merged with
  the operand map produced by parse-args.

  Throws Exception when the mnemonic is not a known instruction; errors
  raised while parsing the operands propagate unchanged."
  [line]
  (let [cleaned (strip-line line)]
    (when (seq cleaned)
      (let [[head & more-args] (map str/trim (str/split cleaned #","))
            ;; `head` is nil when the line holds nothing but commas; treat
            ;; that as an empty (and therefore unknown) mnemonic.
            [mnemonic arg0] (map str/trim (str/split (or head "") #"\s+" 2))
            instr (i/lookup (keyword mnemonic))]
        ;; Without this check a nil record reaches the parse-args dispatch
        ;; function and surfaces as a bare NullPointerException.
        (when-not instr
          (throw (Exception. (str "Unknown instruction: " mnemonic))))
        (merge instr (apply parse-args instr (cons arg0 more-args)))))))
(defn parse
  "Turn a seq of text lines into a lazy seq of Instr records, one per line
  that parse-line yields an instruction for. Lines for which parse-line
  returns nil are dropped, so the result may be shorter than the input."
  [lines]
  (keep parse-line lines))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment