Skip to content

Instantly share code, notes, and snippets.

@shigemk2
Created November 3, 2014 08:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shigemk2/becf53574ec521e433d8 to your computer and use it in GitHub Desktop.
Save shigemk2/becf53574ec521e433d8 to your computer and use it in GitHub Desktop.
module Main where
import Test.HUnit
import System.IO
import Data.Char
import Data.Bits
binToInt '0' = 0
binToInt '1' = 1
binStrToInt bin = f (reverse bin)
where
f "" = 0
f (x:xs) = (binToInt x) + 2 * (f xs)
intToBin 0 = '0'
intToBin 1 = '1'
bin x
| x1 == 0 = x2
| otherwise = bin x1 ++ x2
where
x1 = x `div` 2
x2 = [intToBin $ x `mod` 2]
hexStrToInt hex = f (reverse hex)
where
f "" = 0
f (x:xs) = (digitToInt x) + 16 * (f xs)
-- メモリをシミュレートするためバイト区切りのリスト
hex x
| x1 == 0 = x2
| otherwise = hex x1 ++ x2
where
x1 = x `div` 16
x2 = [intToDigit (x `mod` 16)]
hexn n x
| r < 0 = drop (-r) x'
| otherwise = replicate r '0' ++ x'
where
x' = hex x
r = n - length x'
hexStrToList "" = []
hexStrToList (h:l:xs) = hexStrToInt [h, l] : hexStrToList xs
listToHexStr [] = ""
listToHexStr (x:xs) = hexn 2 x ++ listToHexStr xs
-- バイトオーダー
-- メモリはバイトごとに区切られています。1バイトに収まりきらない数値は分割して格納します
-- 分割した際に逆順に並べ替える方式をリトルエンディアンと呼びます。並べ替えない方式はビッグエンディアン
-- 8086以降はリトルエンディアン
-- バイトごとに手で区切る
-- バイトオーダーを考えるのは分割後です。分割前のバイトオーダーは考えません
-- 数値→リトルエンディアン
toLE 0 _ = []
toLE n x = x `mod` 0x100 : toLE (n - 1) (x `div` 0x100)
-- リトルエンディアン→数値
fromLE 0 _ = 0
fromLE n (x:xs) = x + 0x100 * fromLE (n - 1) xs
-- 数値→ビッグエンディアン
-- 計算がリトルエンディアンより計算が難しい
toBE 0 _ = []
toBE n x = x `div` (0x100 ^ (n - 1)) `mod` 0x100 : toBE (n - 1) x
-- ビッグエンディアン→数値
fromBE 0 _ = 0
fromBE n (x:xs) = x * 0x100^(n - 1) + fromBE (n - 1) xs
testHex = TestList
[ "reverse" ~: reverse "11001" ~?= "10011"
, "binStrToInt 5" ~: binStrToInt "101" ~?= 5
, "binStrToInt 25" ~: binStrToInt "11001" ~?= 25
, "binStrToInt 31" ~: binStrToInt "11111" ~?= 31
, "binStrToInt 350" ~: binStrToInt "110010" ~?= 50
, "div 1" ~: 1 `div` 2 ~?= 0
, "bin 0" ~: bin 0 ~?= "0"
, "bin 1" ~: bin 1 ~?= "1"
, "bin 5" ~: bin 5 ~?= "101"
, "bin 25" ~: bin 25 ~?= "11001"
, "bin 31" ~: bin 31 ~?= "11111"
, "bin 50" ~: bin 50 ~?= "110010"
, "digitToInt" ~: digitToInt 'a' ~?= 10
, "hexStrToInt 1" ~: hexStrToInt "100" ~?= 256
, "hexStrToInt 2" ~: hexStrToInt "ffff" ~?= 65535
, "replicate" ~: replicate 5 'a' ~?= "aaaaa"
, "intToDigit" ~: intToDigit 10 ~?= 'a'
, "hex 1" ~: hex 256 ~?= "100"
, "hex 2" ~: hex 65535 ~?= "ffff"
, "hexn 1" ~: hexn 2 1 ~?= "01"
, "hexn 2" ~: hexn 2 255 ~?= "ff"
, "hexn 3" ~: hexn 8 65535 ~?= "0000ffff"
, "hexn 4" ~: hexn 2 256 ~?= "00"
, "hexStrToList 1" ~: hexStrToList "123456" ~?= [0x12, 0x34, 0x56]
, "hexStrToList 2" ~: hexStrToList "010203" ~?= [1, 2, 3]
, "listToHexStr 1" ~: listToHexStr [0x12, 0x34, 0x56] ~?= "123456"
, "listToHexStr 2" ~: listToHexStr [1, 2, 3] ~?= "010203"
, "toLE 1" ~: toLE 2 1 ~?= [1, 0]
-- 桁数制限ではみ出した奴は消える
, "toLE 2" ~: toLE 2 0x10000 ~?= [0, 0]
, "toLE 3" ~: toLE 4 0x12345678 ~?= [0x78, 0x56, 0x34, 0x12]
, "fromLE 1" ~: fromLE 2 [0, 1] ~?= 0x100
, "fromLE 2" ~: fromLE 2 [0x78, 0x56, 0x34, 0x12] ~?= 0x5678
, "fromLE 3" ~: fromLE 4 [0x78, 0x56, 0x34, 0x12] ~?= 0x12345678
, "toBE 1" ~: toBE 2 1 ~?= [0, 1]
, "toBE 2" ~: toBE 2 0x10000 ~?= [0, 0]
, "toBE 3" ~: toBE 4 0x12345678 ~?= [0x12, 0x34, 0x56, 0x78]
, "fromBE 1" ~: fromBE 2 [0, 1] ~?= 0x1
, "fromBE 2" ~: fromBE 2 [0x78, 0x56, 0x34, 0x12] ~?= 0x7856
, "fromBE 3" ~: fromBE 4 [0x78, 0x56, 0x34, 0x12] ~?= 0x78563412
]
-- 逆アセンブル結果
-- アセンブリをみてマシン語をみて、なんとなくわかればいいんじゃなかろうか
-- B83412 mov ax,0x1234
-- B83412を機械語、movをニーモニック、axや0x1234をオペランドと呼びます。
-- 機械語はマシン語ともいう
-- 命令の分量はi7の1/5
-- disasm (x:xs)
-- | x == 0xb8 =
-- "mov ax,0x" ++ hex (fromLE 2 xs)
-- とりあえずmov命令あたりから初めて行って、オペコードの場合をすべて網羅していく
-- すべての命令を網羅するのが10とすると、今日の入門編でやるのは2
-- disasm (x:xs)
-- | 0xb0 <= x && x <= 0xb7 =
-- "mov " ++ reg8 !! (x - 0xb0) ++ ",0x" ++ hex (xs !! 0)
-- | 0xb8 <= x && x <= 0xbf =
-- "mov " ++ reg16 !! (x - 0xb8) ++ ",0x" ++ hex (fromLE 2 xs)
regs = [reg8, reg16]
-- Haskellは2進数を直接かけない
-- 即値(ハードコードされた数値)
-- disasm (x:xs)
-- -- DATA TRANSFER
-- -- MOV = Move:
-- -- Immediate to Register [1011wreg][data][data if w=1]
-- | 0xb0 <= x && x <= 0xbf =
-- "mov " ++ reg ++ "," ++ imm
-- where
-- w = (x `shiftR` 3) .&. 1
-- reg = regs !! w !! (x .&. 7)
-- imm = "0x" ++ hex (fromLE (w + 1) xs)
-- べんりかんすう
disasm' hex = disasm $ hexStrToList hex
disasm (x:xs) = disasmB (getBits x) xs
-- DATA TRANSFER
-- MOV = Move:
-- Register/Memory to/from Register [100010dw][mod reg r/m]
disasmB (1,0,0,0,1,0,d,w) xs
| d == 0 = "mov " ++ rm ++ "," ++ reg
| otherwise = "mov " ++ reg ++ "," ++ rm
where
(rm, r) = modrm xs
reg = regs !! w !! r
-- Immediate to Register [1011wreg][data][data if w=1]
disasmB (1,0,1,1,w,r,e,g) xs =
"mov " ++ reg ++ "," ++ imm
where
reg = regs !! w !! getReg r e g
imm = "0x" ++ hex (fromLE (w + 1) xs)
-- dがオペランドの順番、wがレジスタサイズを表します。modとr/mで1つのオペランドを表します。(以後ModR/M)
regad = ["bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"]
modrm (x:xs) = (f mode rm, reg)
where
mode = x `shiftR` 6
reg = (x `shiftR` 3) .&. 7
rm = x .&. 7
f 0 6 = "[0x" ++ hex (fromLE 2 xs) ++ "]"
f 0 rm = "[" ++ regad !! rm ++ "]"
f 1 rm = "[" ++ regad !! rm ++ disp ++ "]"
where
disp = disp8 (xs !! 0)
-- レジスタ=固定の変数のようなもの
reg16 = ["ax", "cx", "dx", "bx", "sp", "bp", "si", "di"]
-- 機械語のなかの一番重要な部分がオペコード
reg8 = ["al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"]
-- >>> bin(0xb0)
-- '0b10110000'
-- >>> bin(0xb8)
-- '0b10111000'
-- >>> bin(0xb9)
-- '0b10111001'
-- >>> bin(0xbf)
-- '0b10111111'
-- w=1がword(2バイト)命令 w=0がbyte(1バイト)命令
-- wの値によって、入れられるdata部分のサイズが8ビットか16ビットか分かるようになっている
-- 割り算は遅い処理なので、ビット演算でやる
-- ここまでの実装では16進数に変換して範囲チェックしていました。2進数のままパターンマッチできるように修正
-- byteは1バイトだけど、wordはCPUごとにバイト数が違う。8086だとwordは2バイトだが、ARMのwordは4バイト(wordは普遍じゃない)
-- 1バイトを8ビットに分解する関数(可変なのでtuppleで実装する)
-- 大抵の場合は型注釈を書かなくても型推論してくれるのだが、Haskellは本来型注釈を書くのが基本
getBits :: Int -> (Int,Int,Int,Int,Int,Int,Int,Int)
getBits x = (b 7, b 6, b 5, b 4, b 3, b 2, b 1, b 0)
where
b n = (x `shiftR` n) .&. 1
-- ビットごとに違うものをガチャっとくっつけるのはorでいける
-- 名前 :: 引数の型 -> 引数の型 -> 引数の型 -> 戻り値の型
getReg :: Int -> Int -> Int -> Int
getReg r e g =
(r `shiftL` 2) .|. (e `shiftL` 1) .|. g
disp8 x
| x < 0x80 = "+0x" ++ hex x
| otherwise = "-0x" ++ hex (0x100 - x)
testDisAsm = TestList
[ "b8 1" ~: disasm [0xb8, 0, 0] ~?= "mov ax,0x0"
, "b8 2" ~: disasm [0xb8, 0x34, 0x12] ~?= "mov ax,0x1234"
, "b8 2" ~: disasm [0xb8, 0x78, 0x56] ~?= "mov ax,0x5678"
, "b8 3" ~: disasm' "b80000" ~?= "mov ax,0x0"
, "b8 4" ~: disasm' "b83412" ~?= "mov ax,0x1234"
, "b8-bf 0" ~: disasm' "b80100" ~?= "mov ax,0x1"
, "b8-bf 1" ~: disasm' "b90100" ~?= "mov cx,0x1"
, "b8-bf 2" ~: disasm' "ba1000" ~?= "mov dx,0x10"
, "b8-bf 3" ~: disasm' "bb0001" ~?= "mov bx,0x100"
, "b8-bf 4" ~: disasm' "bc0010" ~?= "mov sp,0x1000"
, "b8-bf 5" ~: disasm' "bdff00" ~?= "mov bp,0xff"
, "b8-bf 6" ~: disasm' "be00ff" ~?= "mov si,0xff00"
, "b8-bf 7" ~: disasm' "bffeca" ~?= "mov di,0xcafe"
, "b0-b7 1" ~: disasm' "b000" ~?= "mov al,0x0"
, "b0-b7 2" ~: disasm' "b101" ~?= "mov cl,0x1"
, "b0-b7 3" ~: disasm' "b210" ~?= "mov dl,0x10"
, "b0-b7 4" ~: disasm' "b311" ~?= "mov bl,0x11"
, "b0-b7 5" ~: disasm' "b412" ~?= "mov ah,0x12"
, "b0-b7 6" ~: disasm' "b5ff" ~?= "mov ch,0xff"
, "b0-b7 7" ~: disasm' "b6ee" ~?= "mov dh,0xee"
, "b0-b7 8" ~: disasm' "b7ca" ~?= "mov bh,0xca"
, "getBits" ~: getBits 0xbd ~?= (1,0,1,1,1,1,0,1)
, "getReg" ~: getReg 1 0 1 ~?= 5
, "88-8b mod=00,r/m=000 1" ~: disasm' "8800" ~?= "mov [bx+si],al"
, "88-8b mod=00,r/m=000 2" ~: disasm' "8900" ~?= "mov [bx+si],ax"
, "88-8b mod=00,r/m=000 3" ~: disasm' "8A00" ~?= "mov al,[bx+si]"
, "88-8b mod=00,r/m=000 4" ~: disasm' "8B00" ~?= "mov ax,[bx+si]"
, "88-8b mod=00,r/m=110 1" ~: disasm' "88063412" ~?= "mov [0x1234],al"
, "88-8b mod=00,r/m=110 2" ~: disasm' "89063412" ~?= "mov [0x1234],ax"
, "88-8b mod=00,r/m=110 3" ~: disasm' "8A063412" ~?= "mov al,[0x1234]"
, "88-8b mod=00,r/m=110 4" ~: disasm' "8B063412" ~?= "mov ax,[0x1234]"
, "88-8b mod=00 1" ~: disasm' "8900" ~?= "mov [bx+si],ax"
, "88-8b mod=00 2" ~: disasm' "8909" ~?= "mov [bx+di],cx"
, "88-8b mod=00 3" ~: disasm' "8912" ~?= "mov [bp+si],dx"
, "88-8b mod=00 4" ~: disasm' "891b" ~?= "mov [bp+di],bx"
, "88-8b mod=00 5" ~: disasm' "8924" ~?= "mov [si],sp"
, "88-8b mod=00 6" ~: disasm' "892d" ~?= "mov [di],bp"
, "88-8b mod=00 7" ~: disasm' "893f" ~?= "mov [bx],di"
, "disp8 1" ~: disp8 0 ~?= "+0x0"
, "disp8 2" ~: disp8 0x7f ~?= "+0x7f"
, "disp8 3" ~: disp8 0x80 ~?= "-0x80"
, "disp8 4" ~: disp8 0xff ~?= "-0x1"
, "88-8b mod=01 1" ~: disasm' "894001" ~?= "mov [bx+si+0x1],ax"
, "88-8b mod=01 2" ~: disasm' "8949FF" ~?= "mov [bx+di-0x1],cx"
, "88-8b mod=01 3" ~: disasm' "895202" ~?= "mov [bp+si+0x2],dx"
, "88-8b mod=01 4" ~: disasm' "895BFE" ~?= "mov [bp+di-0x2],bx"
, "88-8b mod=01 5" ~: disasm' "896464" ~?= "mov [si+0x64],sp"
, "88-8b mod=01 6" ~: disasm' "896D9C" ~?= "mov [di-0x64],bp"
, "88-8b mod=01 7" ~: disasm' "897600" ~?= "mov [bp+0x0],si"
, "88-8b mod=01 8" ~: disasm' "897601" ~?= "mov [bp+0x1],si"
, "88-8b mod=01 9" ~: disasm' "897F01" ~?= "mov [bx+0x1],di"
]
main = do
runTestText (putTextToHandle stderr False) (TestList [testHex, testDisAsm])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment