Created
February 25, 2012 13:14
-
-
Save shogo82148/1908441 to your computer and use it in GitHub Desktop.
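A patch that fixes how Igo builds and reads its connection-cost matrix file (matrix.def / matrix.bin) by swapping the order of the left and right context IDs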
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: igo/src/net/reduls/igo/dictionary/build/Matrix.java | |
=================================================================== | |
--- igo/src/net/reduls/igo/dictionary/build/Matrix.java (リビジョン 107) | |
+++ igo/src/net/reduls/igo/dictionary/build/Matrix.java (作業コピー) | |
@@ -21,33 +21,33 @@ | |
public static void build(String inputDir, String outputDir) throws ParseException, IOException { | |
final ReadLine rl = new ReadLine(inputDir+"/matrix.def", "UTF-8"); | |
try { | |
- // 一行目はサイズ: [左文脈IDの数] [右文脈IDの数] | |
+ // 一行目はサイズ: [右文脈IDの数] [左文脈IDの数] | |
String s = rl.readEof(); | |
- final int leftNum = Integer.valueOf(s.substring(0,s.indexOf(' '))); | |
- final int rightNum= Integer.valueOf(s.substring(s.indexOf(' ')+1)); | |
+ final int rightNum = Integer.valueOf(s.substring(0,s.indexOf(' '))); | |
+ final int leftNum= Integer.valueOf(s.substring(s.indexOf(' ')+1)); | |
final FileMappedOutputStream fmos = | |
new FileMappedOutputStream(outputDir+"/matrix.bin", 4*2+leftNum*rightNum*2); | |
try { | |
+ fmos.putInt(rightNum); | |
fmos.putInt(leftNum); | |
- fmos.putInt(rightNum); | |
// 二行目以降はデータ: [左文脈ID] [右文脈ID] [連接コスト] | |
final short[] tmpMatrix = new short[leftNum*rightNum]; | |
- for(int i=0; i < leftNum; i++) | |
- for(int j=0; j < rightNum; j++) { | |
+ for(int i=0; i < rightNum; i++) | |
+ for(int j=0; j < leftNum; j++) { | |
s = rl.readEof(); | |
final int p1 = s.indexOf(' '); | |
final int p2 = s.indexOf(' ',p1+1); | |
- final int lftID = Integer.valueOf(s.substring(0, p1)); | |
- final int rgtID = Integer.valueOf(s.substring(p1+1, p2)); | |
+ final int rgtID = Integer.valueOf(s.substring(0, p1)); | |
+ final int lftID = Integer.valueOf(s.substring(p1+1, p2)); | |
final short cost = Short.valueOf(s.substring(p2+1)); | |
- if(i != lftID) throw new ParseException | |
- ("Unexpected left context ID. ID="+lftID+", expedted="+i+"\t"+ | |
+ if(i != rgtID) throw new ParseException | |
+ ("Unexpected right context ID. ID="+rgtID+", expedted="+i+"\t"+ | |
"{file: matrix.def, line: "+rl.lineNumber()+"}", rl.lineNumber()); | |
- if(j != rgtID) throw new ParseException | |
- ("Unexpected right context ID. ID="+rgtID+", expedted="+j+"\t"+ | |
+ if(j != lftID) throw new ParseException | |
+ ("Unexpected left context ID. ID="+lftID+", expedted="+j+"\t"+ | |
"{file: matrix.def, line: "+rl.lineNumber()+"}", rl.lineNumber()); | |
// NOTE: tmpMatrixという一時配列を用いている理由 | |
@@ -61,7 +61,7 @@ | |
// そのためtmpMatrix配列を用いて、コスト値の並び順を変更し、 | |
// matrix[rightId][leftId]とったように、rightIdが第一添字になるようにした方が | |
// メモリアクセスの局所性が高まり(多分)、若干だが処理速度が向上する。 | |
- tmpMatrix[j*rightNum + i] = cost; | |
+ tmpMatrix[i*leftNum + j] = cost; | |
} | |
for(short cost : tmpMatrix) | |
fmos.putShort(cost); | |
Index: igo/src/net/reduls/igo/dictionary/Matrix.java | |
=================================================================== | |
--- igo/src/net/reduls/igo/dictionary/Matrix.java (リビジョン 107) | |
+++ igo/src/net/reduls/igo/dictionary/Matrix.java (作業コピー) | |
@@ -14,8 +14,8 @@ | |
public Matrix(String dataDir) throws IOException { | |
final FileMappedInputStream fmis = new FileMappedInputStream(dataDir+"/matrix.bin"); | |
try { | |
- leftSize = fmis.getInt(); | |
- rightSize= fmis.getInt(); | |
+ rightSize = fmis.getInt(); | |
+ leftSize= fmis.getInt(); | |
matrix = fmis.getShortArray(leftSize*rightSize); | |
} finally { | |
fmis.close(); | |
@@ -25,7 +25,7 @@ | |
/** | |
* 形態素同士の連接コストを求める | |
*/ | |
- public short linkCost(int leftId, int rightId) { | |
- return matrix[rightId*rightSize + leftId]; | |
+ public short linkCost(int rightId, int leftId) { | |
+ return matrix[rightId*leftSize + leftId]; | |
} | |
} | |
\ No newline at end of file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -cr igo-python-0.9/igo/dictionary.py igo-python-0.9-fixed/igo/dictionary.py | |
*** igo-python-0.9/igo/dictionary.py Thu Sep 1 02:46:40 2011 | |
--- igo-python-0.9-fixed/igo/dictionary.py Sat Feb 25 21:51:06 2012 | |
*************** | |
*** 82,98 **** | |
def __init__(self, dataDir, bigendian=False): | |
fmis = FileMappedInputStream(dataDir + "/matrix.bin", bigendian) | |
try: | |
- self.leftSize = fmis.getInt() | |
self.rightSize = fmis.getInt() | |
self.matrix = fmis.getShortArray(self.leftSize * self.rightSize) | |
finally: | |
fmis.close() | |
! def linkCost(self, leftId, rightId): | |
""" | |
形態素同士の連接コストを求める | |
""" | |
! return self.matrix[rightId * self.rightSize + leftId] | |
class Unknown: | |
--- 82,98 ---- | |
def __init__(self, dataDir, bigendian=False): | |
fmis = FileMappedInputStream(dataDir + "/matrix.bin", bigendian) | |
try: | |
self.rightSize = fmis.getInt() | |
+ self.leftSize = fmis.getInt() | |
self.matrix = fmis.getShortArray(self.leftSize * self.rightSize) | |
finally: | |
fmis.close() | |
! def linkCost(self, rightId, leftId): | |
""" | |
形態素同士の連接コストを求める | |
""" | |
! return self.matrix[rightId * self.leftSize + leftId] | |
class Unknown: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment