Skip to content

Instantly share code, notes, and snippets.

@shogo82148
Created February 25, 2012 13:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shogo82148/1908441 to your computer and use it in GitHub Desktop.
Save shogo82148/1908441 to your computer and use it in GitHub Desktop.
A patch that fixes how Igo builds and reads its matrix file (corrects the right/left context ID order)
Index: igo/src/net/reduls/igo/dictionary/build/Matrix.java
===================================================================
--- igo/src/net/reduls/igo/dictionary/build/Matrix.java (リビジョン 107)
+++ igo/src/net/reduls/igo/dictionary/build/Matrix.java (作業コピー)
@@ -21,33 +21,33 @@
public static void build(String inputDir, String outputDir) throws ParseException, IOException {
final ReadLine rl = new ReadLine(inputDir+"/matrix.def", "UTF-8");
try {
- // 一行目はサイズ: [左文脈IDの数] [右文脈IDの数]
+ // 一行目はサイズ: [右文脈IDの数] [左文脈IDの数]
String s = rl.readEof();
- final int leftNum = Integer.valueOf(s.substring(0,s.indexOf(' ')));
- final int rightNum= Integer.valueOf(s.substring(s.indexOf(' ')+1));
+ final int rightNum = Integer.valueOf(s.substring(0,s.indexOf(' ')));
+ final int leftNum= Integer.valueOf(s.substring(s.indexOf(' ')+1));
final FileMappedOutputStream fmos =
new FileMappedOutputStream(outputDir+"/matrix.bin", 4*2+leftNum*rightNum*2);
try {
+ fmos.putInt(rightNum);
fmos.putInt(leftNum);
- fmos.putInt(rightNum);
// 二行目以降はデータ: [左文脈ID] [右文脈ID] [連接コスト]
final short[] tmpMatrix = new short[leftNum*rightNum];
- for(int i=0; i < leftNum; i++)
- for(int j=0; j < rightNum; j++) {
+ for(int i=0; i < rightNum; i++)
+ for(int j=0; j < leftNum; j++) {
s = rl.readEof();
final int p1 = s.indexOf(' ');
final int p2 = s.indexOf(' ',p1+1);
- final int lftID = Integer.valueOf(s.substring(0, p1));
- final int rgtID = Integer.valueOf(s.substring(p1+1, p2));
+ final int rgtID = Integer.valueOf(s.substring(0, p1));
+ final int lftID = Integer.valueOf(s.substring(p1+1, p2));
final short cost = Short.valueOf(s.substring(p2+1));
- if(i != lftID) throw new ParseException
- ("Unexpected left context ID. ID="+lftID+", expedted="+i+"\t"+
+ if(i != rgtID) throw new ParseException
+ ("Unexpected left context ID. ID="+rgtID+", expedted="+i+"\t"+
"{file: matrix.def, line: "+rl.lineNumber()+"}", rl.lineNumber());
- if(j != rgtID) throw new ParseException
- ("Unexpected right context ID. ID="+rgtID+", expedted="+j+"\t"+
+ if(j != lftID) throw new ParseException
+ ("Unexpected right context ID. ID="+lftID+", expedted="+j+"\t"+
"{file: matrix.def, line: "+rl.lineNumber()+"}", rl.lineNumber());
// NOTE: tmpMatrixという一時配列を用いている理由
@@ -61,7 +61,7 @@
// そのためtmpMatrix配列を用いて、コスト値の並び順を変更し、
// matrix[rightId][leftId]とったように、rightIdが第一添字になるようにした方が
// メモリアクセスの局所性が高まり(多分)、若干だが処理速度が向上する。
- tmpMatrix[j*rightNum + i] = cost;
+ tmpMatrix[i*leftNum + j] = cost;
}
for(short cost : tmpMatrix)
fmos.putShort(cost);
Index: igo/src/net/reduls/igo/dictionary/Matrix.java
===================================================================
--- igo/src/net/reduls/igo/dictionary/Matrix.java (リビジョン 107)
+++ igo/src/net/reduls/igo/dictionary/Matrix.java (作業コピー)
@@ -14,8 +14,8 @@
public Matrix(String dataDir) throws IOException {
final FileMappedInputStream fmis = new FileMappedInputStream(dataDir+"/matrix.bin");
try {
- leftSize = fmis.getInt();
- rightSize= fmis.getInt();
+ rightSize = fmis.getInt();
+ leftSize= fmis.getInt();
matrix = fmis.getShortArray(leftSize*rightSize);
} finally {
fmis.close();
@@ -25,7 +25,7 @@
/**
* 形態素同士の連接コストを求める
*/
- public short linkCost(int leftId, int rightId) {
- return matrix[rightId*rightSize + leftId];
+ public short linkCost(int rightId, int leftId) {
+ return matrix[rightId*leftSize + leftId];
}
}
\ No newline at end of file
diff -cr igo-python-0.9/igo/dictionary.py igo-python-0.9-fixed/igo/dictionary.py
*** igo-python-0.9/igo/dictionary.py Thu Sep 1 02:46:40 2011
--- igo-python-0.9-fixed/igo/dictionary.py Sat Feb 25 21:51:06 2012
***************
*** 82,98 ****
def __init__(self, dataDir, bigendian=False):
fmis = FileMappedInputStream(dataDir + "/matrix.bin", bigendian)
try:
- self.leftSize = fmis.getInt()
self.rightSize = fmis.getInt()
self.matrix = fmis.getShortArray(self.leftSize * self.rightSize)
finally:
fmis.close()
! def linkCost(self, leftId, rightId):
"""
形態素同士の連接コストを求める
"""
! return self.matrix[rightId * self.rightSize + leftId]
class Unknown:
--- 82,98 ----
def __init__(self, dataDir, bigendian=False):
fmis = FileMappedInputStream(dataDir + "/matrix.bin", bigendian)
try:
self.rightSize = fmis.getInt()
+ self.leftSize = fmis.getInt()
self.matrix = fmis.getShortArray(self.leftSize * self.rightSize)
finally:
fmis.close()
! def linkCost(self, rightId, leftId):
"""
形態素同士の連接コストを求める
"""
! return self.matrix[rightId * self.leftSize + leftId]
class Unknown:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment