Build a MeCab user dictionary with automatic cost scoring
#!/bin/sh
# Build the MeCab user dictionary (mecabuser.dic).
#
# Steps:
#   1. scoring.php produces the intermediate CSV mecabuser.tmp.
#   2. mecab-dict-index compiles mecabuser.tmp into mecabuser.dic using the
#      system ipadic dictionary as the reference for the user dictionary.
#   3. The intermediate file is removed, whether or not the build succeeded.

# Remove the intermediate CSV. Registered as an EXIT trap so that a failure
# in any step does not leave a stale or half-written mecabuser.tmp behind.
cleanup() {
    echo "cleaning temporary file"
    rm -f mecabuser.tmp
}
trap cleanup EXIT

# Abort on the first failing command so that mecab-dict-index is never run
# against a missing or partial temporary file.
set -e

php scoring.php
/usr/local/libexec/mecab/mecab-dict-index -d/usr/local/lib64/mecab/dic/ipadic -u mecabuser.dic -f utf8 -t utf8 mecabuser.tmp
<?php
/**
 * Assigns a length-based value to column index 3 of every row in
 * mecabuser.csv and writes the result to mecabuser.tmp.
 *
 * For each row the value is
 *     max(-36000, -400 * length(column 0) ^ 1.5)   (truncated to an integer)
 * so longer entries receive a lower (more negative) value, capped at -36000.
 * Column index 3 is presumably the word cost field of the MeCab dictionary
 * CSV format -- confirm against the MeCab documentation.
 *
 * Exits with status 1 when either file cannot be opened.
 */
echo "[scoring process]\n";
echo "reading file and processing ... ";

$csv = fopen("mecabuser.csv", "r");
if ($csv === false) {
    fwrite(STDERR, "\ncannot open mecabuser.csv for reading\n");
    exit(1);
}

$tmp = array();
$lineNo = 0;
while (($dat = fgetcsv($csv)) !== false) {
    $lineNo++;

    // fgetcsv() yields array(null) for an empty line; there is nothing to score.
    if ($dat === array(null)) {
        continue;
    }

    // Index 3 is overwritten below. A shorter row would end up with sparse
    // keys and be written out as a corrupt record, so it is skipped instead.
    if (count($dat) < 4) {
        fwrite(STDERR, "\nskipping line $lineNo: expected at least 4 fields\n");
        continue;
    }

    // NOTE(review): strlen() counts bytes, not characters, so a multi-byte
    // UTF-8 entry counts as longer than its character count. Switch to
    // mb_strlen($dat[0], 'UTF-8') if a per-character length is intended.
    $leng = strlen($dat[0]);

    // pow() is required here: in PHP the ^ operator is bitwise XOR, not
    // exponentiation, so "$leng ^ 1.5" only flipped the lowest bit of $leng.
    $dat[3] = (int) max(-36000, -400 * pow($leng, 1.5));
    $tmp[] = $dat;
}
fclose($csv);

echo "ok\nwriting file ... ";

$csv2 = fopen("mecabuser.tmp", "w");
if ($csv2 === false) {
    fwrite(STDERR, "\ncannot open mecabuser.tmp for writing\n");
    exit(1);
}

$xl = 0;
foreach ($tmp as $wri) {
    fputcsv($csv2, $wri);
    $xl++;
}
fclose($csv2);

echo "ok, $xl lines wrote.\n[mecab dictonary index process]\n";
exit;
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment