-
-
Save rubyu/cb04e8457993a5e53cc6c1bc1d8d4796 to your computer and use it in GitHub Desktop.
import sbt.Keys._ | |
import sbt._ | |
import sbtassembly.AssemblyPlugin.autoImport._ | |
/**
 * sbt build definition for the `ebquery` tool.
 *
 * Defines one root project that produces a runnable assembly jar named
 * `<name>-<version>.jar` with `com.github.rubyu.ebquery.Main` as entry point.
 */
object Build extends sbt.Build {

  /** Project coordinates and Scala version shared by every project of this build. */
  lazy val commonSettings =
    Defaults.coreDefaultSettings ++
      Seq(
        version := "0.3.2",
        scalaVersion := "2.11.8",
        organization := "com.github.rubyu",
        name := "ebquery"
      )

  /** The root project: assembly configuration, compiler flags and dependencies. */
  lazy val project =
    Project("ebquery", file("."))
      .settings(commonSettings: _*)
      .settings(Seq(
        mainClass in assembly := Some("com.github.rubyu.ebquery.Main"),
        assemblyJarName in assembly := name.value + "-" + version.value + ".jar"
      ))
      .settings(Seq(
        scalacOptions := Seq(
          "-deprecation",
          "-unchecked",
          "-feature"
        )
      ))
      .settings(
        libraryDependencies ++= Seq(
          "com.github.tototoshi" %% "scala-csv" % "1.3.6",
          // `%%` appends the Scala binary version, so this resolves to
          // scala-xml_2.11 today and keeps matching if scalaVersion changes.
          "org.scala-lang.modules" %% "scala-xml" % "1.0.4",
          "org.slf4j" % "slf4j-api" % "1.7.21",
          "org.slf4j" % "slf4j-simple" % "1.7.21",
          "args4j" % "args4j" % "2.0.26",
          "commons-codec" % "commons-codec" % "1.9",
          "commons-lang" % "commons-lang" % "2.4",
          "org.specs2" %% "specs2-core" % "3.7.2" % "test",
          "junit" % "junit" % "4.7" % "test",
          "com.rexsl" % "rexsl-w3c" % "0.13" % "test",
          "com.rexsl" % "rexsl-test" % "0.4.12" % "test",
          "javax.json" % "javax.json-api" % "1.0" % "test",
          // halt warning messages for multiple dependencies
          // (pinned to scalaVersion so they cannot drift from the compiler in use)
          "org.scala-lang" % "scala-reflect" % scalaVersion.value % "test",
          "org.scala-lang" % "scala-compiler" % scalaVersion.value % "test",
          // halt warning messages for circular dependencies
          "com.jcabi" % "jcabi-log" % "0.12.1" % "test"
        )
      )
}
package io.github.eb4j; | |
import java.io.UnsupportedEncodingException; | |
import io.github.eb4j.hook.Hook; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import io.github.eb4j.io.EBFile; | |
import io.github.eb4j.io.BookInputStream; | |
import io.github.eb4j.util.ByteUtil; | |
import io.github.eb4j.util.HexUtil; | |
import com.github.rubyu.ebquery.IExporter; | |
/** | |
* Search class for searching with a single word. | |
* | |
* @author Hisaya FUKUMOTO | |
* | |
* Copied from: https://github.com/eb4j/eb4j/blob/5c1dd0a8aa6eca5ae7489787456333d7eef5fa2a/eb4j-core/src/main/java/io/github/eb4j/SingleWordSearcher.java | |
*/ | |
public class EntryEnumerator implements Searcher { | |
private Hook _hook = null; | |
private com.github.rubyu.ebquery.IExporter _exporter = null; | |
public static EntryEnumerator Create(SubBook sub, Hook<String> hook, IExporter exporter) { | |
IndexStyle[] wordStyles = new IndexStyle[3]; | |
wordStyles[0] = sub.getWordIndexStyle(0); // KANA | |
wordStyles[1] = sub.getWordIndexStyle(1); // KANJI | |
wordStyles[2] = sub.getWordIndexStyle(2); // ALPHABET | |
IndexStyle wordStyle; | |
if (wordStyles[2] != null) { | |
wordStyle = wordStyles[2]; | |
} else { | |
wordStyle = wordStyles[1]; | |
} | |
return new EntryEnumerator(sub, hook, exporter, wordStyle, EXACTWORD); | |
} | |
/** 前方一致検索を示す定数 */ | |
protected static final int WORD = 0; | |
/** 後方一致検索を示す定数 */ | |
protected static final int ENDWORD = 1; | |
/** 完全一致検索を示す定数 */ | |
protected static final int EXACTWORD = 2; | |
/** 条件検索を示す定数 */ | |
protected static final int KEYWORD = 3; | |
/** クロス検索を示す定数 */ | |
protected static final int CROSS = 4; | |
/** 複合検索を示す定数 */ | |
protected static final int MULTI = 5; | |
/** 最大インデックス深さ */ | |
private static final int MAX_INDEX_DEPTH = 6; | |
/** 項目の配置スタイル */ | |
private static final int VARIABLE = 0; | |
/** 項目の配置スタイル */ | |
private static final int FIXED = 1; | |
/** ログ */ | |
private Logger _logger = null; | |
/** 副本 */ | |
private SubBook _sub = null; | |
/** インデックススタイル */ | |
private IndexStyle _style = null; | |
/** 現在の検索種別 */ | |
private int _type = 0; | |
/** 検索語 */ | |
private byte[] _word = null; | |
/** 検索キー */ | |
private byte[] _canonical = null; | |
/** 検索するファイル */ | |
private EBFile _file = null; | |
/** キャッシュ */ | |
private byte[] _cache = new byte[BookInputStream.PAGE_SIZE]; | |
/** キャシュのページ位置 */ | |
private long _cachePage = 0L; | |
/** キャシュのオフセット位置 */ | |
private int _off = 0; | |
/** データのページ位置 */ | |
private long _page = 0L; | |
/** データのページID */ | |
private int _pageID = 0; | |
/** エントリのサイズ */ | |
private int _entryLength = 0; | |
/** エントリの配置方法 */ | |
private int _entryArrangement = 0; | |
/** エントリの数 */ | |
private int _entryCount = 0; | |
/** エントリのインデックス */ | |
private int _entryIndex = 0; | |
/** グループエントリ内であることを示すフラグ */ | |
private boolean _inGroupEntry = false; | |
/** 比較結果 */ | |
private int _comparison = -1; | |
/** キーワード検索用見出し位置 */ | |
private long _keywordHeading = 0L; | |
private byte[] _currentGroupEntryIndex = null; | |
/**
 * Builds the enumerator and immediately positions it by calling
 * {@link #search()}.
 *
 * <p>An {@code EBException} raised while positioning is not propagated to
 * the caller of this constructor.
 *
 * @param sub      sub-book to read; its text file is the file that is scanned
 * @param hook     hook used to render headings and body text
 * @param exporter sink that receives every enumerated entry
 * @param style    index style to walk
 * @param type     search type, one of {@link #WORD}, {@link #ENDWORD},
 *                 {@link #EXACTWORD}, {@link #KEYWORD}, {@link #CROSS}
 *                 or {@link #MULTI}
 */
protected EntryEnumerator(final SubBook sub, final Hook<String> hook, final IExporter exporter, final IndexStyle style, final int type) {
    this._logger = LoggerFactory.getLogger(getClass());

    // Collaborators used when an entry is exported.
    this._hook = hook;
    this._exporter = exporter;

    // What to scan and how.
    this._sub = sub;
    this._file = sub.getTextFile();
    this._style = style;
    this._type = type;

    search();
}
private int _comparePre(final byte[] key, final byte[] pattern) { | |
return 0; | |
} | |
private int _compareSingle(byte[] key, byte[] pattern) { | |
return 1; | |
} | |
private int _compareGroup(byte[] key, byte[] pattern) { | |
return 0; | |
} | |
private void search() { | |
try { | |
search("dummy search string".getBytes()); | |
} catch (EBException ex) {} | |
} | |
private void export(byte[] indexBytes, Result result) { | |
try { | |
String indexValue = null; | |
try { | |
indexValue = new String(indexBytes, "x-JIS0208"); | |
} catch (UnsupportedEncodingException ex) {} | |
if (indexValue == null || indexValue.contains("\uFFFD")) { | |
return; | |
} | |
String heading = result.getHeading(this._hook).toString(); | |
String description = result.getText(this._hook).toString(); | |
this._exporter.export(indexValue, heading, description); | |
} catch (EBException ex) {} | |
} | |
/** | |
* Set a word to search. | |
* | |
* @param word a search word. | |
*/ | |
private void _setWord(final byte[] word) { | |
int len = word.length; | |
_word = new byte[len]; | |
System.arraycopy(word, 0, _word, 0, len); | |
_canonical = new byte[len]; | |
System.arraycopy(word, 0, _canonical, 0, len); | |
if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
_style.fixWordLatin(_canonical); | |
} else { | |
_style.fixWord(_canonical); | |
} | |
if (_style.getIndexID() != 0x70 && _style.getIndexID() != 0x90) { | |
System.arraycopy(_canonical, 0, _word, 0, len); | |
} | |
// 後方検索の場合、反転する | |
if (_type == ENDWORD) { | |
if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
ByteUtil.reverseWordLatin(_word); | |
ByteUtil.reverseWordLatin(_canonical); | |
} else { | |
ByteUtil.reverseWord(_word); | |
ByteUtil.reverseWord(_canonical); | |
} | |
} | |
try { | |
_logger.debug("search word: '" + new String(_word, "x-JIS0208") + "'"); | |
_logger.debug("search canonical word: '" + new String(_canonical, "x-JIS0208") + "'"); | |
} catch (UnsupportedEncodingException e) { | |
} | |
} | |
// /** | |
// * キーとパターンを比較します。 | |
// * | |
// * @param key キー | |
// * @param pattern パターン | |
// * @return キーがパターンと同じ場合:0、 | |
// * キーがパターンより大きい場合:1以上、 | |
// * キーがパターンより小さい場合:-1以下 | |
// */ | |
// private int _comparePre(final byte[] key, final byte[] pattern) { | |
// int comp = 0; | |
// switch (_type) { | |
// case EXACTWORD: | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToLatin(key, pattern, true); | |
// } else { | |
// comp = CompareUtil.compareToJISX0208(key, pattern, true); | |
// } | |
// break; | |
// case MULTI: | |
// if (_style.getCandidatePage() == 0) { | |
// comp = CompareUtil.compareToByte(key, pattern, true); | |
// } else { | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToLatin(key, pattern, true); | |
// } else { | |
// comp = CompareUtil.compareToJISX0208(key, pattern, true); | |
// } | |
// } | |
// break; | |
// case WORD: | |
// case ENDWORD: | |
// case KEYWORD: | |
// case CROSS: | |
// default: | |
// comp = CompareUtil.compareToByte(key, pattern, true); | |
// break; | |
// } | |
// try { | |
// _logger.debug("compare key word: (" + comp + ") '" | |
// + new String(key, "x-JIS0208") + "' '" | |
// + new String(pattern, "x-JIS0208") + "'"); | |
// } catch (UnsupportedEncodingException e) { | |
// } | |
// return comp; | |
// } | |
// | |
// /** | |
// * キーとパターンを比較します。 | |
// * | |
// * @param key キー | |
// * @param pattern パターン | |
// * @return キーがパターンと同じ場合:0、 | |
// * キーがパターンより大きい場合:1以上、 | |
// * キーがパターンより小さい場合:-1以下 | |
// */ | |
// private int _compareSingle(final byte[] key, final byte[] pattern) { | |
// int comp = 0; | |
// switch (_type) { | |
// case ENDWORD: | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } else { | |
// IndexStyle style = _sub.getEndwordIndexStyle(SubBook.KANA); | |
// if (style != null && _style.getStartPage() == style.getStartPage()) { | |
// comp = CompareUtil.compareToKanaSingle(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } | |
// } | |
// break; | |
// case EXACTWORD: | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToLatin(key, pattern, false); | |
// } else { | |
// IndexStyle style = _sub.getWordIndexStyle(SubBook.KANA); | |
// if (style != null && _style.getStartPage() == style.getStartPage()) { | |
// comp = CompareUtil.compareToKanaSingle(key, pattern, true); | |
// } else { | |
// comp = CompareUtil.compareToJISX0208(key, pattern, false); | |
// } | |
// } | |
// break; | |
// case KEYWORD: | |
// case CROSS: | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// break; | |
// case MULTI: | |
// if (_style.getCandidatePage() == 0) { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } else { | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToLatin(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToJISX0208(key, pattern, false); | |
// } | |
// } | |
// break; | |
// case WORD: | |
// default: | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } else { | |
// IndexStyle style = _sub.getWordIndexStyle(SubBook.KANA); | |
// if (style != null && _style.getStartPage() == style.getStartPage()) { | |
// comp = CompareUtil.compareToKanaSingle(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } | |
// } | |
// break; | |
// } | |
// try { | |
// _logger.debug("compare key word: (" + comp + ") '" | |
// + new String(key, "x-JIS0208") + "' '" | |
// + new String(pattern, "x-JIS0208") + "'"); | |
// } catch (UnsupportedEncodingException e) { | |
// } | |
// return comp; | |
// } | |
// | |
// /** | |
// * キーとパターンを比較します。 | |
// * | |
// * @param key キー | |
// * @param pattern パターン | |
// * @return キーがパターンと同じ場合:0、 | |
// * キーがパターンより大きい場合:1以上、 | |
// * キーがパターンより小さい場合:-1以下 | |
// */ | |
// private int _compareGroup(final byte[] key, final byte[] pattern) { | |
// int comp = 0; | |
// switch (_type) { | |
// case EXACTWORD: | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToLatin(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToKanaGroup(key, pattern, true); | |
// } | |
// break; | |
// case MULTI: | |
// if (_style.getCandidatePage() == 0) { | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToKanaGroup(key, pattern, false); | |
// } | |
// } else { | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToLatin(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToKanaGroup(key, pattern, true); | |
// } | |
// } | |
// break; | |
// case WORD: | |
// case ENDWORD: | |
// case KEYWORD: | |
// case CROSS: | |
// default: | |
// if (_sub.getBook().getCharCode() == Book.CHARCODE_ISO8859_1) { | |
// comp = CompareUtil.compareToByte(key, pattern, false); | |
// } else { | |
// comp = CompareUtil.compareToKanaGroup(key, pattern, false); | |
// } | |
// break; | |
// } | |
// try { | |
// _logger.debug("compare key word: (" + comp + ") '" | |
// + new String(key, "x-JIS0208") + "' '" | |
// + new String(pattern, "x-JIS0208") + "'"); | |
// } catch (UnsupportedEncodingException e) { | |
// } | |
// return comp; | |
// } | |
/** | |
* 検索を行います。 | |
* | |
* @param word 検索語 | |
* @exception EBException 前処理中にエラーが発生した場合 | |
*/ | |
protected void search(final byte[] word) throws EBException { | |
_setWord(word); | |
_page = _style.getStartPage(); | |
// pre-search | |
BookInputStream bis = _file.getInputStream(); | |
try { | |
long nextPage = _page; | |
int depth; | |
for (depth=0; depth<MAX_INDEX_DEPTH; depth++) { | |
// データをキャッシュへ読み込む | |
bis.seek(_page, 0); | |
bis.readFully(_cache, 0, _cache.length); | |
_cachePage = _page; | |
_pageID = _cache[0] & 0xff; | |
_entryLength = _cache[1] & 0xff; | |
if (_entryLength == 0) { | |
_entryArrangement = VARIABLE; | |
} else { | |
_entryArrangement = FIXED; | |
} | |
_entryCount = ByteUtil.getInt2(_cache, 2); | |
_off = 4; | |
_logger.debug("page=0x" + HexUtil.toHexString(_page) | |
+ ", ID=0x" + HexUtil.toHexString(_pageID)); | |
// リーフインデックスに達っしたらループ終了 | |
if (_isLeafLayer(_pageID)) { | |
break; | |
} | |
// 次のレベルのインデックスを取得する | |
byte[] b = new byte[_entryLength]; | |
for (_entryIndex=0; _entryIndex<_entryCount; _entryIndex++) { | |
if (_off + _entryLength + 4 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
System.arraycopy(_cache, _off, b, 0, b.length); | |
_off += _entryLength; | |
if (_comparePre(_canonical, b) <= 0) { | |
nextPage = ByteUtil.getLong4(_cache, _off); | |
break; | |
} | |
_off += 4; | |
} | |
if (_entryIndex >= _entryCount || nextPage == _page) { | |
_comparison = -1; | |
return; | |
} | |
_page = nextPage; | |
} | |
// インデックス深さのチェック | |
if (depth == MAX_INDEX_DEPTH) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
} finally { | |
bis.close(); | |
} | |
_entryIndex = 0; | |
_comparison = 1; | |
_inGroupEntry = false; | |
} | |
/** | |
* 次の検索結果を返します。 | |
* | |
* @return 検索結果 (次の検索結果がない場合null) | |
* @exception EBException 検索中にエラーが発生した場合 | |
*/ | |
@Override | |
public Result getNextResult() throws EBException { | |
if (_comparison < 0) { | |
return null; | |
} | |
while (true) { | |
refreshCache(); | |
if (!_isLeafLayer(_pageID)) { | |
// リーフインデックスでなければ例外 | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
if (!_hasGroupEntry(_pageID)) { | |
while (_entryIndex < _entryCount) { | |
parseNonGroupEntry(); | |
} | |
} else { | |
while (_entryIndex < _entryCount) { | |
parseGroupedEntry(); | |
} | |
} | |
// 次ページが存在すれば続行、存在しなければ終了 | |
if (_isLayerEnd(_pageID)) { | |
_comparison = -1; | |
break; | |
} | |
_page++; | |
_entryIndex = 0; | |
} | |
return null; | |
} | |
/** | |
* 指定されたページが最下層かどうかを判別します。 | |
* | |
* @param id ページID | |
* @return 最下層である場合はtrue、そうでない場合はfalse | |
*/ | |
private boolean _isLeafLayer(final int id) { | |
if ((id & 0x80) == 0x80) { | |
return true; | |
} | |
return false; | |
} | |
// /** | |
// * 指定されたページが階層開始ページかどうかを判別します。 | |
// * | |
// * @param id ページID | |
// * @return 階層開始ページである場合はtrue、そうでない場合はfalse | |
// */ | |
// private boolean _isLayerStart(int id) { | |
// if ((id & 0x40) == 0x40) { | |
// return true; | |
// } | |
// return false; | |
// } | |
/** | |
* 指定されたページが階層終了ページかどうかを判別します。 | |
* | |
* @param id ページID | |
* @return 階層終了ページである場合はtrue、そうでない場合はfalse | |
*/ | |
private boolean _isLayerEnd(final int id) { | |
if ((id & 0x20) == 0x20) { | |
return true; | |
} | |
return false; | |
} | |
/** | |
* 指定されたページがグループエントリを含んでいるかどうか判別します。 | |
* | |
* @param id ページID | |
* @return グループエントリを含んでいる場合はtrue、そうでない場合はfalse | |
*/ | |
private boolean _hasGroupEntry(final int id) { | |
if ((id & 0x10) == 0x10) { | |
return true; | |
} | |
return false; | |
} | |
// キャッシュとデータのページが異なれば読み込む | |
private void refreshCache() throws EBException { | |
if (_cachePage != _page) { | |
BookInputStream bis = _file.getInputStream(); | |
try { | |
bis.seek(_page, 0); | |
bis.readFully(_cache, 0, _cache.length); | |
} finally { | |
bis.close(); | |
} | |
_cachePage = _page; | |
if (_entryIndex == 0) { | |
_pageID = _cache[0] & 0xff; | |
_entryLength = _cache[1] & 0xff; | |
if (_entryLength == 0) { | |
_entryArrangement = VARIABLE; | |
} else { | |
_entryArrangement = FIXED; | |
} | |
_entryCount = ByteUtil.getInt2(_cache, 2); | |
_entryIndex = 0; | |
_off = 4; | |
_logger.info("page=0x" + HexUtil.toHexString(_page) | |
+ ", ID=0x" + HexUtil.toHexString(_pageID)); | |
} | |
} | |
} | |
// グループエントリなし | |
private void parseNonGroupEntry() throws EBException { | |
if (_entryArrangement == VARIABLE) { | |
if (_off + 1 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
_entryLength = _cache[_off] & 0xff; | |
_off++; | |
} | |
if (_off + _entryLength + 12 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
byte[] b = new byte[_entryLength]; | |
System.arraycopy(_cache, _off, b, 0, b.length); | |
_off += _entryLength; | |
_comparison = _compareSingle(_word, b); | |
if (_comparison == 1) { | |
// 本文/見出し位置の取得 | |
long tPage = ByteUtil.getLong4(_cache, _off); | |
int tOff = ByteUtil.getInt2(_cache, _off+4); | |
long hPage = ByteUtil.getLong4(_cache, _off+6); | |
int hOff = ByteUtil.getInt2(_cache, _off+10); | |
Result result = new Result(_sub, hPage, hOff, tPage, tOff); | |
export(b, result); | |
} | |
_entryIndex++; | |
_off += 12; | |
} | |
// グループエントリあり | |
private void parseGroupedEntry() throws EBException { | |
if (_off + 2 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
int groupID = _cache[_off] & 0xff; | |
if (groupID == 0x00) { | |
// シングルエントリ | |
_entryLength = _cache[_off+1] & 0xff; | |
if (_off + _entryLength + 14 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
byte[] b = new byte[_entryLength]; | |
System.arraycopy(_cache, _off+2, b, 0, b.length); | |
_off += _entryLength + 2; | |
_comparison = _compareSingle(_canonical, b); | |
if (_comparison == 1) { | |
// 本文/見出し位置の取得 | |
long tPage = ByteUtil.getLong4(_cache, _off); | |
int tOff = ByteUtil.getInt2(_cache, _off+4); | |
long hPage = ByteUtil.getLong4(_cache, _off+6); | |
int hOff = ByteUtil.getInt2(_cache, _off+10); | |
Result result = new Result(_sub, hPage, hOff, tPage, tOff); | |
export(b, result); | |
} | |
_off += 12; | |
_inGroupEntry = false; | |
} else if (groupID == 0x80) { | |
// グループエントリの開始 | |
_entryLength = _cache[_off+1] & 0xff; | |
byte[] b = new byte[_entryLength]; | |
if (_type == KEYWORD || _type == CROSS) { | |
if (_off + _entryLength + 12 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
System.arraycopy(_cache, _off+6, b, 0, b.length); | |
_off += _entryLength + 6; | |
_comparison = _compareSingle(_word, b); | |
long hPage = ByteUtil.getLong4(_cache, _off); | |
int hOff = ByteUtil.getInt2(_cache, _off+4); | |
_keywordHeading = | |
BookInputStream.getPosition(hPage, hOff); | |
_off += 6; | |
} else if (_type == MULTI) { | |
if (_off + _entryLength + 6 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
System.arraycopy(_cache, _off+6, b, 0, b.length); | |
_comparison = _compareSingle(_word, b); | |
_off += _entryLength + 6; | |
} else { | |
if (_off + _entryLength + 4 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
System.arraycopy(_cache, _off+4, b, 0, b.length); | |
_comparison = _compareSingle(_canonical, b); | |
_off += _entryLength + 4; | |
} | |
_currentGroupEntryIndex = b.clone(); | |
_inGroupEntry = true; | |
} else if (groupID == 0xc0) { | |
// グループエントリの要素 | |
if (_type == KEYWORD || _type == CROSS) { | |
if (_off + 7 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
if (_comparison == 1 && _inGroupEntry) { | |
// 本文/見出し位置の取得 | |
long tPage = ByteUtil.getLong4(_cache, _off+1); | |
int tOff = ByteUtil.getInt2(_cache, _off+5); | |
Result result = new Result(_sub, _keywordHeading, tPage, tOff); | |
_keywordHeading = | |
_sub.getNextHeadingPosition(_keywordHeading); | |
export(_currentGroupEntryIndex, result); | |
} | |
_off += 7; | |
} else if (_type == MULTI) { | |
if (_off + 13 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
if (_comparison == 1 && _inGroupEntry) { | |
// 本文/見出し位置の取得 | |
long tPage = ByteUtil.getLong4(_cache, _off+1); | |
int tOff = ByteUtil.getInt2(_cache, _off+5); | |
long hPage = ByteUtil.getLong4(_cache, _off+7); | |
int hOff = ByteUtil.getInt2(_cache, _off+11); | |
Result result = new Result(_sub, hPage, hOff, tPage, tOff); | |
export(_currentGroupEntryIndex, result); | |
} | |
_off += 13; | |
} else { | |
_entryLength = _cache[_off+1] & 0xff; | |
if (_off + _entryLength + 14 > BookInputStream.PAGE_SIZE) { | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
byte[] b = new byte[_entryLength]; | |
System.arraycopy(_cache, _off+2, b, 0, b.length); | |
_off += _entryLength + 2; | |
if (_comparison == 1 && _inGroupEntry | |
&& _compareGroup(_word, b) == 0) { | |
// 本文/見出し位置の取得 | |
long tPage = ByteUtil.getLong4(_cache, _off); | |
int tOff = ByteUtil.getInt2(_cache, _off+4); | |
long hPage = ByteUtil.getLong4(_cache, _off+6); | |
int hOff = ByteUtil.getInt2(_cache, _off+10); | |
Result result = new Result(_sub, hPage, hOff, tPage, tOff); | |
export(b, result); | |
} | |
_off += 12; | |
} | |
} else { | |
// 未知のID | |
throw new EBException(EBException.UNEXP_FILE, _file.getPath()); | |
} | |
_entryIndex++; | |
} | |
} | |
// end of SingleWordSearcher.java |
package com.github.rubyu.ebquery; | |
/**
 * Receives dictionary entries one at a time.
 */
public interface IExporter {

    /**
     * Handles a single entry.
     *
     * @param indexValue  index key of the entry
     * @param heading     heading of the entry
     * @param description body text of the entry
     */
    void export(String indexValue, String heading, String description);
}
package com.github.rubyu.ebquery | |
import java.io.{File, PrintWriter} | |
import io.github.eb4j.{Book, EntryEnumerator, SingleWordEnumerator} | |
import io.github.eb4j.io.{BookReader, EPWINGInputStream} | |
import java.lang.reflect.Method | |
import org.specs2.mutable.Specification | |
/**
 * Manual smoke test: dumps every entry of a locally installed dictionary to
 * `SRD.dump.tsv` through [[TSVExporter]].
 *
 * The dictionary and map-file paths are machine-specific and must exist for
 * this test to run.
 */
class JSONExporterTest extends Specification {
  "EBLeaf" should {
    "equal with a instance be of the same string" in {
      val dir = "/Users/i.y.nishiseki/Dropbox/work/dictionary/SRD"
      val mapFilePath = "/Users/i.y.nishiseki/Dropbox/work/dictionary/SRD/SRD.map"
      val subBook = new Book(dir).getSubBook(0)
      val mapper = new ExternalCharacterMapper(mapFilePath)
      val proc = new EBProcessor
      proc.newline = new EBProcessorImpl.text.Newline
      proc.externalCharacter = new EBProcessorImpl.text.ReplacementCharacter
      proc.text = new EBProcessorImpl.text.Text
      val hook = new EBProcessorAdapter(subBook, mapper, proc)
      val exporter = new TSVExporter(new PrintWriter((new File("SRD.dump.tsv"))))
      // Close the exporter even when enumeration fails; without this the
      // writer was never closed and buffered rows could be lost.
      try {
        val enumerator = EntryEnumerator.Create(subBook, hook, exporter)
        // println (Iterator.continually(searcher.getNextResult()) takeWhile(_ != null) size)
        // 1118269
        // Iterator.continually(enumerator.getNextResult()) takeWhile(_ != null) foreach { result =>
        // val h = result.getHeading(hook)
        // val t = result.getText(hook)
        // if (h.startsWith("get")) {
        // println(s"{heading=$h, text=$t".replace("\n", "¥n"))
        // }
        // }
        enumerator.getNextResult()
      } finally {
        exporter.close()
      }
      // {heading=ab·so·lute, text=ABSOLUTE ADDRESS.¥n━n.⇑¥n【1】絶対的なもの(↔relative).¥n
      // {heading=act, text=ACT OF TOLERATION [UNIFORMITY].¥n(2)⦅しばしば A-⦆(会議の)記録,
      // {heading=ac·tor, text=a BAD ACTOR.¥n【4】〘ローマ法〙原告(plaintiff), 弁護人(advocat
      // {heading=age, text=STONE AGE¥nBRONZE AGE¥nIRON AGE¥nDARK AGES¥nMIDDLE AGES¥nthe
      // {heading=age, text=BRONZE AGE¥nIRON AGE¥nDARK AGES¥nMIDDLE AGES¥nthe age of elec
      // {heading=age, text=IRON AGE¥nDARK AGES¥nMIDDLE AGES¥nthe age of electronic commu
      true mustEqual true
    }
  }
}
package com.github.rubyu.ebquery | |
import java.io.Writer | |
import com.github.tototoshi.csv._ | |
/**
 * [[IExporter]] that writes each entry as one tab-separated row
 * (index value, heading, description) to the given writer.
 *
 * @param w destination; closed by [[close]]
 */
class TSVExporter(w: Writer) extends IExporter {
  implicit val format = new TSVFormat {}
  val writer = CSVWriter.open(w)(format)

  /** Doubles every backslash, then turns each newline into the two characters `\n`. */
  private def escape(s: String): String = {
    val backslashesDoubled = s.replace("\\", "\\\\")
    backslashesDoubled.replace("\n", "\\n")
  }

  /** Escapes the three fields and writes them as a single row. */
  override def export(indexValue: String, heading: String, description: String): Unit = {
    val row = List(indexValue, heading, description).map(escape)
    writer.writeRow(row)
  }

  /** Closes the underlying CSV writer. */
  def close(): Unit = writer.close()
}
https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
{'base':'A', 'letters':'\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F'},
{'base':'AA','letters':'\uA732'},
{'base':'AE','letters':'\u00C6\u01FC\u01E2'},
{'base':'AO','letters':'\uA734'},
{'base':'AU','letters':'\uA736'},
{'base':'AV','letters':'\uA738\uA73A'},
{'base':'AY','letters':'\uA73C'},
{'base':'B', 'letters':'\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181'},
{'base':'C', 'letters':'\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E'},
{'base':'D', 'letters':'\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0'},
{'base':'DZ','letters':'\u01F1\u01C4'},
{'base':'Dz','letters':'\u01F2\u01C5'},
{'base':'E', 'letters':'\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E'},
{'base':'F', 'letters':'\u0046\u24BB\uFF26\u1E1E\u0191\uA77B'},
{'base':'G', 'letters':'\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E'},
{'base':'H', 'letters':'\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D'},
{'base':'I', 'letters':'\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197'},
{'base':'J', 'letters':'\u004A\u24BF\uFF2A\u0134\u0248'},
{'base':'K', 'letters':'\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2'},
{'base':'L', 'letters':'\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780'},
{'base':'LJ','letters':'\u01C7'},
{'base':'Lj','letters':'\u01C8'},
{'base':'M', 'letters':'\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C'},
{'base':'N', 'letters':'\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4'},
{'base':'NJ','letters':'\u01CA'},
{'base':'Nj','letters':'\u01CB'},
{'base':'O', 'letters':'\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C'},
{'base':'OI','letters':'\u01A2'},
{'base':'OO','letters':'\uA74E'},
{'base':'OU','letters':'\u0222'},
{'base':'OE','letters':'\u008C\u0152'},
{'base':'oe','letters':'\u009C\u0153'},
{'base':'P', 'letters':'\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754'},
{'base':'Q', 'letters':'\u0051\u24C6\uFF31\uA756\uA758\u024A'},
{'base':'R', 'letters':'\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782'},
{'base':'S', 'letters':'\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784'},
{'base':'T', 'letters':'\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786'},
{'base':'TZ','letters':'\uA728'},
{'base':'U', 'letters':'\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244'},
{'base':'V', 'letters':'\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245'},
{'base':'VY','letters':'\uA760'},
{'base':'W', 'letters':'\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72'},
{'base':'X', 'letters':'\u0058\u24CD\uFF38\u1E8A\u1E8C'},
{'base':'Y', 'letters':'\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE'},
{'base':'Z', 'letters':'\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762'},
{'base':'a', 'letters':'\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250'},
{'base':'aa','letters':'\uA733'},
{'base':'ae','letters':'\u00E6\u01FD\u01E3'},
{'base':'ao','letters':'\uA735'},
{'base':'au','letters':'\uA737'},
{'base':'av','letters':'\uA739\uA73B'},
{'base':'ay','letters':'\uA73D'},
{'base':'b', 'letters':'\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253'},
{'base':'c', 'letters':'\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184'},
{'base':'d', 'letters':'\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A'},
{'base':'dz','letters':'\u01F3\u01C6'},
{'base':'e', 'letters':'\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD'},
{'base':'f', 'letters':'\u0066\u24D5\uFF46\u1E1F\u0192\uA77C'},
{'base':'g', 'letters':'\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F'},
{'base':'h', 'letters':'\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265'},
{'base':'hv','letters':'\u0195'},
{'base':'i', 'letters':'\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131'},
{'base':'j', 'letters':'\u006A\u24D9\uFF4A\u0135\u01F0\u0249'},
{'base':'k', 'letters':'\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3'},
{'base':'l', 'letters':'\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747'},
{'base':'lj','letters':'\u01C9'},
{'base':'m', 'letters':'\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F'},
{'base':'n', 'letters':'\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5'},
{'base':'nj','letters':'\u01CC'},
{'base':'o', 'letters':'\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275'},
{'base':'oi','letters':'\u01A3'},
{'base':'ou','letters':'\u0223'},
{'base':'oo','letters':'\uA74F'},
{'base':'p','letters':'\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755'},
{'base':'q','letters':'\u0071\u24E0\uFF51\u024B\uA757\uA759'},
{'base':'r','letters':'\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783'},
{'base':'s','letters':'\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B'},
{'base':'t','letters':'\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787'},
{'base':'tz','letters':'\uA729'},
{'base':'u','letters': '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289'},
{'base':'v','letters':'\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C'},
{'base':'vy','letters':'\uA761'},
{'base':'w','letters':'\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73'},
{'base':'x','letters':'\u0078\u24E7\uFF58\u1E8B\u1E8D'},
{'base':'y','letters':'\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF'},
{'base':'z','letters':'\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763'}
\s+{'base':'([^']+)',\s*'letters':\s*'([^']+?)'}.*
c("$1", "$2") + \
c("A", "\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F") + \
c("AA", "\uA732") + \
c("AE", "\u00C6\u01FC\u01E2") + \
c("AO", "\uA734") + \
c("AU", "\uA736") + \
c("AV", "\uA738\uA73A") + \
c("AY", "\uA73C") + \
c("B", "\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181") + \
c("C", "\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E") + \
c("D", "\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0") + \
c("DZ", "\u01F1\u01C4") + \
c("Dz", "\u01F2\u01C5") + \
c("E", "\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E") + \
c("F", "\u0046\u24BB\uFF26\u1E1E\u0191\uA77B") + \
c("G", "\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E") + \
c("H", "\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D") + \
c("I", "\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197") + \
c("J", "\u004A\u24BF\uFF2A\u0134\u0248") + \
c("K", "\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2") + \
c("L", "\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780") + \
c("LJ", "\u01C7") + \
c("Lj", "\u01C8") + \
c("M", "\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C") + \
c("N", "\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4") + \
c("NJ", "\u01CA") + \
c("Nj", "\u01CB") + \
c("O", "\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C") + \
c("OI", "\u01A2") + \
c("OO", "\uA74E") + \
c("OU", "\u0222") + \
c("OE", "\u008C\u0152") + \
c("oe", "\u009C\u0153") + \
c("P", "\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754") + \
c("Q", "\u0051\u24C6\uFF31\uA756\uA758\u024A") + \
c("R", "\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782") + \
c("S", "\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784") + \
c("T", "\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786") + \
c("TZ", "\uA728") + \
c("U", "\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244") + \
c("V", "\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245") + \
c("VY", "\uA760") + \
c("W", "\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72") + \
c("X", "\u0058\u24CD\uFF38\u1E8A\u1E8C") + \
c("Y", "\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE") + \
c("Z", "\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762") + \
c("a", "\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250") + \
c("aa", "\uA733") + \
c("ae", "\u00E6\u01FD\u01E3") + \
c("ao", "\uA735") + \
c("au", "\uA737") + \
c("av", "\uA739\uA73B") + \
c("ay", "\uA73D") + \
c("b", "\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253") + \
c("c", "\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184") + \
c("d", "\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A") + \
c("dz", "\u01F3\u01C6") + \
c("e", "\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD") + \
c("f", "\u0066\u24D5\uFF46\u1E1F\u0192\uA77C") + \
c("g", "\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F") + \
c("h", "\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265") + \
c("hv", "\u0195") + \
c("i", "\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131") + \
c("j", "\u006A\u24D9\uFF4A\u0135\u01F0\u0249") + \
c("k", "\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3") + \
c("l", "\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747") + \
c("lj", "\u01C9") + \
c("m", "\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F") + \
c("n", "\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5") + \
c("nj", "\u01CC") + \
c("o", "\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275") + \
c("oi", "\u01A3") + \
c("ou", "\u0223") + \
c("oo", "\uA74F") + \
c("p", "\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755") + \
c("q", "\u0071\u24E0\uFF51\u024B\uA757\uA759") + \
c("r", "\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783") + \
c("s", "\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B") + \
c("t", "\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787") + \
c("tz", "\uA729") + \
c("u", "\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289") + \
c("v", "\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C") + \
c("vy", "\uA761") + \
c("w", "\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73") + \
c("x", "\u0078\u24E7\uFF58\u1E8B\u1E8D") + \
c("y", "\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF") + \
c("z", "\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763") + \
csvq -o SRD.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '==connected==') AS agg_c3 FROM (SELECT LOWER(c2) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.dump.tsv
)) GROUP BY lower_c2, c3 ORDER BY lower_c2 ASC"
csvq -o SRD.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '==connected==') AS agg_c3 FROM (SELECT LOWER(TRIM(c2)) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.dump.tsv
)) GROUP BY lower_c2, c3 ORDER BY lower_c2 ASC"
動かない…
csvq -o SRD.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '==connected==') WITHIN GROUP (ORDER BY LEN(c3) ASC) FROM (SELECT LOWER(TRIM(c2)) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.dump.tsv
)) GROUP BY lower_c2, c3 ORDER BY lower_c2 ASC"
ダメ…
1 h1 d1
2 h1 d2
3 h2 d3
4 h3 d3
csvq -no-header "SELECT c2, LISTAGG(c3, '-') FROM test.tsv
GROUP BY c2"
OK
csvq -no-header "SELECT lower_c2, LISTAGG(c3, '-') FROM (SELECT LOWER(c2) AS lower_c2, c3 FROM test.tsv
) GROUP BY lower_c2"
OK
csvq -no-header "SELECT lower_c2, LISTAGG(c3, '-') FROM (SELECT LOWER(c2) AS lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM test.tsv
)) GROUP BY lower_c2"
OK
あれ…?
grep -E "\tA" SRD.dump.tsv > SRD.A.tsv
csvq -o SRD.A.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '==connected==') WITHIN GROUP (ORDER BY LEN(c3) ASC) FROM (SELECT LOWER(TRIM(c2)) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.A.tsv
)) GROUP BY lower_c2, c3 ORDER BY lower_c2 ASC"
再現した
rm SRD.A.distinct.sort.agg.tsv; csvq -o SRD.A.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '==connected==') WITHIN GROUP (ORDER BY LEN(c3) ASC) FROM (SELECT LOWER(TRIM(c2)) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.A.tsv
)) GROUP BY lower_c2, c3 ORDER BY lower_c2 ASC"
rm SRD.A.distinct.sort.agg.tsv; csvq -o SRD.A.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '-') FROM (SELECT LOWER(TRIM(c2)) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.A.tsv
)) GROUP BY lower_c2, c3 ORDER BY lower_c2 ASC"
rm SRD.A.distinct.sort.agg.tsv; csvq -o SRD.A.distinct.sort.agg.tsv -f TSV -without-header -no-header "SELECT lower_c2 , LISTAGG(c3, '-') FROM (SELECT LOWER(TRIM(c2)) as lower_c2, c3 FROM (SELECT DISTINCT c2, c3 FROM SRD.A.tsv
)) GROUP BY lower_c2 ORDER BY lower_c2 ASC"
OK!!!!
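GROUP BYの誤用だった… c3までGROUP BYに含めていたため、(lower_c2, c3)の組ごとにグループが分かれてしまい、LISTAGGで複数のc3が連結されなかった。GROUP BY lower_c2 だけにすれば期待どおり集約される。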
dump dump1 get get1 get2
のような、末尾に数字がついてる系の見出しをうまく拾えない
1 h1 d1
2 h1 d2
3 h2 d3
4 h3 d3
csvq -no-header "
SELECT
c2,
SUBSTR(c2, 0,
CASE SUBSTR(c2, LEN(c2) - 1, 1)
WHEN '1' THEN LEN(c2) - 1
WHEN '2' THEN LEN(c2) - 1
WHEN '3' THEN LEN(c2) - 1
WHEN '4' THEN LEN(c2) - 1
WHEN '5' THEN LEN(c2) - 1
WHEN '6' THEN LEN(c2) - 1
WHEN '7' THEN LEN(c2) - 1
WHEN '8' THEN LEN(c2) - 1
WHEN '9' THEN LEN(c2) - 1
ELSE LEN(c2)
END) AS tail_trimmed
FROM test.tsv
GROUP BY c2"
+----+--------------+
| c2 | tail_trimmed |
+----+--------------+
| h1 | h |
| h2 | h |
| h3 | h |
+----+--------------+
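csvq の SUBSTR は 0 オリジン(先頭文字の位置が 0)のようだ。上の結果では、'h1' に対して SUBSTR(c2, LEN(c2) - 1, 1) が末尾の '1' に一致し、SUBSTR(c2, 0, LEN(c2) - 1) が 'h' を返している。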
1 ha1 d1
2 haa1 d2
3 haaa2 d3
4 haaaa3 d3
csvq -no-header "
SELECT
c2,
SUBSTR(c2, 0,
CASE SUBSTR(c2, LEN(c2) - 1, 1)
WHEN '1' THEN LEN(c2) - 1
WHEN '2' THEN LEN(c2) - 1
WHEN '3' THEN LEN(c2) - 1
WHEN '4' THEN LEN(c2) - 1
WHEN '5' THEN LEN(c2) - 1
WHEN '6' THEN LEN(c2) - 1
WHEN '7' THEN LEN(c2) - 1
WHEN '8' THEN LEN(c2) - 1
WHEN '9' THEN LEN(c2) - 1
ELSE LEN(c2)
END) AS tail_trimmed
FROM test.tsv
GROUP BY c2"
+--------+--------------+
| c2 | tail_trimmed |
+--------+--------------+
| ha1 | ha |
| haa1 | haa |
| haaa2 | haaa |
| haaaa3 | haaaa |
+--------+--------------+
csvq "SELECT INSTR('foo@example.com', '@');"
+-------------------------------+
| INSTR('foo@example.com', '@') |
+-------------------------------+
| 3 |
+-------------------------------+
BQでやる
Google Driveにgzを置いて、外部テーブルとしてBQで設定
INSERT INTO `test.srd`
SELECT * FROM `test.test`
インデックスが0x00で始まる行だけを抽出すると、0xFFFDを含む行を抽出した場合よりも件数が増えた。これは想定に反している。
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(target_column, '·', ''), '-', ''), 'ꝣ', 'z'), 'ⱬ', 'z'), 'ɀ', 'z'), 'ȥ', 'z'), 'ƶ', 'z'), 'ẕ', 'z'), 'ẓ', 'z'), 'ž', 'z'), 'ż', 'z'), 'ẑ', 'z'), 'ź', 'z'), 'z', 'z'), 'ⓩ', 'z'), 'z', 'z'), 'ỿ', 'y'), 'ɏ', 'y'), 'ƴ', 'y'), 'ỵ', 'y'), 'ẙ', 'y'), 'ỷ', 'y'), 'ÿ', 'y'), 'ẏ', 'y'), 'ȳ', 'y'), 'ỹ', 'y'), 'ŷ', 'y'), 'ý', 'y'), 'ỳ', 'y'), 'y', 'y'), 'ⓨ', 'y'), 'y', 'y'), 'ẍ', 'x'), 'ẋ', 'x'), 'x', 'x'), 'ⓧ', 'x'), 'x', 'x'), 'ⱳ', 'w'), 'ẉ', 'w'), 'ẘ', 'w'), 'ẅ', 'w'), 'ẇ', 'w'), 'ŵ', 'w'), 'ẃ', 'w'), 'ẁ', 'w'), 'w', 'w'), 'ⓦ', 'w'), 'w', 'w'), 'ʌ', 'v'), 'ꝟ', 'v'), 'ʋ', 'v'), 'ṿ', 'v'), 'ṽ', 'v'), 'v', 'v'), 'ⓥ', 'v'), 'v', 'v'), 'ʉ', 'u'), 'ṵ', 'u'), 'ṷ', 'u'), 'ų', 'u'), 'ṳ', 'u'), 'ụ', 'u'), 'ự', 'u'), 'ử', 'u'), 'ữ', 'u'), 'ứ', 'u'), 'ừ', 'u'), 'ư', 'u'), 'ȗ', 'u'), 'ȕ', 'u'), 'ǔ', 'u'), 'ű', 'u'), 'ů', 'u'), 'ủ', 'u'), 'ǚ', 'u'), 'ǖ', 'u'), 'ǘ', 'u'), 'ǜ', 'u'), 'ü', 'u'), 'ŭ', 'u'), 'ṻ', 'u'), 'ū', 'u'), 'ṹ', 'u'), 'ũ', 'u'), 'û', 'u'), 'ú', 'u'), 'ù', 'u'), 'u', 'u'), 'ⓤ', 'u'), 'u', 'u'), 'ꞇ', 't'), 'ⱦ', 't'), 'ʈ', 't'), 'ƭ', 't'), 'ŧ', 't'), 'ṯ', 't'), 'ṱ', 't'), 'ţ', 't'), 'ț', 't'), 'ṭ', 't'), 'ť', 't'), 'ẗ', 't'), 'ṫ', 't'), 't', 't'), 'ⓣ', 't'), 't', 't'), 'ẛ', 's'), 'ꞅ', 's'), 'ꞩ', 's'), 'ȿ', 's'), 'ş', 's'), 'ș', 's'), 'ṩ', 's'), 'ṣ', 's'), 'ṧ', 's'), 'š', 's'), 'ṡ', 's'), 'ŝ', 's'), 'ṥ', 's'), 'ś', 's'), 'ß', 's'), 
's', 's'), 'ⓢ', 's'), 's', 's'), 'ꞃ', 'r'), 'ꞧ', 'r'), 'ꝛ', 'r'), 'ɽ', 'r'), 'ɍ', 'r'), 'ṟ', 'r'), 'ŗ', 'r'), 'ṝ', 'r'), 'ṛ', 'r'), 'ȓ', 'r'), 'ȑ', 'r'), 'ř', 'r'), 'ṙ', 'r'), 'ŕ', 'r'), 'r', 'r'), 'ⓡ', 'r'), 'r', 'r'), 'ꝙ', 'q'), 'ꝗ', 'q'), 'ɋ', 'q'), 'q', 'q'), 'ⓠ', 'q'), 'q', 'q'), 'ꝕ', 'p'), 'ꝓ', 'p'), 'ꝑ', 'p'), 'ᵽ', 'p'), 'ƥ', 'p'), 'ṗ', 'p'), 'ṕ', 'p'), 'p', 'p'), 'ⓟ', 'p'), 'p', 'p'), 'ɵ', 'o'), 'ꝍ', 'o'), 'ꝋ', 'o'), 'ɔ', 'o'), 'ǿ', 'o'), 'ø', 'o'), 'ǭ', 'o'), 'ǫ', 'o'), 'ộ', 'o'), 'ọ', 'o'), 'ợ', 'o'), 'ở', 'o'), 'ỡ', 'o'), 'ớ', 'o'), 'ờ', 'o'), 'ơ', 'o'), 'ȏ', 'o'), 'ȍ', 'o'), 'ǒ', 'o'), 'ő', 'o'), 'ỏ', 'o'), 'ȫ', 'o'), 'ö', 'o'), 'ȱ', 'o'), 'ȯ', 'o'), 'ŏ', 'o'), 'ṓ', 'o'), 'ṑ', 'o'), 'ō', 'o'), 'ṏ', 'o'), 'ȭ', 'o'), 'ṍ', 'o'), 'õ', 'o'), 'ổ', 'o'), 'ỗ', 'o'), 'ố', 'o'), 'ồ', 'o'), 'ô', 'o'), 'ó', 'o'), 'ò', 'o'), 'o', 'o'), 'ⓞ', 'o'), 'o', 'o'), 'ꞥ', 'n'), 'ꞑ', 'n'), 'ʼn', 'n'), 'ɲ', 'n'), 'ƞ', 'n'), 'ṉ', 'n'), 'ṋ', 'n'), 'ņ', 'n'), 'ṇ', 'n'), 'ň', 'n'), 'ṅ', 'n'), 'ñ', 'n'), 'ń', 'n'), 'ǹ', 'n'), 'n', 'n'), 'ⓝ', 'n'), 'n', 'n'), 'ɯ', 'm'), 'ɱ', 'm'), 'ṃ', 'm'), 'ṁ', 'm'), 'ḿ', 'm'), 'm', 'm'), 'ⓜ', 'm'), 'm', 'm'), 'ꝇ', 'l'), 'ꞁ', 'l'), 'ꝉ', 'l'), 'ⱡ', 'l'), 'ɫ', 'l'), 'ƚ', 'l'), 'ł', 'l'), 'ſ', 'l'), 'ḻ', 'l'), 'ḽ', 'l'), 'ļ', 'l'), 'ḹ', 'l'), 'ḷ', 'l'), 'ľ', 'l'), 'ĺ', 'l'), 'ŀ', 'l'), 'l', 'l'), 'ⓛ', 'l'), 'l', 'l'), 'ꞣ', 'k'), 'ꝅ', 'k'), 'ꝃ', 'k'), 'ꝁ', 'k'), 'ⱪ', 'k'), 'ƙ', 'k'), 'ḵ', 'k'), 'ķ', 'k'), 'ḳ', 'k'), 'ǩ', 'k'), 'ḱ', 'k'), 'k', 'k'), 'ⓚ', 'k'), 'k', 'k'), 'ɉ', 'j'), 'ǰ', 'j'), 'ĵ', 'j'), 'j', 'j'), 'ⓙ', 'j'), 'j', 'j'), 'ı', 'i'), 'ɨ', 'i'), 'ḭ', 'i'), 'į', 'i'), 'ị', 'i'), 'ȋ', 'i'), 'ȉ', 'i'), 'ǐ', 'i'), 'ỉ', 'i'), 'ḯ', 'i'), 'ï', 'i'), 'ĭ', 'i'), 'ī', 'i'), 'ĩ', 'i'), 'î', 'i'), 'í', 'i'), 'ì', 'i'), 'i', 'i'), 'ⓘ', 'i'), 'i', 'i'), 'ɥ', 'h'), 'ⱶ', 'h'), 'ⱨ', 'h'), 'ħ', 'h'), 'ẖ', 'h'), 'ḫ', 'h'), 'ḩ', 'h'), 'ḥ', 'h'), 'ȟ', 'h'), 'ḧ', 'h'), 'ḣ', 'h'), 'ĥ', 'h'), 'h', 'h'), 'ⓗ', 'h'), 'h', 'h'), 'ꝿ', 'g'), 'ᵹ', 'g'), 'ꞡ', 'g'), 'ɠ', 
'g'), 'ǥ', 'g'), 'ģ', 'g'), 'ǧ', 'g'), 'ġ', 'g'), 'ğ', 'g'), 'ḡ', 'g'), 'ĝ', 'g'), 'ǵ', 'g'), 'g', 'g'), 'ⓖ', 'g'), 'g', 'g'), 'ꝼ', 'f'), 'ƒ', 'f'), 'ḟ', 'f'), 'f', 'f'), 'ⓕ', 'f'), 'f', 'f'), 'ǝ', 'e'), 'ɛ', 'e'), 'ɇ', 'e'), 'ḛ', 'e'), 'ḙ', 'e'), 'ę', 'e'), 'ḝ', 'e'), 'ȩ', 'e'), 'ệ', 'e'), 'ẹ', 'e'), 'ȇ', 'e'), 'ȅ', 'e'), 'ě', 'e'), 'ẻ', 'e'), 'ë', 'e'), 'ė', 'e'), 'ĕ', 'e'), 'ḗ', 'e'), 'ḕ', 'e'), 'ē', 'e'), 'ẽ', 'e'), 'ể', 'e'), 'ễ', 'e'), 'ế', 'e'), 'ề', 'e'), 'ê', 'e'), 'é', 'e'), 'è', 'e'), 'e', 'e'), 'ⓔ', 'e'), 'e', 'e'), 'ꝺ', 'd'), 'ɗ', 'd'), 'ɖ', 'd'), 'ƌ', 'd'), 'đ', 'd'), 'ḏ', 'd'), 'ḓ', 'd'), 'ḑ', 'd'), 'ḍ', 'd'), 'ď', 'd'), 'ḋ', 'd'), 'd', 'd'), 'ⓓ', 'd'), 'd', 'd'), 'ↄ', 'c'), 'ꜿ', 'c'), 'ȼ', 'c'), 'ƈ', 'c'), 'ḉ', 'c'), 'ç', 'c'), 'č', 'c'), 'ċ', 'c'), 'ĉ', 'c'), 'ć', 'c'), 'c', 'c'), 'ⓒ', 'c'), 'c', 'c'), 'ɓ', 'b'), 'ƃ', 'b'), 'ƀ', 'b'), 'ḇ', 'b'), 'ḅ', 'b'), 'ḃ', 'b'), 'b', 'b'), 'ⓑ', 'b'), 'b', 'b'), 'ɐ', 'a'), 'ⱥ', 'a'), 'ą', 'a'), 'ḁ', 'a'), 'ặ', 'a'), 'ậ', 'a'), 'ạ', 'a'), 'ȃ', 'a'), 'ȁ', 'a'), 'ǎ', 'a'), 'ǻ', 'a'), 'å', 'a'), 'ả', 'a'), 'ǟ', 'a'), 'ä', 'a'), 'ǡ', 'a'), 'ȧ', 'a'), 'ẳ', 'a'), 'ẵ', 'a'), 'ắ', 'a'), 'ằ', 'a'), 'ă', 'a'), 'ā', 'a'), 'ã', 'a'), 'ẩ', 'a'), 'ẫ', 'a'), 'ấ', 'a'), 'ầ', 'a'), 'â', 'a'), 'á', 'a'), 'à', 'a'), 'ẚ', 'a'), 'a', 'a'), 'ⓐ', 'a'), 'a', 'a'), 'Ꝣ', 'Z'), 'Ⱬ', 'Z'), 'Ɀ', 'Z'), 'Ȥ', 'Z'), 'Ƶ', 'Z'), 'Ẕ', 'Z'), 'Ẓ', 'Z'), 'Ž', 'Z'), 'Ż', 'Z'), 'Ẑ', 'Z'), 'Ź', 'Z'), 'Z', 'Z'), 'Ⓩ', 'Z'), 'Z', 'Z'), 'Ỿ', 'Y'), 'Ɏ', 'Y'), 'Ƴ', 'Y'), 'Ỵ', 'Y'), 'Ỷ', 'Y'), 'Ÿ', 'Y'), 'Ẏ', 'Y'), 'Ȳ', 'Y'), 'Ỹ', 'Y'), 'Ŷ', 'Y'), 'Ý', 'Y'), 'Ỳ', 'Y'), 'Y', 'Y'), 'Ⓨ', 'Y'), 'Y', 'Y'), 'Ẍ', 'X'), 'Ẋ', 'X'), 'X', 'X'), 'Ⓧ', 'X'), 'X', 'X'), 'Ⱳ', 'W'), 'Ẉ', 'W'), 'Ẅ', 'W'), 'Ẇ', 'W'), 'Ŵ', 'W'), 'Ẃ', 'W'), 'Ẁ', 'W'), 'W', 'W'), 'Ⓦ', 'W'), 'W', 'W'), 'Ʌ', 'V'), 'Ꝟ', 'V'), 'Ʋ', 'V'), 'Ṿ', 'V'), 'Ṽ', 'V'), 'V', 'V'), 'Ⓥ', 'V'), 'V', 'V'), 'Ʉ', 'U'), 'Ṵ', 'U'), 'Ṷ', 'U'), 'Ų', 'U'), 'Ṳ', 'U'), 'Ụ', 'U'), 'Ự', 'U'), 'Ử', 'U'), 'Ữ', 'U'), 'Ứ', 'U'), 
'Ừ', 'U'), 'Ư', 'U'), 'Ȗ', 'U'), 'Ȕ', 'U'), 'Ǔ', 'U'), 'Ű', 'U'), 'Ů', 'U'), 'Ủ', 'U'), 'Ǚ', 'U'), 'Ǖ', 'U'), 'Ǘ', 'U'), 'Ǜ', 'U'), 'Ü', 'U'), 'Ŭ', 'U'), 'Ṻ', 'U'), 'Ū', 'U'), 'Ṹ', 'U'), 'Ũ', 'U'), 'Û', 'U'), 'Ú', 'U'), 'Ù', 'U'), 'U', 'U'), 'Ⓤ', 'U'), 'U', 'U'), 'Ꞇ', 'T'), 'Ⱦ', 'T'), 'Ʈ', 'T'), 'Ƭ', 'T'), 'Ŧ', 'T'), 'Ṯ', 'T'), 'Ṱ', 'T'), 'Ţ', 'T'), 'Ț', 'T'), 'Ṭ', 'T'), 'Ť', 'T'), 'Ṫ', 'T'), 'T', 'T'), 'Ⓣ', 'T'), 'T', 'T'), 'Ꞅ', 'S'), 'Ꞩ', 'S'), 'Ȿ', 'S'), 'Ş', 'S'), 'Ș', 'S'), 'Ṩ', 'S'), 'Ṣ', 'S'), 'Ṧ', 'S'), 'Š', 'S'), 'Ṡ', 'S'), 'Ŝ', 'S'), 'Ṥ', 'S'), 'Ś', 'S'), 'ẞ', 'S'), 'S', 'S'), 'Ⓢ', 'S'), 'S', 'S'), 'Ꞃ', 'R'), 'Ꞧ', 'R'), 'Ꝛ', 'R'), 'Ɽ', 'R'), 'Ɍ', 'R'), 'Ṟ', 'R'), 'Ŗ', 'R'), 'Ṝ', 'R'), 'Ṛ', 'R'), 'Ȓ', 'R'), 'Ȑ', 'R'), 'Ř', 'R'), 'Ṙ', 'R'), 'Ŕ', 'R'), 'R', 'R'), 'Ⓡ', 'R'), 'R', 'R'), 'Ɋ', 'Q'), 'Ꝙ', 'Q'), 'Ꝗ', 'Q'), 'Q', 'Q'), 'Ⓠ', 'Q'), 'Q', 'Q'), 'Ꝕ', 'P'), 'Ꝓ', 'P'), 'Ꝑ', 'P'), 'Ᵽ', 'P'), 'Ƥ', 'P'), 'Ṗ', 'P'), 'Ṕ', 'P'), 'P', 'P'), 'Ⓟ', 'P'), 'P', 'P'), 'Ꝍ', 'O'), 'Ꝋ', 'O'), 'Ɵ', 'O'), 'Ɔ', 'O'), 'Ǿ', 'O'), 'Ø', 'O'), 'Ǭ', 'O'), 'Ǫ', 'O'), 'Ộ', 'O'), 'Ọ', 'O'), 'Ợ', 'O'), 'Ở', 'O'), 'Ỡ', 'O'), 'Ớ', 'O'), 'Ờ', 'O'), 'Ơ', 'O'), 'Ȏ', 'O'), 'Ȍ', 'O'), 'Ǒ', 'O'), 'Ő', 'O'), 'Ỏ', 'O'), 'Ȫ', 'O'), 'Ö', 'O'), 'Ȱ', 'O'), 'Ȯ', 'O'), 'Ŏ', 'O'), 'Ṓ', 'O'), 'Ṑ', 'O'), 'Ō', 'O'), 'Ṏ', 'O'), 'Ȭ', 'O'), 'Ṍ', 'O'), 'Õ', 'O'), 'Ổ', 'O'), 'Ỗ', 'O'), 'Ố', 'O'), 'Ồ', 'O'), 'Ô', 'O'), 'Ó', 'O'), 'Ò', 'O'), 'O', 'O'), 'Ⓞ', 'O'), 'O', 'O'), 'Ꞥ', 'N'), 'Ꞑ', 'N'), 'Ɲ', 'N'), 'Ƞ', 'N'), 'Ṉ', 'N'), 'Ṋ', 'N'), 'Ņ', 'N'), 'Ṇ', 'N'), 'Ň', 'N'), 'Ṅ', 'N'), 'Ñ', 'N'), 'Ń', 'N'), 'Ǹ', 'N'), 'N', 'N'), 'Ⓝ', 'N'), 'N', 'N'), 'Ɯ', 'M'), 'Ɱ', 'M'), 'Ṃ', 'M'), 'Ṁ', 'M'), 'Ḿ', 'M'), 'M', 'M'), 'Ⓜ', 'M'), 'M', 'M'), 'Ꞁ', 'L'), 'Ꝇ', 'L'), 'Ꝉ', 'L'), 'Ⱡ', 'L'), 'Ɫ', 'L'), 'Ƚ', 'L'), 'Ł', 'L'), 'Ḻ', 'L'), 'Ḽ', 'L'), 'Ļ', 'L'), 'Ḹ', 'L'), 'Ḷ', 'L'), 'Ľ', 'L'), 'Ĺ', 'L'), 'Ŀ', 'L'), 'L', 'L'), 'Ⓛ', 'L'), 'L', 'L'), 'Ꞣ', 'K'), 'Ꝅ', 'K'), 'Ꝃ', 'K'), 'Ꝁ', 'K'), 'Ⱪ', 'K'), 'Ƙ', 'K'), 'Ḵ', 'K'), 'Ķ', 
'K'), 'Ḳ', 'K'), 'Ǩ', 'K'), 'Ḱ', 'K'), 'K', 'K'), 'Ⓚ', 'K'), 'K', 'K'), 'Ɉ', 'J'), 'Ĵ', 'J'), 'J', 'J'), 'Ⓙ', 'J'), 'J', 'J'), 'Ɨ', 'I'), 'Ḭ', 'I'), 'Į', 'I'), 'Ị', 'I'), 'Ȋ', 'I'), 'Ȉ', 'I'), 'Ǐ', 'I'), 'Ỉ', 'I'), 'Ḯ', 'I'), 'Ï', 'I'), 'İ', 'I'), 'Ĭ', 'I'), 'Ī', 'I'), 'Ĩ', 'I'), 'Î', 'I'), 'Í', 'I'), 'Ì', 'I'), 'I', 'I'), 'Ⓘ', 'I'), 'I', 'I'), 'Ɥ', 'H'), 'Ⱶ', 'H'), 'Ⱨ', 'H'), 'Ħ', 'H'), 'Ḫ', 'H'), 'Ḩ', 'H'), 'Ḥ', 'H'), 'Ȟ', 'H'), 'Ḧ', 'H'), 'Ḣ', 'H'), 'Ĥ', 'H'), 'H', 'H'), 'Ⓗ', 'H'), 'H', 'H'), 'Ꝿ', 'G'), 'Ᵹ', 'G'), 'Ꞡ', 'G'), 'Ɠ', 'G'), 'Ǥ', 'G'), 'Ģ', 'G'), 'Ǧ', 'G'), 'Ġ', 'G'), 'Ğ', 'G'), 'Ḡ', 'G'), 'Ĝ', 'G'), 'Ǵ', 'G'), 'G', 'G'), 'Ⓖ', 'G'), 'G', 'G'), 'Ꝼ', 'F'), 'Ƒ', 'F'), 'Ḟ', 'F'), 'F', 'F'), 'Ⓕ', 'F'), 'F', 'F'), 'Ǝ', 'E'), 'Ɛ', 'E'), 'Ḛ', 'E'), 'Ḙ', 'E'), 'Ę', 'E'), 'Ḝ', 'E'), 'Ȩ', 'E'), 'Ệ', 'E'), 'Ẹ', 'E'), 'Ȇ', 'E'), 'Ȅ', 'E'), 'Ě', 'E'), 'Ẻ', 'E'), 'Ë', 'E'), 'Ė', 'E'), 'Ĕ', 'E'), 'Ḗ', 'E'), 'Ḕ', 'E'), 'Ē', 'E'), 'Ẽ', 'E'), 'Ể', 'E'), 'Ễ', 'E'), 'Ế', 'E'), 'Ề', 'E'), 'Ê', 'E'), 'É', 'E'), 'È', 'E'), 'E', 'E'), 'Ⓔ', 'E'), 'E', 'E'), 'Ð', 'D'), 'Ꝺ', 'D'), 'Ɖ', 'D'), 'Ɗ', 'D'), 'Ƌ', 'D'), 'Đ', 'D'), 'Ḏ', 'D'), 'Ḓ', 'D'), 'Ḑ', 'D'), 'Ḍ', 'D'), 'Ď', 'D'), 'Ḋ', 'D'), 'D', 'D'), 'Ⓓ', 'D'), 'D', 'D'), 'Ꜿ', 'C'), 'Ȼ', 'C'), 'Ƈ', 'C'), 'Ḉ', 'C'), 'Ç', 'C'), 'Č', 'C'), 'Ċ', 'C'), 'Ĉ', 'C'), 'Ć', 'C'), 'C', 'C'), 'Ⓒ', 'C'), 'C', 'C'), 'Ɓ', 'B'), 'Ƃ', 'B'), 'Ƀ', 'B'), 'Ḇ', 'B'), 'Ḅ', 'B'), 'Ḃ', 'B'), 'B', 'B'), 'Ⓑ', 'B'), 'B', 'B'), 'Ɐ', 'A'), 'Ⱥ', 'A'), 'Ą', 'A'), 'Ḁ', 'A'), 'Ặ', 'A'), 'Ậ', 'A'), 'Ạ', 'A'), 'Ȃ', 'A'), 'Ȁ', 'A'), 'Ǎ', 'A'), 'Ǻ', 'A'), 'Å', 'A'), 'Ả', 'A'), 'Ǟ', 'A'), 'Ä', 'A'), 'Ǡ', 'A'), 'Ȧ', 'A'), 'Ẳ', 'A'), 'Ẵ', 'A'), 'Ắ', 'A'), 'Ằ', 'A'), 'Ă', 'A'), 'Ā', 'A'), 'Ã', 'A'), 'Ẩ', 'A'), 'Ẫ', 'A'), 'Ấ', 'A'), 'Ầ', 'A'), 'Â', 'A'), 'Á', 'A'), 'À', 'A'), 'A', 'A'), 'Ⓐ', 'A'), 'A', 'A')