安装
brew install tesseract
源代码安装
git clone https://github.com/tesseract-ocr/tesseract.git
cd tesseract
./autogen.sh
./configure
make
sudo make install
sudo ldconfig
安装语言包
brew install tesseract-lang
(旧版 Homebrew 的写法为 brew install tesseract --all-languages,新版 Homebrew 已移除该选项,语言包改由 tesseract-lang 提供)
使用
tesseract article.png text -l chi_sim
- article.png:待提取文字的图片
- text:输出文件名(不含后缀),tesseract 会自动添加 .txt 后缀,识别结果保存为 text.txt
- -l chi_sim:指定识别语言,chi_sim 表示简体中文
语言
- afr (Afrikaans)
- amh (Amharic)
- ara (Arabic)
- asm (Assamese)
- aze (Azerbaijani)
- aze_cyrl (Azerbaijani - Cyrillic)
- bel (Belarusian)
- ben (Bengali)
- bod (Tibetan)
- bos (Bosnian)
- bul (Bulgarian)
- cat (Catalan; Valencian)
- ceb (Cebuano)
- ces (Czech)
- chi_sim (Chinese - Simplified)
- chi_tra (Chinese - Traditional)
- chr (Cherokee)
- cym (Welsh)
- dan (Danish)
- dan_frak (Danish - Fraktur)
- deu (German)
- deu_frak (German - Fraktur)
- dzo (Dzongkha)
- ell (Greek, Modern (1453-))
- eng (English)
- enm (English, Middle (1100-1500))
- epo (Esperanto)
- equ (Math / equation detection module)
- est (Estonian)
- eus (Basque)
- fas (Persian)
- fin (Finnish)
- fra (French)
- frk (German - Fraktur;旧版文档误标为 Frankish)
- frm (French, Middle (ca.1400-1600))
- gle (Irish)
- glg (Galician)
- grc (Greek, Ancient (to 1453))
- guj (Gujarati)
- hat (Haitian; Haitian Creole)
- heb (Hebrew)
- hin (Hindi)
- hrv (Croatian)
- hun (Hungarian)
- iku (Inuktitut)
- ind (Indonesian)
- isl (Icelandic)
- ita (Italian)
- ita_old (Italian - Old)
- jav (Javanese)
- jpn (Japanese)
- kan (Kannada)
- kat (Georgian)
- kat_old (Georgian - Old)
- kaz (Kazakh)
- khm (Central Khmer)
- kir (Kirghiz; Kyrgyz)
- kor (Korean)
- kur (Kurdish)
- lao (Lao)
- lat (Latin)
- lav (Latvian)
- lit (Lithuanian)
- mal (Malayalam)
- mar (Marathi)
- mkd (Macedonian)
- mlt (Maltese)
- msa (Malay)
- mya (Burmese)
- nep (Nepali)
- nld (Dutch; Flemish)
- nor (Norwegian)
- ori (Oriya)
- osd (Orientation and script detection module)
- pan (Panjabi; Punjabi)
- pol (Polish)
- por (Portuguese)
- pus (Pushto; Pashto)
- ron (Romanian; Moldavian; Moldovan)
- rus (Russian)
- san (Sanskrit)
- sin (Sinhala; Sinhalese)
- slk (Slovak)
- slk_frak (Slovak - Fraktur)
- slv (Slovenian)
- spa (Spanish; Castilian)
- spa_old (Spanish; Castilian - Old)
- sqi (Albanian)
- srp (Serbian)
- srp_latn (Serbian - Latin)
- swa (Swahili)
- swe (Swedish)
- syr (Syriac)
- tam (Tamil)
- tel (Telugu)
- tgk (Tajik)
- tgl (Tagalog)
- tha (Thai)
- tir (Tigrinya)
- tur (Turkish)
- uig (Uighur; Uyghur)
- ukr (Ukrainian)
- urd (Urdu)
- uzb (Uzbek)
- uzb_cyrl (Uzbek - Cyrillic)
- vie (Vietnamese)
- yid (Yiddish)