Skip to content

Instantly share code, notes, and snippets.

@MahmoudMabrok
Created August 13, 2020 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MahmoudMabrok/81686affbeabaf21588ce7ec2dc14808 to your computer and use it in GitHub Desktop.
Save MahmoudMabrok/81686affbeabaf21588ce7ec2dc14808 to your computer and use it in GitHub Desktop.
Prepare data for training an OCR model
# Render each ground-truth text file <i>.gt.txt (read from the current directory)
# to an image with text2image, using the Amiri font.
# The Kotlin generator writes 0.gt.txt through 1000.gt.txt, so iterate over
# exactly that range; `seq 1001` would skip file 0 and ask for a missing 1001.
for i in $(seq 0 1000);
do
text2image --text "$i.gt.txt" --outputbase "/home/owner/Downloads/output/im$i" --xsize 680 --ysize 200 --fonts_dir /usr/share/fonts/truetype/Amiri/ --font 'Amiri' --margin 0 --ptsize 30 --degrade_image --char_spacing 1;
done
@MahmoudMabrok
Copy link
Author

/**
 * Generates the ground-truth text files for the OCR training set.
 *
 * Writes `0.gt.txt` through `1000.gt.txt` into the current working directory.
 * Each file holds one pseudo-random non-negative number below 98785, left-padded
 * with zeros to five digits and rendered with Arabic-Indic digits. Every value is
 * also echoed to standard output. The generator is seeded with the constant 1200.
 */
fun main() {
    val rand = Random(1200)
    for (i in 0..1000) {
        // padStart returns a new string, so it must be chained into the value.
        // Calling it inside apply { } discards the result and leaves the number unpadded.
        // The ASCII '0' padding is turned into Arabic-Indic zeros by convertToArabic below.
        val num = String.format(Locale.forLanguageTag("ar"), "%d", rand.nextInt(98785))
            .padStart(5, '0')

        // Convert once and reuse the result for both the console echo and the file.
        val arabic = convertToArabic(num)
        print(arabic)
        File("$i.gt.txt").writeText(arabic)
    }
}


/**
 * Replaces every ASCII digit in [arabicStr] with the corresponding Arabic-Indic
 * digit (U+0660..U+0669). All other characters, including digits that are
 * already Arabic-Indic, are copied through unchanged.
 *
 * @param arabicStr text that may contain ASCII digits `0`-`9`.
 * @return a string of the same length with the ASCII digits substituted.
 */
fun convertToArabic(arabicStr: String): String =
    buildString {
        for (ch in arabicStr) {
            // The Arabic-Indic digits are contiguous starting at U+0660, so the
            // digit's numeric value is the offset from that code point.
            append(if (ch in '0'..'9') '\u0660' + (ch - '0') else ch)
        }
    }

@MahmoudMabrok
Copy link
Author

# Run tesseract with the ara_number model on rendered images 1 through 100,
# writing the recognized text to /home/owner/Downloads/data/output<i>.
# The rendering step passes --outputbase .../output/im$i to text2image, so the
# image files carry the "im" prefix (im<i>.tif).
for i in $(seq 100);
 do
tesseract "/home/owner/Downloads/output/im$i.tif"  "/home/owner/Downloads/data/output$i"  -l ara_number
 done

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment