Andrew Ollett aso2101

## d2r.py
import re

f = open('surasundari.xml','r')
w = open('surasundari-lg.xml','w')
num = re.compile(r'"([०१२३४५६७८९]+)"')
d2r_num = {'०':'0', '१':'1', '२':'2', '३':'3', '४':'4', '५':'5', '६':'6', '७':'7','८':'8','९':'9'}
text = f.readlines()
def num_d2r(number):
    rString = ''
    for s in str(number):

## Bha.Vai.148.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aso2101
                / Bha.Vai.148.md
            
            
              Created
              August 30, 2017 01:52
            
              
                Markdown for Annodoc with Universal Dependencies
              
          
layout: entry
title: Vairāgyaśataka 148

bhrāntvā dēśam anēkadurgaviṣamaṁ prāptaṁ na kiṁcit phalaṁ
obj(bhrāntvā, dēśam)
amod(dēśam, anēkadurgaviṣamaṁ)


## in
// --- BEGIN Sanskrit (Ollett) ---
partial alphanumeric_keys
xkb_symbols "san-trans" {
    name[Group1] = "Sanskrit (Transliteration)";
    key.type="FOUR_LEVEL";

    // Roman digits
    key <TLDE>  { [   apostrophe,   asciitilde,   dead_grave,    dead_tilde     ] };
    key <AE01>  { [   1,            exclam,       U0323          ] };
    key <AE02>  { [   2,            at,           U0324          ] };

## verses_notes.org

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aso2101
                / verses_notes.org
            
            
              Last active
              October 30, 2017 21:02
            
              
                A collection of Sanskrit verses for beginning students
              
          
    [X] Ind.Sp. 101 = PaTa.3.96                         :simple:nominal:kr̥tya:

anityāni śarīrāṇi vibhavō naiva śāśvataḥ |
  nityaṁ saṁnihitō mr̥tyuḥ kartavyō dharmasaṁgrahaḥ ||
[X] Sūktimuktāvalī 4.75                 :singular:dual:plural:satisaptamī:

jātē jagati vālmīkau śabdaḥ kavir iti sthitaḥ |
  vyāsē jātē kavī cēti kavayaś cēti daṇḍini ||
  http://prakrit.info/sanskrit/readings/sumu-4-75.html
[ ] Unknown 1


## sgt.json
{
    "data": {
	"General": {
	    "intro": "<p>These are general terms which apply to various aspects of Sanskrit grammar.</p>",
	    "glossary": [
		{
		    "id" : "prakrtih",
		    "skt" : "prakr̥tiḥ",
		    "eng" : "base",
		    "comm" : "That to which an <a href='#pratyayah'>affix</a> is added. A base can be nominal (see <a href='#pratipadikam'>nominal stem</a>) or verbal. This is a synonym of <a href='#angam'>aṅgam</a>."

## verify_gatha.py
# -*- coding: utf-8 -*-

""" Usage: python3 verify_gatha.py FILENAME """
""" Results in FILENAME.err (a list of errors)
            and FILENAME.log (metrical data) """

""" Take a file in UTF-8 encoding, in the ISO-15919 transliteration
    scheme, and try to scan its verses and match them against the
    canonical pattern of the Prakrit gāthā. If there are any errors,
    output them to an error file. """

## versify.py
# -*- coding: utf-8 -*-

""" Usage: python3 versify.py FILENAME """
""" Results in FILENAME.log (errors and statistics)
            and FILENAME.json (a json file of metrically parsed text) """

""" This program expects the text to be in the format
    represented by the GRETIL Kuṟuntokai
      (http://gretil.sub.uni-goettingen.de/gretil/4_drav/tamil/pm/pm110__u.htm)
    namely: the

## kannadaMap.json

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                aso2101
                / kannadaMap.json
            
            
              Created
              February 27, 2020 16:01
            
              
                GEOJson data for the place-names mentioned in the Way of the Poet King (Kavirājamārgaṁ)
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## text_detect.py
"""OCR with PDF/TIFF as source files on GCS"""
# USAGE: python text_detect.py SOURCE_FILE OUTPUT_FILE
#   Note that both SOURCE_FILE and OUTPUT_FILE must be
#   in the Google Cloud bucket. For example:
#
#   python text_detect.py gs://project-name/file.pdf gs://project-name/read
#
# The API will gather the responses for each page into
#   a JSON file on the Google Cloud bucket, e.g.
#   OUTPUT_FILE-output-1-to-1.json.

## OSD.py
# Uses Tesseract (tesserocr) to recognize files
#   in a directory. I use this as follows:
#
#   1. split the PDF into JPG images in a directory
#        called "IMAGES" (e.g., pdftoppm -jpeg input.pdf IMAGES/output)
#   2. run this script (python OSD.py), which will
#        produce a text file for each image in "IMAGES".
#   3. concatenate the text files with tail
#   4. if desired,
	import re

	f = open('surasundari.xml','r')
	w = open('surasundari-lg.xml','w')
	num = re.compile(r'"([०१२३४५६७८९]+)"')
	d2r_num = {'०':'0', '१':'1', '२':'2', '३':'3', '४':'4', '५':'5', '६':'6', '७':'7','८':'8','९':'9'}
	text = f.readlines()
	def num_d2r(number):
	rString = ''
	for s in str(number):
	// --- BEGIN Sanskrit (Ollett) ---
	partial alphanumeric_keys
	xkb_symbols "san-trans" {
	name[Group1] = "Sanskrit (Transliteration)";
	key.type="FOUR_LEVEL";

	// Roman digits
	key <TLDE> { [ apostrophe, asciitilde, dead_grave, dead_tilde ] };
	key <AE01> { [ 1, exclam, U0323 ] };
	key <AE02> { [ 2, at, U0324 ] };
	{
	"data": {
	"General": {
	"intro": "<p>These are general terms which apply to various aspects of Sanskrit grammar.</p>",
	"glossary": [
	{
	"id" : "prakrtih",
	"skt" : "prakr̥tiḥ",
	"eng" : "base",
	"comm" : "That to which an <a href='#pratyayah'>affix</a> is added. A base can be nominal (see <a href='#pratipadikam'>nominal stem</a>) or verbal. This is a synonym of <a href='#angam'>aṅgam</a>."
	# -*- coding: utf-8 -*-

	""" Usage: python3 verify_gatha.py FILENAME """
	""" Results in FILENAME.err (a list of errors)
	and FILENAME.log (metrical data) """

	""" Take a file in UTF-8 encoding, in the ISO-15919 transliteration
	scheme, and try to scan its verses and match them against the
	canonical pattern of the Prakrit gāthā. If there are any errors,
	output them to an error file. """
	# -*- coding: utf-8 -*-

	""" Usage: python3 versify.py FILENAME """
	""" Results in FILENAME.log (errors and statistics)
	and FILENAME.json (a json file of metrically parsed text) """

	""" This program expects the text to be in the format
	represented by the GRETIL Kuṟuntokai
	(http://gretil.sub.uni-goettingen.de/gretil/4_drav/tamil/pm/pm110__u.htm)
	namely: the
	"""OCR with PDF/TIFF as source files on GCS"""
	# USAGE: python text_detect.py SOURCE_FILE OUTPUT_FILE
	# Note that both SOURCE_FILE and OUTPUT_FILE must be
	# in the Google Cloud bucket. For example:
	#
	# python text_detect.py gs://project-name/file.pdf gs://project-name/read
	#
	# The API will gather the responses for each page into
	# a JSON file on the Google Cloud bucket, e.g.
	# OUTPUT_FILE-output-1-to-1.json.
	# Uses Tesseract (tesserocr) to recognize files
	# in a directory. I use this as follows:
	#
	# 1. split the PDF into JPG images in a directory
	# called "IMAGES" (e.g., pdftoppm -jpeg input.pdf IMAGES/output)
	# 2. run this script (python OSD.py), which will
	# produce a text file for each image in "IMAGES".
	# 3. concatenate the text files with tail
	# 4. if desired,