Skip to content

Instantly share code, notes, and snippets.

View tshrinivasan's full-sized avatar

Shrinivasan T tshrinivasan

View GitHub Profile
@tshrinivasan
tshrinivasan / CSS for FreeTamilEbooks.com
Last active August 29, 2015 14:03
CSS for FreeTamilEbooks.com
<style type="text/css">
body {
background: white;
font-size: 12pt;
}
strong,h3,h4{
font-weight: 900;
color:midnightblue;
}
import mechanize
import cookielib
# http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/
# Browser
br = mechanize.Browser()
# Cookie Jar
cj = cookielib.LWPCookieJar()
@tshrinivasan
tshrinivasan / clean-html.py
Created March 8, 2017 03:12
Clean HTML Pages
import lxml.html.clean as clean
from BeautifulSoup import BeautifulSoup
# Paths of the HTML page to read and of the cleaned page to produce.
input_file = 'input.html'
output_file = 'output.html'

# Read the whole input page into memory.
# The mode was 'rw', which is not a valid open() mode: Python 3 rejects it
# with ValueError and Python 2 behaviour is platform-dependent. The file is
# only read here, so plain 'r' is used, and the `with` block closes the
# handle as soon as the content has been read.
with open(input_file, 'r') as source:
    orig_content = source.read()
@tshrinivasan
tshrinivasan / வேர்ச்சொல்_வடிகட்டி.py
Created March 1, 2019 13:48
வேர்ச்சொல்_வடிகட்டி.py
from tamilstemmer import TamilStemmer
# Sample inflected Tamil words to run through the stemmer.
wordlist = [u'மலைகள்',u'பாடுதல்',u'ஓடினான்']
#expected = [u'மலை',u'பாடு', u'ஓடி']

# The class is imported as TamilStemmer; the previous spelling "TamilStemer"
# raised NameError before any word was processed.
ta_stemmer = TamilStemmer()

for word in wordlist:
    # NOTE(review): whatever stemWord() returns is neither stored nor printed
    # here -- confirm whether the result should be shown or compared with the
    # expected stems listed above.
    ta_stemmer.stemWord(word)
@tshrinivasan
tshrinivasan / remove_strings_from_files.py
Created March 31, 2019 11:00
#This program removes the words listed in a given file from all the files inside a directory, recursively. # Got the sed idea from http://www.linuxask.com/questions/replace-multiple-strings-using-sed
#This program removes the words listed in a given file from all the files inside a directory, recursively.
# Got the sed idea from http://www.linuxask.com/questions/replace-multiple-strings-using-sed
import sys
import glob
import os
import argparse
parser = argparse.ArgumentParser()
@tshrinivasan
tshrinivasan / OverPassToGoogleSheet.gs
Created May 26, 2019 09:54
OverPassToGoogleSheet.gs
//var langCode ='ta'; -- TODO Make it language independent.
/**
 * Handler that Apps Script invokes for HTTP GET requests to the web app.
 * Builds a template from the project file 'Index.html' and returns the
 * result of evaluating it.
 */
function doGet() {
  var template = HtmlService.createTemplateFromFile('Index.html');
  return template.evaluate();
}
/**
 * Writes a fixed marker message to the Apps Script log.
 */
function doSomething() {
  var message = 'I was called!';
  Logger.log(message);
}
@tshrinivasan
tshrinivasan / parse-voter-list.py
Created October 3, 2019 12:49
Code to parse a voter-list PDF that was OCRed by Tesseract
import sys
in_file = sys.argv[1]
content = open(in_file).read()
out = open("result.csv","a")
con = content.split("வாக்காளர்‌ பெயர்‌")
@tshrinivasan
tshrinivasan / fix_records.py
Created November 22, 2019 07:39
A program to find and replace bibliographical data
# program name : fix_records.py
# author : tshrinivasan@gmail.com
# version : 0.1
import sys
import os
import argparse
parser = argparse.ArgumentParser(description='A program to find and replace bibliographical data')
@tshrinivasan
tshrinivasan / make_unique_words.py
Created May 25, 2020 17:17
make unique tamil words from any given big file
import sys
import re
import tamil
# Path of the input file, taken from the first command-line argument.
infile = sys.argv[1]
# Punctuation and symbol characters, including typographic quotes.
# NOTE(review): the name is a misspelling of "special_characters"; it is left
# unchanged because code beyond this excerpt may reference it.
# NOTE(review): "\’" is not a recognised escape sequence, so that entry is the
# two-character string backslash + ’ (and newer Python versions warn about
# it). A plain "’" is already in the list -- confirm what was intended.
spechal_charecers = ["~","`","!","@","#","$","%","^","&","*","(",")","_","-","+","=","[","{","]","}",'\\',"|",";",":","\'",'\"',"<",">",".","?","/",',',"’","\’",'“','”']
# The ten decimal digits.
# NOTE(review): these are ints, not digit strings -- confirm how they are
# compared against text read from the file.
numbers = [1,2,3,4,5,6,7,8,9,0]
# Four Tamil consonant-plus-pulli strings; presumably the letters affected by
# sandhi that later code handles -- TODO confirm against the rest of the file.
sandhi_chars = ['க்','ச்', 'த்', 'ப்']
#!/usr/bin/env python3
# Author: T Shrinivasan
# Email : tshrinivasan@gmail.com
# License : GNU GPL 3.0
# sudo apt install libimage-exiftool-perl imagemagick gwenview
# /etc/ImageMagick-6/policy.xml