Skip to content

Instantly share code, notes, and snippets.

View tshrinivasan's full-sized avatar

Shrinivasan T tshrinivasan

View GitHub Profile
import mechanize
import cookielib
# http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/
# Browser
br = mechanize.Browser()
# Cookie Jar
cj = cookielib.LWPCookieJar()
@tshrinivasan
tshrinivasan / CSS for FreeTamilEbooks.com
Last active August 29, 2015 14:03
CSS for FreeTamilEbooks.com
<style type="text/css">
body {
background: white;
font-size: 12pt;
}
strong,h3,h4{
font-weight: 900;
color:midnightblue;
}
@tshrinivasan
tshrinivasan / split-page.py
Last active April 15, 2024 19:55
Split each PDF page vertically into two halves; useful for scanned PDFs in which every page holds two book pages side by side
# Source http://stackoverflow.com/a/15741856/1301753
import copy
import sys
import math
import pyPdf
def split_pages(src, dst):
src_f = file(src, 'r+b')
dst_f = file(dst, 'w+b')
@tshrinivasan
tshrinivasan / clean-html.py
Created March 8, 2017 03:12
Clean HTML Pages
import lxml.html.clean as clean
from BeautifulSoup import BeautifulSoup
# Paths of the HTML file to clean and of the file meant to receive the result.
input_file = 'input.html'
output_file = 'output.html'

# Read the whole input document into memory.
# The mode is plain 'r': the combined string 'rw' is not a valid mode
# (Python 3 rejects it with ValueError) and the file is only read here.
# The context manager closes the handle as soon as the read completes.
with open(input_file, 'r') as source:
    orig_content = source.read()
@tshrinivasan
tshrinivasan / வேர்ச்சொல்_வடிகட்டி.py
Created March 1, 2019 13:48
வேர்ச்சொல்_வடிகட்டி.py
from tamilstemmer import TamilStemmer

# Sample inflected Tamil words to feed to the stemmer.
wordlist = [u'மலைகள்',u'பாடுதல்',u'ஓடினான்']
#expected = [u'மலை',u'பாடு', u'ஓடி']

# Instantiate the stemmer under the exact name it is imported by; the
# misspelling `TamilStemer` is undefined and raises NameError at runtime.
ta_stemmer = TamilStemmer()
for word in wordlist:
ta_stemmer.stemWord(word)
@tshrinivasan
tshrinivasan / remove_strings_from_files.py
Created March 31, 2019 11:00
This program removes the words listed in a given file from all the files inside a directory, recursively. The sed idea comes from http://www.linuxask.com/questions/replace-multiple-strings-using-sed
# This program removes the words listed in a given file from all the files inside a directory, recursively.
# The sed idea comes from http://www.linuxask.com/questions/replace-multiple-strings-using-sed
import sys
import glob
import os
import argparse
parser = argparse.ArgumentParser()
@tshrinivasan
tshrinivasan / OverPassToGoogleSheet.gs
Created May 26, 2019 09:54
OverPassToGoogleSheet.gs
//var langCode ='ta'; -- TODO Make it language independent.
/**
 * Builds an HTML template from the project file 'Index.html' and returns
 * the result of evaluating it.
 *
 * NOTE(review): named like the Apps Script web-app GET handler; confirm
 * the deployment actually serves this as its entry point.
 *
 * @return {*} Whatever `evaluate()` yields for the 'Index.html' template.
 */
function doGet() {
  var template = HtmlService.createTemplateFromFile('Index.html');
  return template.evaluate();
}
/**
 * Writes the fixed message 'I was called!' through Logger.log.
 * Takes no arguments and returns nothing.
 */
function doSomething() {
Logger.log('I was called!');
}
@tshrinivasan
tshrinivasan / parse-voter-list.py
Created October 3, 2019 12:49
Code to parse a voter-list PDF that has been OCRed with Tesseract
import sys
# Path of the input file, taken from the first command-line argument.
in_file = sys.argv[1]
# Read the entire input into memory as a single string.
content = open(in_file).read()
# Open result.csv in append mode, so existing contents are kept and new
# output is added at the end.
out = open("result.csv","a")
# Split the text on a fixed Tamil marker string.
# NOTE(review): the marker presumably labels the voter-name field, so each
# piece after the first would belong to one voter entry - confirm.
con = content.split("வாக்காளர்‌ பெயர்‌")
@tshrinivasan
tshrinivasan / fix_records.py
Created November 22, 2019 07:39
A program to find and replace bibliographical data
# program name : fix_records.py
# author : tshrinivasan@gmail.com
# version : 0.1
import sys
import os
import argparse
parser = argparse.ArgumentParser(description='A program to find and replace bibliographical data')
@tshrinivasan
tshrinivasan / tess_ocr_pdf.py
Created May 23, 2020 16:36
Convert a PDF file to text using Tesseract OCR
import os
import sys
import glob
#import telegram_send
all_pdf = glob.glob("*.pdf")
all_pdf_count = len(all_pdf)