Skip to content

Instantly share code, notes, and snippets.

View harshavardhana's full-sized avatar
🌚
I may be slow to respond.

Harshavardhana harshavardhana

🌚
I may be slow to respond.
View GitHub Profile
@harshavardhana
harshavardhana / clean_html.py
Created November 29, 2012 09:31
Clean HTML
def clean_html(html):
""" Remove HTML markup from the given string. """
# remove inline JavaScript / CSS
x = re.sub(r'(?is)<(script|style).*?>.*?(</\1>)', '', html.strip())
# remove html comments. must be done before removing regular tags since comments can contain '>' characters.
x = re.sub(r'(?s)<!--(.*?)-->[\n]?', '', x)
# remove the remaining tags
x = re.sub(r'(?s)<.*?>', ' ', x)
# remove html entities
x = remove_entities(x)
@harshavardhana
harshavardhana / grabURLsharecount.py
Created December 5, 2012 08:00
URL share count on Twitter
#!/usr/bin/env python
import urllib
import pycurl
import time
import sys
import json
import cStringIO
from optparse import OptionParser
@harshavardhana
harshavardhana / grab_followers_tweets_following.py
Created December 6, 2012 04:59
Get followers, tweets, and following through "https://twitter.com/<userid>/followers" and encode them as JSON
#!/usr/bin/env python
import sys
import logging
import chardet
import json
from optparse import OptionParser
from boilerpipy import (Extractor, isValidhtml,
compat_urllib_request)
@harshavardhana
harshavardhana / get_tasks_asana.py
Created December 25, 2012 05:40
Get Asana tasks from User API key and Project ID
#!/usr/bin/env python
import urllib
import pycurl
import sys
import json
from optparse import OptionParser
parser = OptionParser(usage="%prog: [options] [URL]")
parser.add_option('-p', '--projectid', help="Project ID")
@harshavardhana
harshavardhana / dircmp.go
Last active December 17, 2015 04:39
Trivial dircmp in Go (work in progress; to be enhanced)
package main
import (
"path/filepath"
"os"
"flag"
"fmt"
)
type fileattr struct {
@harshavardhana
harshavardhana / trees.sh
Created June 5, 2013 11:08
Print directory tree structure
#!/bin/sh
# 1st sed: remove colons
# 2nd sed: replace higher level folder names with dashes
# 3rd sed: indent graph three spaces
# 4th sed: replace first dash with a vertical bar
if [ $# -eq 1 ]; then
ls -R $1 | grep ":$" | sed -e 's/:$//' -e 's/[^-][^\/]*\//--/g' -e 's/^/ /' -e 's/-/|/'
elif [ $# -gt 1 ]; then
echo "Only one directory at a time please!!"
exit 255
@harshavardhana
harshavardhana / tshark.sh
Last active December 19, 2015 07:59
Sharking network traffic
#!/bin/bash
if [ $# -eq 0 ]; then
echo "Please provide the pcap file for sharking.."
exit 255
fi
capinfos $1 1>/dev/null 2>/dev/null
if [ $? -eq 1 ]; then
@harshavardhana
harshavardhana / test_mmap.cpp
Created August 1, 2013 20:30
./test_mmap.static 1>nostrace.rhel63.static.log 2>&1 &
/*
* g++ -O3 --std=c++0x -o test_mmap test_mmap.cpp
*/
#include <vector>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@harshavardhana
harshavardhana / isvalidhostname.py
Created August 17, 2013 03:57
Check for valid hostname
import re
def isvalidhostname(hostname):
"""
Validate hostname
"""
regex = re.compile("[^A-Z\d-]", re.IGNORECASE)
if len(hostname) > 255:
return False
@harshavardhana
harshavardhana / file_changed.sh
Last active December 22, 2015 16:59
Tar read test on Gluster NFS volume
#!/bin/bash
# _init: populate DIRZ with the output of `seq 1 100` (the integers 1 through 100).
# DIRZ is assigned without `local`, so it stays visible to the rest of the script
# after _init returns.
# NOTE(review): presumably these numbers are used as directory names/indices by
# the test -- confirm against the rest of the script, which is not visible here.
_init()
{
DIRZ=$( seq 1 100 )
}
cleanup()
{
echo "Cleaning the test"