Skip to content

Instantly share code, notes, and snippets.

@jkklapp
jkklapp / scraper.py
Created October 27, 2015 17:18
Gets some concert data from www.wegottickets.com
from lxml import html
import requests
# Set the number of pages to scrape
PAGES_TO_SCRAP = 15
# Base URL for the search results.
BASE_PAGE = 'http://www.wegottickets.com/searchresults/page/'
# XPATH selector for the event links
EVENT_LINK_SELECTOR = '//a[@class="event_link"]'
@jkklapp
jkklapp / get_xlsx.py
Last active August 29, 2015 14:08
Get XLSX files, convert them to CSV and group them by date
#!/usr/bin/env python
import xml.etree.ElementTree as ET
import os
import urllib, urllib2
# Get the HTML
response = urllib2.urlopen('http://www.cuartopoder.es/multimedia/2014/10/11/gastos-de-los-exdirectivos-de-caja-madrid-uno-a-uno-con-las-tarjetas-negras-tabla/3403')
html = response.read()
# Parse the HTML looking for the links
@jkklapp
jkklapp / bc.html
Last active August 29, 2015 14:07
Gets data from a CSV file of the black cards and visualizes it in a D3.js template
<!DOCTYPE html>
<meta charset="utf-8">
<style>
span, strong {
font-family: "Helvetica Neue", Helvetica, Arial;
font-size: 14px;
}
text {
@jkklapp
jkklapp / see_data.html
Created October 3, 2014 19:50
HTML page to see the resulting data from get_data.py
<!DOCTYPE html>
<meta charset="utf-8">
<style>
body {
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
margin: auto;
position: relative;
width: 960px;
}
@jkklapp
jkklapp / get_data.py
Last active August 29, 2015 14:07
Prints JSON data of the files, directories and sizes for a given path
#!/usr/bin/env python
import os, sys, json
def get_dir_size(base_dir):
total_size = 0
for dirpath, dirnames, filenames in os.walk(base_dir):
if '/.' in dirpath:
continue
for f in filenames:
@jkklapp
jkklapp / magneter
Last active August 29, 2015 14:01
Get the magnet link for the torrent with the most seeders for your favourite show. Run weekly to get new episodes.
#!/usr/bin/env python
import time
import requests
import xmltodict
import sys
from lxml import etree
BASE_URL = 'http://thepiratebay.si/search/'
@jkklapp
jkklapp / bash_command_history.py
Last active August 29, 2015 14:00
Run from the command line to get 2 PNGs with bar charts of your Bash command history
#!/usr/bin/env python
from os.path import expanduser
from itertools import groupby
import matplotlib.pyplot as plt
import numpy as np
from numpy import arange
def plotTagValues(data,tag):
labels = data.keys()