Skip to content

Instantly share code, notes, and snippets.

Avatar
💭
𝄢

Manuel Aristarán jazzido

💭
𝄢
View GitHub Profile
View gist:8807800
# +files+ is a string of file names, one per line.
# The paths are listed verbatim in a heredoc; the leading "~" is plain text
# inside the string. NOTE(review): presumably whoever consumes +files+ splits
# it on newlines and expands "~" -- that code is not visible here; confirm.
files = <<-FILES
~/Dropbox/Indec-informa/pdf/iijul12[1].pdf
~/Dropbox/Indec-informa/pdf/iiago12[1].pdf
~/Dropbox/Indec-informa/pdf/iifeb09.pdf
~/Dropbox/Indec-informa/pdf/iidic11[1].pdf
~/Dropbox/Indec-informa/pdf/iiene12[1].pdf
~/Dropbox/Indec-informa/pdf/iioct11[1].pdf
FILES
View gist:11357558
# Sets up a Tabula object extractor over a PDF and opens an output file.
# NOTE(review): the code that uses +out+, +extractor+ and the coordinates is
# not visible in this snippet.
require 'tabula'
# Input PDF and output file names, both relative to the working directory.
pdf_file_path = "mineriafinal.pdf"
outfilename = "mineriafinalv3.csv"
# Opened for writing without a block, so nothing in this snippet closes the
# handle. NOTE(review): confirm it is closed further down.
out = open(outfilename, 'w')
# NOTE(review): :all presumably selects every page of the PDF -- confirm
# against the Tabula API.
extractor = Tabula::Extraction::ObjectExtractor.new(pdf_file_path, :all)
# Four bounds assigned in top/left/bottom/right order.
# NOTE(review): presumably the table area to extract, in PDF points -- confirm.
top, left, bottom, right = [104.46,13,580.54,820.82]
View segunda_seccion.py
import sys
import os
import re
from datetime import datetime
import urlparse
import json
from multiprocessing import Pool
from bs4 import BeautifulSoup
import requests
View gist:f6673936ffa36760ff2d
# coding: utf-8
module IndecScraper
class TableExtractor
attr_reader :extractor
DEFAULT_INDEX_PAGE = 3
PAGE_NUMBER_RE = /.*?(\d+)$/
def initialize(pdf_file)
@jazzido
jazzido / gist:dc5cc9b5126943ae82ea
Created Oct 26, 2014
ipython notebook - transporte publico bahía blanca
View gist:dc5cc9b5126943ae82ea
{
"metadata": {
"name": "",
"signature": "sha256:c6a9ad4d028cadcae6537cdf79e30b9c8ee62ea42be736ab4927fadb9925329e"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
View gist:b41583c7b28eec0bbdcb
(function(){function i(c,a,b){return g(c,a,b)}function g(c,a,b,j){j=j||{};a&&!n(a)&&(b=a,a=void 0);a=a||new Date;b=b||o;b.formats=b.formats||{};var i=a.getTime(),h=j.timezone,e=typeof h;if(j.utc||e=="number"||e=="string")a=p(a);if(h){if(e=="string")var k=h[0]=="-"?-1:1,q=parseInt(h.slice(1,3),10),r=parseInt(h.slice(3,5),10),h=k*(60*q+r);e&&(a=new Date(a.getTime()+h*6E4))}return c.replace(/%([-_0]?.)/g,function(c,e){var d;if(e.length==2){d=e[0];if(d=="-")d="";else if(d=="_")d=" ";else if(d=="0")d=
"0";else return c;e=e[1]}switch(e){case "A":return b.days[a.getDay()];case "a":return b.shortDays[a.getDay()];case "B":return b.months[a.getMonth()];case "b":return b.shortMonths[a.getMonth()];case "C":return f(Math.floor(a.getFullYear()/100),d);case "D":return g(b.formats.D||"%m/%d/%y",a,b);case "d":return f(a.getDate(),d);case "e":return f(a.getDate(),d==null?" ":d);case "F":return g(b.formats.F||"%Y-%m-%d",a,b);case "H":return f(a.getHours(),d);case "h":return b.shortMonths[a.getMonth()];case "I":return f(l(a),
d)
View poly_bahia.topojson
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@jazzido
jazzido / schema.groovy
Last active Aug 29, 2015
schema creation + data insertion
View schema.groovy
// Builds a configuration, opens a Titan graph with it and obtains the graph's
// management system object.
conf = new BaseConfiguration()
// Storage backend: Cassandra on localhost.
conf.setProperty("storage.backend","cassandra")
conf.setProperty("storage.hostname", "localhost")
// NOTE(review): "none" presumably turns off automatic schema creation, so
// types must be defined explicitly -- confirm against the Titan docs.
conf.setProperty("schema.default", "none")
conf.setProperty("cache.db-cache", true)
// Index backend: Elasticsearch on localhost.
// NOTE(review): "search" in the key is presumably the index name -- confirm.
conf.setProperty("index.search.backend", "elasticsearch")
conf.setProperty("index.search.hostname", "localhost")
g = TitanFactory.open(conf)
// NOTE(review): presumably used below to create the schema; the rest of the
// script is not visible here.
mgmt = g.getManagementSystem()
@jazzido
jazzido / README.md
Last active Aug 29, 2015 — forked from mbostock/.block
Argentina province grid
View README.md
View gist:9369d7134a8acb64be54
package org.neo4j.elasticsearch;
import io.searchbox.action.BulkableAction;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestResult;
import io.searchbox.client.JestResultHandler;
import io.searchbox.core.Bulk;
import io.searchbox.core.Delete;
import io.searchbox.core.Index;