Skip to content

Instantly share code, notes, and snippets.

@flavioamieiro
flavioamieiro / tasks.py
Created July 24, 2015 12:37
Send documents to pypln with celery
import glob
from celery import Celery
import pymongo
import pypln.api
import settings
# Celery application named 'tasks', configured with the "mongodb" backend.
app = Celery('tasks', backend="mongodb")
# MongoClient() is called without arguments, so the driver's defaults apply.
client = pymongo.MongoClient()
# Handle to the "send_to_pypln" database on that client.
database = client["send_to_pypln"]
[2015-06-25 18:20:55,521: ERROR/MainProcess] Task pypln.backend.workers.elastic_indexer.ElasticIndexer[76b5682a-660b-480f-b533-48f619246362] raised unexpected: SerializationError({u'mimetype': 'application/pdf', u'upload_date': datetime.datetime(2015, 6, 25, 16, 20, 54, 169000), u'forced_decoding': False, u'language': 'un', u'text': u'This is a minimal pdf.\n1', u'filename': u'minimal_pdf.pdf_1435249254.17', u'length': 12876, u'file_id': '558c2a66798ebd634b0b249f', u'file_metadata': {'UserProperties': 'no', 'Tagged': 'no', 'Form': 'none', 'Producer': 'pdfTeX-1.40.15', 'Creator': 'TeX', 'Encrypted': 'no', 'JavaScript': 'no', 'Suspects': 'no', 'Optimized': 'no', 'PDF version': '1.5', 'ModDate': 'Thu Jun 25 18:17:25 2015', 'Page size': '612 x 792 pts (letter)', 'CreationDate': 'Thu Jun 25 18:17:25 2015', 'Pages': '1', 'Page rot': '0'}, u'contents': '%PDF-1.5\n%\xd0\xd4\xc5\xd8\n3 0 obj\n<<\n/Length 105 \n/Filter /FlateDecode\n>>\nstream\nx\xda%\x8c\xbb\n\x800\x0c\x00\xf7~E\xc6vh$\xe9{\x15tp\xce&\x0e\x82\x8
@flavioamieiro
flavioamieiro / firefox_url_extractor.py
Last active August 29, 2015 14:16
This script gets URLs from the tabs of a Firefox session.
#!/usr/bin/env python3
import json
def get_session_from_file(filename):
    """Load a session from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 encoded JSON
            (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    # JSON is UTF-8 by specification, so do not depend on the locale's
    # default encoding when decoding the file.
    with open(filename, 'r', encoding='utf-8') as fp:
        return json.load(fp)
def get_tabs_from_session(session):
    """Return the tabs of the first window stored in *session*.

    Args:
        session: Mapping with a 'windows' sequence whose items each carry
            a 'tabs' entry.

    Returns:
        The 'tabs' value of the first window, unchanged.

    Raises:
        KeyError: If 'windows' or 'tabs' is missing.
        IndexError: If the session has no windows.
    """
    # NOTE: only window 0 is read; tabs of any other window are ignored.
    return session['windows'][0]['tabs']
@flavioamieiro
flavioamieiro / netvibes_unread_sidebar.js
Created August 27, 2014 19:56
Shows only feeds with unread items in netvibes' sidebar
window.setInterval( function() {
Array.prototype.slice.call(document.getElementsByClassName('nv-treeview-feed'), 0).map(
function (el, idx, arr) {
// if you're running on chromium you may need to use innerText
if (el.children[0].children[2].innerHTML == 0) {
el.hidden = true;
} else {
el.hidden = false;
}
}
@flavioamieiro
flavioamieiro / get_wordcloud.py
Created August 20, 2014 01:10
Get wordcloud from a document in PyPLN
#!/usr/bin/env python2
# just give this script a document id (as in `$ python get_wordcloud.py 1`, for example)
# and it will download the wordcloud from PyPLN.
#
# Replace PYPLN_URL with the desired host.
import base64
import sys
import pypln.api
@flavioamieiro
flavioamieiro / darktable_selected
Created April 14, 2013 03:38
Export filename of selected photos from darktable.
#!/usr/bin/env bash
# Print the filename of every image currently selected in darktable by
# querying its library database with sqlite3.
# The database path is quoted so that a $HOME containing spaces or glob
# characters is passed to sqlite3 as a single argument.
sqlite3 "$HOME/.config/darktable/library.db" "select filename from images where
id in (select imgid from selected_images);"
@flavioamieiro
flavioamieiro / wp_extractor.py
Created March 21, 2013 21:53
Extrai textos do dump da wikipedia em xml
from lxml import etree
FILENAME = 'data/ptwiki-20130306-pages-meta-current.xml'
def get_parser():
for event, element in etree.iterparse(FILENAME, events=('end',)):
if element.tag.endswith('page'):
namespace_tag = element.find(
'{http://www.mediawiki.org/xml/export-0.8/}ns')
if namespace_tag.text == '0':
# Integer value of each Roman numeral symbol.
dicionario = {
    'I': 1,
    'V': 5,
    'X': 10,
    'L': 50,
    'C': 100,
    'D': 500,
    'M': 1000,
}
def eh_o_ultimo_caractere(indice, romano):
    """Return True when *indice* is the index of the last item of *romano*.

    Args:
        indice: Zero-based position to check.
        romano: Sized object (anything accepted by ``len``) to check against.

    Returns:
        ``True`` if ``indice`` equals ``len(romano) - 1``, else ``False``.
    """
    return indice == (len(romano) - 1)
def retorna_o_valor_do_proximo_caractere(indice, romano):
if eh_o_ultimo_caractere(indice,romano):
valor_do_proximo_caracter = 0
else:
valor_do_proximo_caracter = dicionario[romano[indice + 1]]
import unittest
def fizzbuzz(numero):
resultado = ""
if numero%3 == 0:
resultado += 'fizz'
if numero%5 == 0:
resultado += 'buzz'
if resultado == "":
resultado = numero
@flavioamieiro
flavioamieiro / CR2JPEG.sh
Created November 17, 2012 22:23
Extrair JPEG do header de um arquivo raw da Canon (CR2)
#!/bin/bash
# Extract the embedded JPEG preview from every Canon raw file (*.CR2) found
# under the current directory, then copy the raw file's tags onto that JPEG.
# IFS= keeps leading/trailing whitespace in file names intact; -print0 with
# read -d '' handles names containing newlines.
find . -name '*.CR2' -print0 | while IFS= read -r -d '' FILE
do
    # "-w .JPG" only swaps the extension, so the preview is written next to
    # the raw file. Build the JPEG path from the full path rather than from
    # the bare basename, which is only correct for files directly in ".".
    JPEG="${FILE%.CR2}.JPG"
    exiftool -b -previewImage -ext .CR2 -w .JPG "$FILE"
    exiftool -tagsFromFile "$FILE" "$JPEG"
done