Skip to content

Instantly share code, notes, and snippets.

View christopherkullenberg's full-sized avatar

Christopher Kullenberg christopherkullenberg

View GitHub Profile
@christopherkullenberg
christopherkullenberg / searchaltmedia.ipynb
Created January 11, 2017 20:55
Search Alt (Right) Media
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from subprocess import Popen, PIPE, STDOUT
from nltk.tokenize import sent_tokenize #make sure to install the full corpus.
import re
aFile = '/home/christopher/Desktop/Introduction to Computation and Programming Using Python, Revised - Guttag, John V..pdf'
def pdftoText(filename):
'''
Input: a PDF file
Output: output of pdftotext.
def pdfparser(fn):
import subprocess
'''Requires the pdf2txt.py script (invoked below as upload/pdf2txt.py) to be
available on the system. Please change the directories accordingly.'''
f = "upload/" + fn # fn is the filename. upload/ is a directory, pls change.
cmd = 'python3 upload/pdf2txt.py -o %s.txt %s' % (f.split('.')[0], f)
run = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = run.communicate()
# display errors if they occur
#if err:
'''Use with Python3'''
import re
import struct
import textwrap
thefile = open('style.css', encoding="utf-8")
lines = thefile.readlines()
def colorchanger(hexnumber):
'''Substitute hex colors according to Andrej's formula. Return RGB strings for CSS.'''
@christopherkullenberg
christopherkullenberg / almedalenparser.py
Created July 2, 2016 06:55
Parse html of almedalsveckan.info
import urllib.request
import re
from bs4 import BeautifulSoup
with urllib.request.urlopen('http://www.almedalsveckan.info/event/user-view/38029') as event:
html = event.read()
soup = BeautifulSoup(html, 'html.parser')
datadivs = soup.findAll("div", { "class" : "row clearfix" })
for d in datadivs:
leftdivs = d.findAll("div", { "class" : "leftcol" })
<html>
<head>
<meta charset="utf-8" />
<link rel="stylesheet" href="style.css">
<title>Facescraper</title>
</head>
<body>
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Import modules for CGI handling and UTF-8 handling of input/output
import cgi, cgitb
import sys
import re
import os
import sqlite3
import numpy as np
import collections
#-*- coding: utf8 -*-
import json
from json import load
import sqlite3
import hmac
import hashlib
from facepy import GraphAPI
from django.core.serializers.json import DjangoJSONEncoder
import json
from os import listdir
from lxml import etree as ET
from gexf import *
from itertools import combinations
#import xml.etree.ElementTree as ET #Use this if you don't have lxml installed
# Open up a gexf file
gexf = Gexf("Author-Institution network", "GU")
graph = gexf.addGraph("undirected", "static", "Swepub network")
attribute_node = graph.addNodeAttribute("University", "default_value", "string")
import csv
from os import listdir
import re
from gexf import *
gexf = Gexf("Twitter Mentions Network", "Test")
graph = gexf.addGraph("directed", "static", "Twitter network")