Christopher Kullenberg christopherkullenberg

## swepubscraper.py
from urllib.request import urlopen

# Value of the `start` query parameter for the page being requested.
counter = 1

# Page through the Swepub search results until a page contains no records.
while True:
    url = 'http://libris.kb.se/xsearch?d=swepub&hitlist&q=l%C3%A4ros%C3%A4te%3agu&f=ext&spell=true&hist=true&n=200&format=json&start=' + str(counter)
    print("Fetching: " + url)
    # Close the HTTP response deterministically instead of leaking the connection.
    with urlopen(url) as response:
        data = response.read()
    # A response without any "identifier" key is treated as "no records left".
    if b'"identifier"' not in data:
        print("No more records!")
        # Without this break the loop keeps requesting pages forever.
        break
    # Move to the next page; the step matches n=200 in the query string.
    counter += 200

## swepubjsonparser.py
import json
from os import listdir

# Parse and print every JSON file found in the data directory.
for filename in listdir("GU20151228json/"):  # all files in the directory
    # Decode explicitly as UTF-8 (the standard JSON encoding) so the result
    # does not depend on the platform's default locale encoding.
    with open("GU20151228json/" + filename, encoding="utf-8") as currentFile:
        jsondata = json.load(currentFile)
        print(jsondata)

## swepubxmlparser.py
"""
Data structure: http://libris.kb.se/xsearch?d=swepub&hitlist&q=l%C3%A4ros%C3%A4te%3agu&f=ext&spell=true&hist=true&n=200&p=1
Goal: extract only the text content of the <subfield code="u"> element in a record such as:
<datafield tag="700" ind1="1" ind2=" ">
 <subfield code="a">Alvestad, Torgeir,</subfield>
 <subfield code="d">1960-,</subfield>
 <subfield code="u">Göteborgs universitet, Institutionen för pedagogik och didaktik, University of Gothenburg, Department of Education</subfield>
 <subfield code="4">edt</subfield>
 <subfield code="0">(SwePub:chalmers.se)xalvto</subfield>
</datafield>

## tweetTSVtoGEXF.py
import csv
from os import listdir
import re
from gexf import *


# Create the Gexf object for the Twitter mentions network.
# NOTE(review): the two strings are presumably creator/description metadata —
# confirm against the gexf library.
gexf = Gexf("Twitter Mentions Network", "Test")
# Add a graph to it, passing "directed", "static" and "Twitter network"
# (presumably edge type, mode and label — TODO confirm).
graph = gexf.addGraph("directed", "static", "Twitter network")


## Swepub to gexf.py
from os import listdir
from lxml import etree as ET
from gexf import *
from itertools import combinations
#import xml.etree.ElementTree as ET #Use this if you don't have lxml installed

# Create the Gexf object for the author-institution network.
# NOTE(review): the two strings are presumably creator/description metadata —
# confirm against the gexf library.
gexf = Gexf("Author-Institution network", "GU")
# Add a graph to it, passing "undirected", "static" and "Swepub network"
# (presumably edge type, mode and label — TODO confirm).
graph = gexf.addGraph("undirected", "static", "Swepub network")
# Register a node attribute named "University"; the remaining arguments look
# like a default value and a value type — verify against the gexf library.
attribute_node = graph.addNodeAttribute("University", "default_value", "string")

## facescraper.py
#-*- coding: utf8 -*-
import json
from json import load
import sqlite3
import hmac
import hashlib
from facepy import GraphAPI
from django.core.serializers.json import DjangoJSONEncoder
import json

## facebooksok.py
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Import modules for CGI handling and UTF-8 handling of input/output
import cgi, cgitb
import sys
import re
import os
import sqlite3
import numpy as np
import collections

## index.html
<html>
<head>
    <meta charset="utf-8" />
    <link rel="stylesheet" href="style.css">
    <title>Facescraper</title>
</head>

<body>


## almedalenparser.py
import urllib.request
import re
from bs4 import BeautifulSoup

# Download one event page and collect its "leftcol" divs row by row.
with urllib.request.urlopen('http://www.almedalsveckan.info/event/user-view/38029') as event:
    html = event.read()
    soup = BeautifulSoup(html, 'html.parser')
    # find_all is the current bs4 method name; findAll is a deprecated alias.
    # class_ matches the same "class" attribute the attrs dict matched before.
    datadivs = soup.find_all("div", class_="row clearfix")
    for d in datadivs:
        leftdivs = d.find_all("div", class_="leftcol")

## colorconverter.py
'''Use with Python3'''
import re
import struct
import textwrap

# Read the whole stylesheet into memory; the context manager closes the file
# handle as soon as the lines have been read instead of leaving it open.
with open('style.css', encoding="utf-8") as thefile:
    lines = thefile.readlines()

def colorchanger(hexnumber):
    '''Substitute hex colors according to Andrejs formula. Return RGB-strings for CSS'''
	from urllib.request import urlopen

	counter = 1

	while True:
	url = 'http://libris.kb.se/xsearch?d=swepub&hitlist&q=l%C3%A4ros%C3%A4te%3agu&f=ext&spell=true&hist=true&n=200&format=json&start=' + str(counter)
	print ("Fetching: " + url)
	data = urlopen(url).read()
	if not data.find(b'"identifier"') >= 0:
	print("No more records!")
	import json
	from os import listdir

	for filename in listdir("GU20151228json/"): #alla filer i en katalog
	with open("GU20151228json/" + filename) as currentFile:

	jsondata = json.load(currentFile)
	print(jsondata)
	"""
	Data structure: http://libris.kb.se/xsearch?d=swepub&hitlist&q=l%C3%A4ros%C3%A4te%3agu&f=ext&spell=true&hist=true&n=200&p=1
	Trying to access only the value after "code="u">" in:
	<datafield tag="700" ind1="1" ind2=" ">
	<subfield code="a">Alvestad, Torgeir,</subfield>
	<subfield code="d">1960-,</subfield>
	<subfield code="u">Göteborgs universitet, Institutionen för pedagogik och didaktik, University of Gothenburg, Department of Education</subfield>
	<subfield code="4">edt</subfield>
	<subfield code="0">(SwePub:chalmers.se)xalvto</subfield>
	</datafield>
	import csv
	from os import listdir
	import re
	from gexf import *


	gexf = Gexf("Twitter Mentions Network", "Test")
	graph = gexf.addGraph("directed", "static", "Twitter network")
	from os import listdir
	from lxml import etree as ET
	from gexf import *
	from itertools import combinations
	#import xml.etree.ElementTree as ET #Use this if you don't have lxml installed

	# Open up a gexf file
	gexf = Gexf("Author-Institution network", "GU")
	graph = gexf.addGraph("undirected", "static", "Swepub network")
	attribute_node = graph.addNodeAttribute("University", "default_value", "string")
	#-- coding: utf8 --
	import json
	from json import load
	import sqlite3
	import hmac
	import hashlib
	from facepy import GraphAPI
	from django.core.serializers.json import DjangoJSONEncoder
	import json
	#!/usr/bin/env python3
	# -- coding: UTF-8 --
	# Import modules for CGI handling and UTF-8 handling of input/output
	import cgi, cgitb
	import sys
	import re
	import os
	import sqlite3
	import numpy as np
	import collections
	<html>
	<head>
	<meta charset="utf-8" />
	<link rel="stylesheet" href="style.css">
	<title>Facescraper</title>
	</head>

	<body>
	import urllib.request
	import re
	from bs4 import BeautifulSoup

	with urllib.request.urlopen('http://www.almedalsveckan.info/event/user-view/38029') as event:
	html = event.read()
	soup = BeautifulSoup(html, 'html.parser')
	datadivs = soup.findAll("div", { "class" : "row clearfix" })
	for d in datadivs:
	leftdivs = d.findAll("div", { "class" : "leftcol" })
	'''Use with Python3'''
	import re
	import struct
	import textwrap

	thefile = open('style.css', encoding="utf-8")
	lines = thefile.readlines()

	def colorchanger(hexnumber):
	'''Substitute hex colors according to Andrejs formula. Return RGB-strings for CSS'''