Skip to content

Instantly share code, notes, and snippets.

@milani
Created October 12, 2017 12:19
Show Gist options
  • Save milani/a45745794275261d7149eb067dd6cc59 to your computer and use it in GitHub Desktop.
Save milani/a45745794275261d7149eb067dd6cc59 to your computer and use it in GitHub Desktop.
WikiElection network construction
import re
import urllib2
import gzip
import StringIO
import numpy as np
def readDataset(save=True):
    """Return the raw text of the WikiElection dataset.

    The local cache file ``wikielect.txt`` is tried first.  If it cannot be
    opened or read, the gzip archive is downloaded from SNAP, decompressed
    in memory and, when *save* is true, written to the cache file so later
    calls can skip the download.

    Args:
        save: Whether a freshly downloaded copy is written to
            ``wikielect.txt``.  Has no effect when the cache is used.

    Returns:
        The complete dataset as a single string.

    Raises:
        urllib2.URLError: If the cache is unavailable and the download fails.
    """
    outFilePath = 'wikielect.txt'
    try:
        with open(outFilePath, 'r') as f:
            return f.read()
    except IOError:
        # Only a missing or unreadable cache falls through to the download.
        # A bare ``except`` here would also swallow KeyboardInterrupt and
        # genuine programming errors, silently triggering a network fetch.
        pass

    response = urllib2.urlopen('http://snap.stanford.edu/data/wikiElec.ElecBs3.txt.gz')
    try:
        # GzipFile needs a seekable file object, so buffer the body in memory.
        compressedFile = StringIO.StringIO(response.read())
    finally:
        # Release the HTTP connection even if reading the body fails.
        response.close()

    with gzip.GzipFile(fileobj=compressedFile, mode='rb') as f:
        content = f.read()

    if save:
        with open(outFilePath, 'w') as outfile:
            outfile.write(content)
    return content
# Load the raw dataset text and break it into one record per line.
content = readDataset().split('\n')
def formAdj(records, size_estimate=10000):
    """Build a signed adjacency matrix from WikiElection record lines.

    An ``N <id> <name>`` record sets the current target id.  Every following
    ``V <vote> <id> ...`` record stores an edge ``G[voter, target]``: ``-1``
    for an oppose vote (``-1``) and ``+1`` for both support (``1``) and
    neutral (``0``) votes.  When the same voter/target pair occurs more than
    once, the last vote wins.  All other records are ignored.

    An ``N`` record whose name field does not start with an ASCII letter
    does not match the target pattern, so the votes after it stay attached
    to the previously seen target.

    NOTE(review): SNAP's format description lists ``U`` as the candidate and
    ``N`` as the nominator; this function keys votes on ``N`` records --
    confirm that this is the intended edge direction.

    Args:
        records: Iterable of record strings, one line of the dataset each.
        size_estimate: Minimum side length of the returned square matrix.
            The matrix is enlarged automatically when a user id does not
            fit, instead of failing with an IndexError.

    Returns:
        A square ``numpy`` float array with side
        ``max(size_estimate, largest id + 1)``.
    """
    targetPattern = re.compile(r'N\s+([0-9]+)\s+[a-zA-Z]+')
    sourcePattern = re.compile(r'V\s+(0|1|\-1)\s+([0-9]+)')

    target = None
    max_index = -1
    # (source, target) -> sign; a dict keeps only the last vote per pair.
    edges = {}
    for record in records:
        match = targetPattern.match(record)
        if match:
            target = int(match.group(1))
            continue
        match = sourcePattern.match(record)
        if not match:
            continue
        if target is None:
            # A vote before any target record has nothing to point at; the
            # old sentinel of -1 silently wrote into the last column.
            continue
        source = int(match.group(2))
        sign = 1 if int(match.group(1)) > -1 else -1
        edges[(source, target)] = sign
        max_index = max(source, target, max_index)

    # Allocate once the real id range is known so oversized ids still fit.
    size = max(size_estimate, max_index + 1)
    G = np.zeros([size, size])
    for (source, target), sign in edges.items():
        G[source, target] = sign
    return G
# Signed vote adjacency matrix for the whole dataset (default size estimate).
G = formAdj(records=content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment