Skip to content

Instantly share code, notes, and snippets.

View arcolife's full-sized avatar

Archit Sharma arcolife

View GitHub Profile
from __future__ import with_statement # we'll use this later, has to be here
from argparse import ArgumentParser
import requests
from BeautifulSoup import BeautifulStoneSoup as Soup
def parse_sitemap(url):
resp = requests.get(url)
# we didn't get a valid response, bail
# Interactive-session transcript (not a runnable script): loads two stopword
# files and compares their contents as sets.
# NOTE(review): `pwd`, `l` and `ls` are not Python statements -- presumably
# IPython shell shortcuts (and `l` a typo for `ls`); confirm before reuse.
pwd
# Read the whole file and split it on whitespace into a list of words.
# NOTE(review): the file objects opened here are never explicitly closed.
s1 = open('stopwords.txt','r').read().split()
s1
s2 = open('../scholarec/corpus/stopwords.txt','r').read().split()
s2
set(s1)
set(s2)
# Words that appear in the first list but not in the second.
set(s1)-set(s2)
l
ls
import urllib

# Fixed pieces of the request against the arXiv export API endpoint.
base_url = 'http://export.arxiv.org/api/query?'
search_query = 'all:electron'
start = 0

# The result cap is read interactively and must parse as an integer.
max_results = int(raw_input("Enter max result count: "))

# Fill the query-string template, then fetch the raw response body.
query_args = (search_query, start, max_results)
query = 'search_query=%s&start=%i&max_results=%i' % query_args
request_url = base_url + query
response = urllib.urlopen(request_url).read()
# Parse the local file 'dblp.xml' with untangle and keep the result in `data`.
# NOTE(review): feedparser is imported but not used in the lines visible here.
import feedparser
import untangle
data = untangle.parse('dblp.xml')
# Interactive-session transcript: run a term query through pyes against a
# server on localhost:9200 (presumably a local Elasticsearch node -- confirm).
from pyes import *
conn = ES('localhost:9200')
# Match on the "id" field; the value looks like an arXiv identifier -- TODO confirm.
q = TermQuery("id","0712.1111v1")
results = conn.search( query = q)
# NOTE(review): `results?` / `results??` look like IPython help syntax and are
# not valid Python outside that shell.
results?
results??
results
# NOTE(review): Python 2 print statement; the loop body's indentation appears
# to have been lost when this transcript was captured.
for r in results:
print r
# Interactive-session transcript: load '_User.json' and explore its structure.
f = open('_User.json','rb')
import json
# Parse the file's full contents as JSON, then release the file handle.
data = json.loads(f.read())
f.close()
data
# NOTE(review): `data` is indexed by position here but is then used with
# .keys() and ['results'] below; if it is a dict, data[0] presumably raised
# KeyError during the session.
data[0]
data.keys()
# First entry under the 'results' key, and the field names of that entry.
data['results'][0]
data['results'][0].keys()
from pyes import *
# Interactive-session transcript: load '_User.json' and explore its structure.
f = open('_User.json','rb')
import json
# Parse the file's full contents as JSON, then release the file handle.
data = json.loads(f.read())
f.close()
data
# NOTE(review): `data` is indexed by position here but is then used with
# .keys() and ['results'] below; if it is a dict, data[0] presumably raised
# KeyError during the session.
data[0]
data.keys()
# First entry under the 'results' key, and the field names of that entry.
data['results'][0]
data['results'][0].keys()
from pyes import *
@arcolife
arcolife / file1.py
Last active August 29, 2015 13:57 — forked from anonymous/file1.py
mongo upload script
import os

# Generate a bash script, 'mongo_upload', that runs one `mongoimport` per file
# found in ./_User_sharded/, loading each into takezero_raw.users.
filenames = os.listdir('./_User_sharded/')

# Binary mode keeps the "\n" line endings untouched on every platform.  The
# with-block guarantees the script is flushed and closed even if a write
# fails; the original left the handle open, so the file could end up
# truncated when it was executed right after generation.
with open('mongo_upload', 'wb') as f:
    f.write('#!/bin/bash\n')
    # The script changes into the shard directory so that each import can
    # refer to its file by bare name.
    f.write('cd ./_User_sharded/\n')
    for filename in filenames:
        # NOTE(review): filenames are written unquoted; names containing
        # spaces or shell metacharacters would break the generated script.
        cmd = ['mongoimport', '--db', 'takezero_raw',
               '--collection', 'users', '--file', filename]
        f.write(' '.join(cmd) + "\n")
# Interactive-session transcript: inspect image dimensions with PIL and work
# out a target size for a resize.
from PIL import Image
fi = Image.open('f23bf11c-aa22-419b-855b-ab0b9f1f5cb3-SRV_0122.jpg')
# NOTE(review): `size` is read as a plain attribute on the next line, so
# calling it here was presumably a mistake that raised an error.
fi.size()
fi.size
fi = Image.open('bd654b14-87a5-4ff3-868f-bd72b5121c49-IMG_9941.jpg')
fi.size
# Aspect-ratio arithmetic for 5184 and 3456 (presumably the image's width and
# height -- confirm); the float literals avoid integer division.
5184.0/3456
3456.0/5184
# 1024 scaled by 3456/5184, about 682.67; it is rounded to 683 in the resize
# call below.
float(1024*3456) / 5184
# NOTE(review): the value returned by resize() is not stored anywhere.
fi.resize((1024,683), Image.ANTIALIAS)
# Interactive-session transcript: successive attempts at loading '_User.json'.
import json
# NOTE(review): the next three calls look like failed attempts -- json.loads
# is handed the filename string (plus a stray 'rb' argument) instead of the
# file's contents, and json.load is handed strings where it expects a file
# object.
data = json.loads('_User.json','rb')
data = json.load('_User.json','rb')
data = json.load(open('_User.json','rb').read())
# Read the file's contents and parse that text.
# NOTE(review): the file object opened inline is never explicitly closed.
data = json.loads(open('_User.json','rb').read())
data
# NOTE(review): `ls` is not Python -- presumably an IPython shell shortcut.
ls
# NOTE(review): data['results'] is indexed by position two lines below, so
# calling .keys() on it presumably failed if it is a list.
data['results'].keys()
data['results']
data['results'][0]