Skip to content

Instantly share code, notes, and snippets.

@DataMinerUK
DataMinerUK / GCSE
Created May 24, 2013 14:53
Google Custom Search reveal
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>UK Search Sites</title>
<!-- <link rel="stylesheet" type="text/css" href="style.css"> -->
<script type="text/javascript" charset="utf8" src="http://ajax.aspnetcdn.com/ajax/jQuery/jquery-1.8.2.min.js"></script>
<script src="http://www.google.com/jsapi" type="text/javascript"></script>
<script type="text/javascript" charset="utf-8">
#!/usr/bin/env python
import scraperwiki
import requests
from bs4 import BeautifulSoup
# Get the bird listing table for Costa Rica from Avibase
html = requests.get("http://avibase.bsc-eoc.org/checklist.jsp?region=cr&list=clements")
content = html.content
soup = BeautifulSoup(content, "html.parser")
#!/usr/bin/env python
import scraperwiki
import requests
import json
from bs4 import BeautifulSoup
# Ranking algorithm: Function for picking out the best audio track
def song_choose(results_list):
placement = 0
#!/usr/bin/env python
import scraperwiki
import requests
from bs4 import BeautifulSoup
# scraperwiki.sql.execute('drop table swdata')
# scraperwiki.sql.commit()
def get_next_page(offset):
@DataMinerUK
DataMinerUK / violations-syria.py
Created November 28, 2013 13:27
Scraper of data on deaths in Syria collected by the Violations Documentation Center in Syria (http://www.vdc-sy.info/index.php/en/martyrs)
#!/usr/bin/env python
import scraperwiki
import requests
from bs4 import BeautifulSoup
from time import sleep
for page in range(1,798):
url = "http://www.vdc-sy.info/index.php/en/martyrs/" + str(page) + "/c29ydGJ5PWEua2lsbGVkX2RhdGV8c29ydGRpcj1ERVNDfGFwcHJvdmVkPXZpc2libGV8ZXh0cmFkaXNwbGF5PTB8"
print url
@DataMinerUK
DataMinerUK / nazi-loot.py
Created December 9, 2013 15:35
Scraper for Nazi loot
#!/usr/bin/env python
import scraperwiki
import requests
from bs4 import BeautifulSoup
stem = "http://www.lostart.de"
site = "Webs/EN/Datenbank/KunstfundMuenchen.html?cms_param=INST_ID%3D12366%26page%3D"
site_id = "#id66922"
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import csv
#col = int(sys.argv[1])
sys.stdin = os.fdopen( sys.stdin.fileno(), "rU" )
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import csv
sys.stdin = os.fdopen( sys.stdin.fileno(), "rU" )
# Open stdin as a csv
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import csv
col = int(sys.argv[1])
sys.stdin = os.fdopen( sys.stdin.fileno(), "rU" )
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import csv
sys.stdin = os.fdopen( sys.stdin.fileno(), "rU" )
# Open stdin as a csv