Philip Parker pdparker

## myCitation
#load beautiful soup and itertools
from bs4 import BeautifulSoup
import itertools
import re
#import a URL opener that imitates a browser
from urllib import FancyURLopener
class MyOpener(FancyURLopener):
    """URL opener that presents a desktop Chrome agent string.

    Only the ``version`` class attribute is overridden; all other behaviour
    is inherited unchanged from ``FancyURLopener``.
    """

    # Adjacent literals are joined at compile time into one string.
    version = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/33.0.1750.152 Safari/537.36'
    )
# Bind the open method of a single MyOpener instance, so openurl(...) fetches
# with the browser-like agent string defined on the class.
openurl = MyOpener().open
# Emit "sep=;" as the first output line (Python 2 print statement).
# NOTE(review): presumably the delimiter hint some spreadsheet programs read
# from the first line of a CSV file - confirm against the rest of the script.
print "sep=;"

## cricos
# Read the CRICOS ID typed by the user; Python 2 raw_input returns it as a
# string without evaluating it.
uni = raw_input("Please enter CRICOS ID: ")

#based on https://stockrt.github.io/p/emulating-a-browser-in-python-with-mechanize/

import mechanize
import cookielib
from bs4 import BeautifulSoup
import itertools
import re

## knitr
# Transform .Rmd files to slidy files

.SUFFIXES: .Rmd .html .md

all: Day1Part1-Introduction.md Day1Part1-Introduction.html Day1Part1-session2.md Day1Part1-session2.html \
	Day1Part1-session3.md Day1Part1-session3.html Day1Part2-session1.md Day1Part2-session1.html \
	Day1Part2-session2.md Day1Part2-session2.html

#markdown
%.md: %.Rmd

## myCite
#load beautiful soup and itertools
from bs4 import BeautifulSoup
import itertools
import re
from urllib import FancyURLopener
class MyOpener(FancyURLopener):
    """FancyURLopener subclass whose agent string mimics Chrome on OS X."""

    # Split across lines for readability; the value is still one string.
    version = (
        'Mozilla/5.0 '
        '(Macintosh; Intel Mac OS X 10_9_2) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/33.0.1750.152 '
        'Safari/537.36'
    )
# Bind the open method of a single MyOpener instance, so openurl(...) fetches
# with the browser-like agent string defined on the class.
openurl = MyOpener().open
# Google Scholar citations page to read. To use this for a different profile,
# replace the value of the "user=" query parameter in the URL below.
url = 'http://scholar.google.com.au/citations?user=xHY4MJ8AAAAJ&hl=en'

## gist:f227b5b3592ed0c52443
import itertools
import re
import string
import csv
from splinter import Browser
from bs4 import BeautifulSoup


# Hard-coded ID, left commented out in favour of reading it from the user:
#uni = "00219C"
# Read the ID from standard input; the empty string means no prompt is shown.
uni = raw_input("")

## APAstyle
/* I have only tested this on Chrome but it
prints nicely to A4 size */

/* Rules in this block apply only when the page is printed. */
@media print {
    body {
    /* 210mm x 297mm are the dimensions of an A4 sheet. */
    width: 210mm;
    height: 297mm;
	}
}

## APAtemplate
---
output:
  html_document:
    number_sections: no
    toc: no
    fig_caption: yes
    css: style.css
---

```{r titlePage, echo=FALSE, message=FALSE, warning=FALSE,results='asis'}

## C
/* compile using gcc digits.c -lm -o digits */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <errno.h>

#define DIGITS 5

void die(const char *message){

## R
data <- read.csv("/Users/phparker/Dropbox/Databases/AmericanStudy.csv")

iplocation <- function(ip=""){
  response    <- readLines(paste("http://www.datasciencetoolkit.org//ip2coordinates/",ip,sep=""))
  success     <- !any(grepl("null",response))

  ip <- grep("[[:digit:]]*\\.[[:digit:]]*\\.[[:digit:]]*\\.[[:digit:]]*",response,value=T)
  match <- regexpr("[[:digit:]]*\\.[[:digit:]]*\\.[[:digit:]]*\\.[[:digit:]]*",ip)
  ip <- substr(ip,match,as.integer(attributes(match)[1])+match-1)
  if(success==T){

## pmScraper.R
#################################### Set up database ###########################
# - Make sure the database is set up to be readable, writable and executable without sudo
# - Make sure to start the mongo daemon before setting up the database: usr$ mongod
################################################################################

##Produces mongodb documents with the following fields:
# _id: Transcript id - used to index the files
# title: Title of the speech or interview
# primMinister: Who gave the speech in format 'Last name, First name'
# releaseDate: Given in days since 1970-01-01 as per R's default date storage
	#load beautiful soup and itertools
	from bs4 import BeautifulSoup
	import itertools
	import re
	#import a URL opener that imitates a browser
	from urllib import FancyURLopener
	# URL opener that presents a desktop Chrome agent string; only the
	# `version` class attribute is overridden, the rest is inherited.
	class MyOpener(FancyURLopener):
		# Nested one level under the class header so it is a class attribute.
		version = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36'
	openurl = MyOpener().open
	print "sep=;"
	uni = raw_input("Please enter CRICOS ID: ")

	#based on https://stockrt.github.io/p/emulating-a-browser-in-python-with-mechanize/

	import mechanize
	import cookielib
	from bs4 import BeautifulSoup
	import itertools
	import re
	# Transform .Rmd files to slidy files

	.SUFFIXES: .Rmd .html .md

	all: Day1Part1-Introduction.md Day1Part1-Introduction.html Day1Part1-session2.md Day1Part1-session2.html \
	Day1Part1-session3.md Day1Part1-session3.html Day1Part2-session1.md Day1Part2-session1.html \
	Day1Part2-session2.md Day1Part2-session2.html

	#markdown
	%.md: %.Rmd
	import itertools
	import re
	import string
	import csv
	from splinter import Browser
	from bs4 import BeautifulSoup


	#uni = "00219C"
	uni = raw_input("")
	/* I have only tested this on Chrome but it
	prints nicely to A4 size */

	@media print {
	body {
	width: 210mm;
	height: 297mm;
	}
	}
	---
	output:
	html_document:
	number_sections: no
	toc: no
	fig_caption: yes
	css: style.css
	---

	```{r titlePage, echo=FALSE, message=FALSE, warning=FALSE,results='asis'}
	/* compile using gcc digits.c -lm -o digits */

	#include <stdio.h>
	#include <stdlib.h>
	#include <math.h>
	#include <errno.h>

	#define DIGITS 5

	void die(const char *message){
	data <- read.csv("/Users/phparker/Dropbox/Databases/AmericanStudy.csv")

	iplocation <- function(ip=""){
	response <- readLines(paste("http://www.datasciencetoolkit.org//ip2coordinates/",ip,sep=""))
	success <- !any(grepl("null",response))

	ip <- grep("[[:digit:]]\\.[[:digit:]]\\.[[:digit:]]\\.[[:digit:]]",response,value=T)
	match <- regexpr("[[:digit:]]\\.[[:digit:]]\\.[[:digit:]]\\.[[:digit:]]",ip)
	ip <- substr(ip,match,as.integer(attributes(match)[1])+match-1)
	if(success==T){
	#################################### Set up database ###########################
	# - Make sure the database is set up to be readable, writable and executable without sudo
	# - Make sure to start the mongo daemon before setting up the database: usr$ mongod
	################################################################################

	##Produces mongodb documents with the following fields:
	# _id: Transcript id - used to index the files
	# title: Title of the speech or interview
	# primMinister: Who gave the speech in format 'Last name, First name'
	# releaseDate: Given in days since 1970-01-01 as per R's default date storage