###Mining Digital Repositories: Challenges and Horizons, KB, Den Haag, 10 April 2014
Live notes, so an incomplete, partial record of what actually happened.
Tag #digrep14
####Challenges (Thursday)
Hans Jansen, KB
###Mining Digital Repositories: Challenges and Horizons, KB, Den Haag, 10 April 2014
Live notes, so an incomplete, partial record of what actually happened.
Tag #digrep14
####Challenges (Thursday)
Hans Jansen, KB
Install from CRAN and load
install.packages("solr")
library("solr")
Define URL
# | |
install.packages(c("twitteR","wordcloud","tm")) | |
library(twitteR); library(wordcloud); library(tm) | |
# Search for #bes12 tweets | |
bestweets <- searchTwitter("#bes12", n=5000) | |
length(bestweets) # ends up with 1344 as of 21-Dec-12 at 17:00 London time | |
# make into a data.frame | |
bestweets_df <- twListToDF(bestweets) |
library("RCurl") | |
library("XML") | |
library("plyr") | |
library("ggplot2") | |
library("directlabels") | |
######################## | |
# Download PubMed Data # | |
######################## |
Elsevier | 529633 | |
---|---|---|
Springer-Verlag | 206527 | |
Wiley Blackwell (John Wiley & Sons) | 110387 | |
Wiley Blackwell (Blackwell Publishing) | 100235 | |
Informa UK (Taylor & Francis) | 85869 | |
Trans Tech Publications | 53310 | |
Sage Publications | 42105 | |
Oxford University Press | 40496 | |
American Chemical Society | 39543 | |
Ovid Technologies (Wolters Kluwer) - Lippincott Williams & Wilkins | 39186 |
# dhist. | |
# Another algorithm for computing histogram breaks. Produces irregular bins. | |
# Provided by Lorraine Denby | |
# | |
# | |
# @keyword internal | |
dhist <- function(x, a=5*diff(quantile(x, c(0.25,0.75))), nbins=10, rx = range(x)) { | |
x <- sort(x) | |
if(a == 0) | |
a <- diff(range(x))/100000000 |
#!/bin/bash | |
### Original script by Geoffeg, modified by Roblight and later by me (alias me='alias Danog='Daniil Gentili'') | |
### How to install this script: | |
### wget https://gist.github.com/danog/a3963463892f7f7df74a/raw/dropbox_youtube_dl.sh -O ~/dropbox_youtube_dl.sh && chmod 755 ~/dropbox_youtube_dl.sh && ~/dropbox_youtube_dl.sh --install | |
### | |
### IFTTT Recipe URL: https://ifttt.com/recipes/277403-download-every-youtube-video-you-add-to-your-watch-later-playlist-automatically | |
if [ "$1" = "--install" ]; then | |
if [ -f /usr/local/bin/youtube-dl ]; then echo "Youtube-dl already installed."; else echo "Installing youtube-dl..."; sudo curl https://yt-dl.org/downloads/2015.04.03/youtube-dl -o /usr/local/bin/youtube-dl && sudo chmod a+x /usr/local/bin/youtube-dl && echo "YouTube-dl installed successfully." || echo "Couldn't install YouTube-dl."; fi |
doInstall <- TRUE | |
toInstall <- c("twitteR", "dismo", "maps", "ggplot2") | |
if(doInstall){install.packages(toInstall, repos = "http://cran.us.r-project.org")} | |
lapply(toInstall, library, character.only = TRUE) | |
searchTerm <- "#rstats" | |
searchResults <- searchTwitter(searchTerm, n = 1000) # Gather Tweets | |
tweetFrame <- twListToDF(searchResults) # Convert to a nice dF | |
userInfo <- lookupUsers(tweetFrame$screenName) # Batch lookup of user info |
<script type="text/javascript"> | |
SyntaxHighlighter.autoloader( | |
"r path/to/your/syntaxhighlighter/scripts/shBrushR.js", | |
"plain path/to/your/syntaxhighlighter/scripts/shBrushPlain.js", | |
"sql path/to/your/syntaxhighlighter/scripts/shBrushSql.js", | |
"js path/to/your/syntaxhighlighter/scripts/shBrushJScript.js", | |
"html xml path/to/your/syntaxhighlighter/scripts/shBrushXml.js" | |
); | |
SyntaxHighlighter.defaults["toolbar"] = false; | |
SyntaxHighlighter.all(); |