R scripts from the plotting of shipping logs by Ben Schmidt.
He has also a nice D3 library for comet-like trails.
R scripts from the plotting of shipping logs by Ben Schmidt.
He has also a nice D3 library for comet-like trails.
From http://rpubs.com/benmschmidt/shipLDA
opts_chunk$set(eval = FALSE)
# Oceans2
rm(list = ls())
require(ggplot2)
require(plyr)
require(lubridate)
source("ICOADS parsing.R")
source("../Map Functions.R")
data = loadInData("~/shipping/ICOADS/maury.txt")
data = splitDataByVoyage(data)
# Save the R data.frame into a directory that MALLET will be able
# to interpret as a text, and run a topic model on it:
MALLEABLE = data.frame(text = data$voyageid, word = paste(round(data$lat, 1),
round(data$long, 1), sep = "X"))
### I'm p system('mkdir /tmp/MALLET') system('rm /tmp/MALLET/*')
# Write one text file for each voyage with all the points it passed
# through.
ddply(MALLEABLE, .(text), function(text) {
write.table(text$word, paste("/tmp/MALLET/", text$text[1], ".txt", sep = ""),
row.names = F, col.names = F, quote = F)
}, .progress = "text")
# To get it to take numbers, I need to change the token-regex; that was
# the only major hiccup I found here.
system("cd ~/mallet-2.0.7/; bin/mallet import-dir
--input /tmp/MALLET --output ships.mallet --keep-sequence --token-regex \"\\S+\";")
# I ran this several times with topic topic sizes; only including in the
# post 9 and 25
system("cd ~/mallet-2.0.7/; bin/mallet train-topics
--input ships.mallet --output-topic-keys shipKeys.txt --num-topics 16
--output-doc-topics shipTopics.txt --output-state state.txt.gz;")
# This is the data I want to plot.
system("cd ~/mallet-2.0.7/; gunzip -f state.txt.gz")
malletresults = read.table("~/mallet-2.0.7/state.txt", sep = " ", col.names = c("?",
"file", "loc", "total", "point", "topic"), colClasses = c("NULL", "factor",
"NULL", "NULL", "factor", "factor"))
nrow(malletresults)
length(unique(malletresults$file))
levels(malletresults$file) = gsub(".*/", "", levels(malletresults$file))
levels(malletresults$file) = gsub(".txt", "", levels(malletresults$file))
words = (strsplit(as.character(malletresults$point), "x"))
plottable = ldply(words, as.numeric, progress = "text")
names(plottable) = c("lat", "long")
plottable$topic = malletresults$topic
plottable$long[plottable$long > 180] = plottable$long[plottable$long > 180] -
360
ggplot(plottable, aes(x = as.numeric(long), y = as.numeric(lat))) + geom_point(alpha = 0.01,
size = 0.3) + facet_wrap(~topic, scales = "free", ncol = 4) + annotation_map(map = world,
fill = "#F3E0BD") + theme_nothing + labs(title = "Distribution of points across a 9-topic model")
ggplot(plottable[plottable$topic == 8, ], aes(x = as.numeric(long), y = as.numeric(lat))) +
geom_point(alpha = 0.04, size = 0.5) + facet_wrap(~topic, scales = "free",
ncol = 4) + annotation_map(map = world, fill = "#F3E0BD") + theme_nothing +
labs(title = "Topic 8 is a chimera of a sort text-based topic modelling analysis wouldn't uncover")
counts = table(plottable$word)
countz = as.data.frame(counts)
countz$rank = rank(-countz$Freq)
ggplot(countz, aes(x = rank, y = Freq)) + geom_point() + scale_x_log10() + scale_y_log10()
Try K-means clustering on the voyages in topic space (note: this didn't work–most ended up in a single k-means cluster–so I didn't include it in the blog post). Might work better with a higher number of topics, a couple hundred or something, but that would take better priors.
topicdists = table(malletresults$file, malletresults$topic)
head(topicdists)
topicdists = apply(topicdists, 2, function(z) {
z/sum(z, na.rm = T)
})
class(topicdists) = "matrix"
f = kmeans(topicdists, centers = 9)
data$cluster = f$cluster[match(data$voyageid, names(f$cluster))]
offset = 220
plotWorld = Recenter(world, offset, idfield = "group")
plotData = Recenter(data, offset, shapeType = "segment", idfield = "voyageid")
head(plotData)
ggplot(plotData[!is.na(plotData$cluster), ], aes(x = as.numeric(long), y = as.numeric(lat))) +
geom_path(aes(group = group), alpha = 0.02) + annotation_map(map = plotWorld,
fill = "beige") + facet_wrap(~cluster, ncol = 3) + theme_nothign
This can be used for either type of plot--it's agnostic about the time variable going, and the lag can be set.
mapPlot = function(myframe = useful, mytime = 654100, timelag = 34, timevar = "time", ...) {
# I define half the plot here, and half later. No real reason.
# This is why I don't usually share that much code.
myframe$timevar = get(timevar, myframe)
myframe = myframe[myframe$timevar >= (mytime - timelag) & myframe$timevar <= mytime, ]
if (timevar == "time") {
myframe$group = myframe$voyageID
}
if (timevar == "yearday") {
myframe$group = paste(myframe$voyageID, myframe$Year)
}
ggplot(myframe, aes(y = Lat, x = Long)) +
geom_polygon(data = world, aes(x = x, y = y, group = id), drop = T) +
geom_path(size = 0.5, aes(group = group,
#This alpha here decaying by time is what makes the trails slowly erase.
alpha = (timevar - mytime) / timelag, color = color)) +
scale_color_identity() +
geom_point(data = myframe[myframe$timevar == mytime, ], size = 0.75, aes(group = voyageID,
alpha = (timevar - mytime) / timelag, color = color)) +
ylab("")+
xlab("") +
opts(legend.position = "none",
axis.text.x = theme_blank(),
axis.text.y = theme_blank(),
axis.ticks = theme_blank()) +
coord_map(...)
}
rm(list=ls()) | |
source("SQLFunctions.R") | |
plotSet = function(filename,dck=701,alpha=.01,lwd=8) { | |
library(grid) | |
paths = tbl(tblsrc,"paths") %.% | |
filter(DCK==dck) %.% | |
arrange(voyagenum,yearday) %.% | |
select(LON,LAT,voyagenum) %.% | |
collect() | |
paths = paths %.% mutate(LON=LON+180) | |
paths$LON[paths$LON<0] = paths$LON[paths$LON<0] + 360 | |
paths$LON[paths$LON>360] = paths$LON[paths$LON>360] - 360 | |
#break groups that cross from 0 to 360 or vice versa, which otherwise leave streaks | |
paths$split = c(0,abs(paths$LON[-1]-paths$LON[1:(nrow(paths)-1)])>20) | |
paths$segment = 100*paths$voyagenum + cumsum(paths$split) | |
#desperately try to remove all elements: code stole from stack overflow, somewhere. | |
new_theme_empty <- theme_bw() | |
new_theme_empty$line <- element_blank() | |
new_theme_empty$rect <- element_blank() | |
new_theme_empty$strip.text <- element_blank() | |
new_theme_empty$axis.text <- element_blank() | |
new_theme_empty$plot.title <- element_blank() | |
new_theme_empty$axis.title <- element_blank() | |
new_theme_empty$plot.margin <- structure(c(0, 0, -1, -1), | |
unit = "lines", valid.unit = 3L, class = "unit") | |
plotting = paths | |
#Actually make the plot: | |
plot = ggplot(plotting, | |
aes(x=LON,y=LAT,group=segment)) + | |
geom_path(alpha=alpha,lwd=lwd) + | |
labs(x="",y="",title="") + | |
theme(line=element_blank(), | |
rect=element_blank(), | |
axis.text=element_blank() | |
)+ coord_equal() + scale_x_continuous(expand=c(0,0),limits=c(0,360)) + | |
scale_y_continuous(expand=c(0,0),limits=c(-90,90)) | |
png(filename=filename,width=4096*2,height=2048*2) | |
#plotting code from: [whoops forgot to paste it in, but SO somewhere again] | |
#plots just the central panel, which makes it compatible with the format Science on a Sphere wants. | |
gt <- ggplot_gtable(ggplot_build(plot)) | |
ge = subset(gt$layout,name=="panel") | |
grid.draw(gt[ge$t:ge$b, ge$l:ge$r]) | |
graphics.off() | |
} | |
dcks = tbl(tblsrc,"voyages") %.% | |
group_by(DCK) %.% summarize(count=n(),points=sum(points)) %.% | |
arrange(-count) %.% collect() | |
dcks %.% filter(points<3000000,points>10000) %.% | |
group_by(DCK) %.% | |
do(function(row) { | |
dck = row$DCK[1] | |
filename = paste0("~/Dropbox/",dck,"-2.png") | |
if (!file.exists(filename)) { | |
plotSet(filename=filename,dck=dck,alpha=.04,lwd=3) | |
} | |
}) | |
From http://rpubs.com/benmschmidt/MauryClassification
# Oceans2
opts_chunk$set(eval = F)
rm(list = ls())
require(ggplot2)
## Loading required package: ggplot2
require(plyr)
## Loading required package: plyr
require(lubridate)
## Loading required package: lubridate
source("ICOADS parsing.R")
## Loading required package: grid
data = loadInData("~/shipping/ICOADS/maury.txt")
data = splitDataByVoyage(data)
data$color = as.character("#A65628")
source("~/gibbon/Map Functions.R")
require(plyr)
colors = data.frame(city = c("Boston", "New York", "Baltimore", "New Bedford",
"Sag Harbor", "Nantucket", "Salem", "Philadelphia", "Other"), color = c("green",
"#377EB8", "#FF7F00", "#4DAF4A", "#4DAF4A", "#4DAF4A", "#984EA3", "#F781BF",
"#A65628"))
for (city in colors$city[colors$city != "Other"]) {
cat(city, "\n")
data$color[grep(city, data$origin, ignore.case = T)] = as.character(colors$color[colors$city ==
city])
data$color[grep(city, data$destination, ignore.case = T)] = as.character(colors$color[colors$city ==
city])
}
colors = colors[order(nchar(as.character(colors$city))), ]
colors$lat = seq(62, 32, length.out = nrow(colors))
colors$long = 80
offset = 200
data$whaling = NA
data$whaling[grepl("SAG HARBOR", data$origin) | grepl("SAG HARBOR", data$destination) |
grepl("NEW BEDFORD", data$destination) | grepl("NEW BEDFORD", data$origin) |
grepl("COLD SPRING", data$destination) | grepl("COLD SPRING", data$origin) |
grepl("NANTUCKET", data$origin) | grepl("NANTUCKET", data$destination) |
grepl("WHALING", data$origin) | grepl("WHALING", data$destination) | grepl("NEW LONDON",
data$origin) | grepl("NEW LONDON", data$destination)] = T
data$whaling[is.na(data$whaling)] = F
sort(-table(data$origin[data$whaling]))[1:15]
require(plyr)
plotWorld = Recenter(world, offset, idfield = "group")
plotData = Recenter(data, offset, shapeType = "segment", idfield = "voyageid")
Here's the chunk I used in my part that classifies whaling voyages:
data$bin = factor(round(data$lat/36) * 28000 + round(data$long/36) * 36)
ggplot(plotData[grepl("NEW BEDFORD", plotData$destination) | grepl("NEW BEDFORD",
plotData$origin), ], aes(x = long, y = lat, group = group)) + geom_path(alpha = 0.1,
color = "black") + geom_polygon(data = plotWorld, aes(group = group), fill = "grey") +
theme_nothing + coord_map(proj = "mollweide") +
labs(title = "The paths of ships that sailed from or to New Bedford in Maury's collection")
plotData$bin = factor(plotData$bin, levels = sample(levels(plotData$bin)))
ggplot(plotData, aes(x = long, y = lat, color = bin)) + geom_point(alpha = 0.1) +
annotation_map(plotWorld, aes(group = group), fill = "grey") +
labs(title = "Each voyage is classified by the time it spend in these grid points") +
theme_nothing
control = plotData
control$voyageid = factor(control$voyageid)
ddply(control, .(whaling), function(frame) {
length(unique(frame$voyageid))
})
keep = sample(unique(control$voyageid[!control$whaling]),
length(unique(control$voyageid[control$whaling])))
control = control[control$whaling | control$voyageid %in% keep, ]
control$voyageid = factor(control$voyageid)
ddply(control, .(whaling), function(frame) {
length(unique(frame$voyageid))
})
# Visual inspections confirms this works reasonably for
# whaling/nonwhaling:
ggplot(control, aes(x = long, y = lat, color = whaling, group = group)) +
geom_path(alpha = 0.1) +
geom_polygon(data = plotWorld, aes(group = group), color = "black") +
theme_nothing +
coord_map(proj = "mollweide") +
labs(title = "Training sets for whaling (blue) and non-whaling (red) voyages")
require(class)
crosscut = table(plotData$voyageid, plotData$bin)
# Not sure if normalizing or allowing whaling voyages to be naturally
# longer is better. Classifier accuracy is about the same.
crosscut = (crosscut/rowSums(crosscut))
class(crosscut) = "matrix"
training = crosscut[rownames(crosscut) %in% control$voyageid, ]
# applygroup = crosscut[!rownames(crosscut) %in% control$voyageid,]
applygroup = crosscut
classes = knn(training, training, control$whaling[match(rownames(training),
control$voyageid)], k = 3)
summary(classes)
# First round, only assign _new_ whaling voyages; no old ones.
classes[classes == FALSE & control$whaling[match(rownames(training), control$voyageid)]] = TRUE
classes = knn(training, training, classes, k = 3)
summary(classes)
# classes = knn(training,classes,k=5) classes =
# knn.cv(training,classes,k=5) classes = knn.cv(training,classes,k=5)
results = control[control$voyageid %in% rownames(training), ]
results$derived = classes[match(results$voyageid, rownames(training))]
testing = results[results$voyageid %in% unique(control$voyageid), ]
testing$derived = factor(testing$derived, labels = c("Classifier\nsays not whaling",
"Classifier\nsays whaling"))
testing$whaling = factor(testing$whaling, labels = c("Port-of-call\ndoesn't indicate whaling",
"Port-of-call\nindicates whaling"))
ggplot(testing, aes(x = long, y = lat, color = as.numeric(derived) == as.numeric(whaling),
group = group)) + geom_path(alpha = 0.31) + annotation_map(plotWorld, aes(group = group),
fill = "grey") + facet_grid(derived ~ whaling) + theme_nothing +
labs(title = "Where the classifier disagrees with the port-of-call (red quadrants),\nthe classifier looks more accurate") +
coord_map(proj = "mercator", ylim = c(-60, 75))
plotData = Recenter(data, offset, shapeType = "segment", idfield = "voyageid")
plotData = plotData[!is.na(plotData$derived), ]
summary(plotData$derived)
ggplot(plotData[plotData$Year == 1849, ], aes(x = long, y = lat, color = derived,
group = group)) + geom_path(alpha = 0.31) + annotation_map(map = plotWorld,
aes(group = group), color = "black") +
theme_nothing +
labs(title = "Whaling (blue) and non-whaling voyages in 1849") +
coord_map(proj = "mollweide", ylim = c(-60, 75))
head(plotData)
dim(as.matrix(crosscut))
crosscut = table(plotData$voyageid, plotData$bin)
groups = kmeans(as.matrix(crosscut), centers = 9, iter.max = 25, nstart = 1)
clusters = groups$cluster
plotData$cluster = factor(clusters[match(plotData$voyageid, names(clusters))])
names = daply(plotData, .(cluster), function(cluster) {
paste0("(", round(length(unique(cluster$voyageid))/length(levels(cluster$voyageid)) *
100, 1), "%)")
})
# These names will change with different kmeans clusters
levels(plotData$cluster) = c("Cape of Good Hope", "Around the Horn", "Near Atlantic",
"Europe and Back", "East Coast", "Hawaii", "Strait of Malacca", "Eastern Australia",
"Galapagos")
levels(plotData$cluster) = paste(levels(plotData$cluster), names)
ggplot(plotData, aes(x = long, y = lat, group = group)) + geom_path(alpha = 0.02,
color = "black") + annotation_map(map = plotWorld, aes(group = group), fill = "grey") +
theme_nothing + labs(title = "Clusters of voyages in the Maury collection") +
coord_map(proj = "mercator", ylim = c(-60, 75)) + facet_wrap(~cluster)
# Show common whaling ports
voyageCounts = plotData[!duplicated(plotData$voyageid), c("origin", "destination",
"derived")]
require(reshape2)
r = melt(voyageCounts, id.var = c("derived"))
head(r)
percents = ddply(r, .(value), function(cityframe) {
data.frame(city = cityframe$value[1], total = nrow(cityframe),
whalepercent = sum(as.logical(cityframe$derived == TRUE)) * 100/nrow(cityframe))
}, .progress = "text")
percents = percents[order(-percents$total), ]
head(percents)
capwords <- function(s, strict = FALSE) {
cap <- function(s) paste(toupper(substring(s, 1, 1)), {
s <- substring(s, 2)
if (strict)
tolower(s) else s
}, sep = "", collapse = " ")
sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}
percents$city = capwords(as.character(percents$city), strict = TRUE)
ggplot(percents[1:150, ]) + geom_text(aes(y = whalepercent, x = total, label = city),
size = 3.5) + scale_x_log10() + labs(title = "Which ports are Whaling Ports?",
x = "Number of voyages mentioning location (log scale)",
y = "Percent of voyages through location classified as whalers")
data$bin = factor(round(data$lat/14) * 28000 + round(data$long/14) * 14)
data$numericbin = as.numeric(data$bin)
modellable = dlply(data, .(voyageid), function(voyage) {
tmp = voyage$numericbin - 1
matrix(c(as.numeric(tmp), rep(1, length(as.numeric(tmp)))), nrow = 2, byrow = T)
}, .progress = "text")
prod(sapply(modellable, function(row) {
nrow(row) == 2
}))
lda.collapsed.gibbs.sampler(documents = modellable, K = 15, unique(data$numericbin),
10, 0.1, 0.1)
require(reshape2)
day = median(plotData$time)
length(levels(plotData$voyageid))
day = day + 1
day = median(plotData$time)
day = day - 1000
day = 1
source("~/gibbon/Map Functions.R")
day = 17
SeasonalityPlots = llply(as.vector(1:365), function(day) {
if (!file.exists(paste("~/shipping/ICOADS/images/MauryYear/", formatC(day,
digits = 7, format = "d", flag = "0"), ".png", sep = ""))) {
altered = plotData
if (day < 6) {
altered$Year[altered$yearday > 355 & altered$yearday != 366] = altered$Year[altered$yearday >
355 & altered$yearday != 366] + 1
altered$yearday[altered$yearday > 355 & altered$yearday != 366] = altered$yearday[altered$yearday >
355 & altered$yearday != 366] - 365
}
myplot = mapPlot(altered, mytime = day, timevar = "yearday", timelag = 4,
colorvar = "color", polygons = plotWorld, orientation = c(90, offset,
0), ylim = c(-70, 75), myalpha = 0.6, proj = "gall", lat0 = 10) +
theme_nothing + geom_text(data = colors, aes(label = city, color = color),
size = 4) + annotate("text", x = 260, y = 40, size = 6, label = paste(months(as.Date("1-1-1") +
day - 1), mday(as.Date("1-1-1") + day - 1), sep = "\n"), color = "yellow",
align = 1) + opts(plot.margin = unit(rep(0, 4), "lines"))
# And then save it to a file.
ggsave(filename = paste("~/shipping/ICOADS/images/MauryYear/", formatC(day,
digits = 7, format = "d", flag = "0"), ".png", sep = ""), plot = myplot,
width = 1920/200, height = 1080/200, dpi = 200)
}
}, .progress = "text")
# in shell: for file in 0* ; do cp '$file' '3$file'; done; for file in 0*
# ; do cp '$file' '2$file'; done; for file in 0* ; do cp '$file' '1$file';
# done; Fill in from the middle to make prettier charts I can check on in
# progress:
days = sort(unique(plotData$time[plotData$Year >= 1825 & plotData$Year < 1855]))
# only plot even numbered days to keep the files smaller
days = sort(unique(plotData$time))
length(days)
days = arbitraryOrder(unique(data$time),2)
length(days)
AllPlots = llply(days,function(day) {
if (!file.exists(paste("~/shipping/ICOADS/images/Maury/",
formatC(day,digits=7,format='d',flag='0')
,".png",sep=""))) {
myplot =
mapPlot(
plotData,
mytime=day,
timevar="time",
timelag=45,
colorvar='color',
polygons=plotWorld,
orientation=c(90,offset,0),
ylim=c(-70,75),
xlim=range(plotData$long),
myalpha=.9,
proj='gall',lat0=10) +
theme_nothing+ geom_text(data=colors,aes(label=city,color=color),size=4) +
annotate("text",x=260,y=40,size=6,
label=paste(months(as.Date("1-1-1") + day-1),year(as.Date("1-1-1") + day)-1,sep="\n"),
color='yellow',align=1) +
theme(plot.margin = unit(rep(0, 4), "lines"))
ggsave(filename=paste("~/shipping/ICOADS/images/Maury/",
formatC(day,digits=7,format='d',flag='0')
,".png",sep=""), plot=myplot,width=1920/200,height=1080/200,dpi = 200)
}
},.progress="text")
?ggsave
whaleData = plotData[plotData$derived==TRUE,]
days = sort(unique(whaleData$time[
whaleData$Year>=1830 & whaleData$Year <= 1855]))
days = arbitraryOrder(days,2)
whaleData$color='red'
dim(whaleData)
require(grid)
require(lubridate)
day = day+3000
require(grid)
WhalePlots = llply(days,function(day) {
if (!file.exists(paste("~/shipping/ICOADS/images/whalescar/",
formatC(day,digits=7,format='d',flag='0'),".png",sep=""))) {
myplot =
mapPlot(
whaleData,
mytime=day, timevar="time",
timelag=14,colorvar='color',landfill='#F3E0BD',
polygons=plotWorld,
orientation=c(90,offset,0),
ylim=c(-70,75),
xlim=range(plotData$long), myalpha=.9,
proj='mollweide') +
theme_nothing +
annotate("text",x=260,y=40,size=3.5,
label=paste(months(as.Date("1-1-1") + day-1),
year(as.Date("1-1-1") + day)-1,sep="\n"),
color='black',align=1) +
theme(plot.margin = unit(rep(0, 4), "lines")) +
scale_color_manual(values="red")
#And then save it to a file.
myplot = myplot + geom_path(
data=whaleData[whaleData$time<day,],
color='black',lwd=.3,alpha=.05,aes(group=group))
myplot
ggsave(filename=paste("~/shipping/ICOADS/images/whalescar/",
formatC(day,digits=7,format='d',flag='0')
,".png",sep=""), plot=myplot,width=1920/200,height=1080/200,dpi = 200)
}
},.progress="text")
ggplot()
require(plyr)
require(ggplot2)
require(lubridate)
#plotting Melville's voyages
head(plotData[plotData$derived,])
ggplot(plotData[plotData$derived==TRUE,],aes(x=long,y=lat,group=group)) +
geom_path(alpha=.02,color='black') +
annotation_map(map=plotWorld,aes(group=group),
fill='grey') +
theme_nothing +
labs(title="The Voyages of the Acushnet in the context of American Whaling\n(All ships in black; the Acushnet in green; and Melville's time on board in red)") +
coord_map(proj='mollweide',ylim=c(-60,75)) + geom_path(data=plotData[grep("ACUSHNET",plotData$ID),],alpha=.5,color='green',lwd=3)+ geom_path(data=plotData[grepl("ACUSHNET",plotData$ID) &
plotData$Year >=1841 &
plotData$time <= 672988,],alpha=1,color='red',lwd=3)
ggplot(plotData[plotData$Year < 1815,],aes(x=long,y=lat,group=group)) +
geom_path(alpha=.22,color='black') +
annotation_map(map=plotWorld,aes(group=group),
fill='grey') +
theme_nothing +
labs(title="The 1803-1805 voyages of the Essex (in red)\nin the context of pre-1815 American shipping") +
coord_map(proj='mollweide',ylim=c(-60,75)) +
geom_path(data=plotData[grepl("ESSEX",plotData$ID) & plotData$Year >=1800,],
alpha=1,
color='red',
lwd=3)
melvilleLength=as.Date("1842-7-31")-as.Date("1841-1-1")
plotData$origin[grep("ESSEX",plotData$ID)]
shipDates=plotData$date[grep("ACUSHNET",plotData$ID)]
dates = as.Date("1840-1-1"):max(shipDates)
as.Date("1840-1-1")-dates[1]
ship = data.frame(date = dates,variable="Have Ship Data for Acushnet",value=0)
ship$value[ship$date %in% shipDates] = 1
melvilleaboard = as.Date("1841-1-1"):as.Date("1842-7-31")
melville =data.frame(date = dates,variable="Melville On Acushnet",value=0)
melville$value[melville$date %in% melvilleaboard] = 1
mergt = rbind(ship,melville)
head(mergt)
mergt$date=as.Date(mergt$date,origin=as.Date("1970-01-01"))
ggplot(mergt,aes(x=date,y=variable,alpha=value)) +
geom_tile() +
scale_x_date() +
theme(legend.position="None")+
labs(y="",title="We have nearly-continuous logbooks from the Acushnet except for the beginning of Melville's voyage")
ggplot(data[data$destination %in% names(sort(-table(data$destination)))[1:10],],
aes(x=destination))+geom_bar()
tmp=data[data$voyageid %in%
unique(data$voyageid[
data$lat > -10 & data$lat < 0 & data$long >85 & data$lat < 95]),]
ggplot(data,aes(x=long,y=lat)) + geom_hex(bins=133)+
theme_nothing() +
opts(title="Density of log readings in the Maury collection",
legend.position="right",
panel.background = theme_rect(fill='grey', colour='grey'),
panel.border = theme_blank()) +
scale_fill_gradient("number of observations",trans='log',low="white",high="steelblue")
data$yeargrp = factor(floor(data$Year/10)*10)
data$yeargrp[as.numeric(as.character(data$yeargrp))<1820] = "1820"
data$yeargrp[as.numeric(as.character(data$yeargrp))>1850] = "1850"
data$yeargrp = factor(data$yeargrp)
data$cities = NA
cities = c("NEW ORLEANS")
for (city in cities) {
data$cities[grepl(city,data$origin) | grepl(city,data$destination)]=city
}
data$cities = factor(data$cities)
ggplot(coordShift(data[!is.na(data$cities),],0),
aes(x=long,y=lat,group=voyageid)) +
geom_path(alpha=.1) + theme_nothing() +
opts(title="Ship paths from Maury's collection (ICOANS data)",
panel.background = theme_rect(fill='white', colour='white'),
panel.border = theme_blank()) +
annotate_map(data=world,aes(x=long,y=lat,group=id),drop=T,fill='lightgreen') +
coord_map(proj='mercator',orientation=c(90,0,0)) + facet_grid(yeargrp~cities)
max(world$long)
coordShift =function(myframe,meridian) {
myframe$long[myframe$long>meridian+180] = myframe$long[myframe$long>meridian+180]-360
myframe$long[myframe$long<=meridian-180] = myframe$long[myframe$long<=meridian-180]+360
myframe
}
qplot(coordShift(world,180)$long)
melville = data[grepl("ACUSHNET",data$ID) & data$Year>=1841 & data$time < 687270,c(1:8,14)]