Skip to content

Instantly share code, notes, and snippets.

@bbarrilleaux
Created February 18, 2014 05:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bbarrilleaux/9064953 to your computer and use it in GitHub Desktop.
Save bbarrilleaux/9064953 to your computer and use it in GitHub Desktop.
UFO sightings
1 California CA 36756666 403931.96 91 155959 235.68
2 Texas TX 24326974 678051.12 35.88 261797 92.92
3 New York NY 19490297 122283.7 159.39 47214 412.81
4 Florida FL 18328340 139760.29 131.14 53927 339.87
5 Illinois IL 12901563 143961.9 89.62 55584 232.11
6 Pennsylvania PA 12448279 116075.5 107.24 44817 277.76
7 Ohio OH 11485910 106054.83 108.3 40948 280.5
8 Michigan MI 10003422 147121.68 67.99 56804 176.1
9 Georgia GA 9685744 149975.85 64.58 57906 167.27
10 North Carolina NC 9222414 126160.91 73.1 48711 189.33
11 New Jersey NJ 8682661 19209.94 451.99 7417 1170.64
12 Virginia VA 7769089 102547.99 75.76 39594 196.22
13 Washington WA 6549224 172348.17 38 66544 98.42
14 Arizona AZ 6500180 294313.3 22.09 113635 57.2
15 Massachusetts MA 6497967 20305.51 320.01 7840 828.82
16 Indiana IN 6376792 92895.1 68.65 35867 177.79
17 Tennessee TN 6214888 106751.54 58.22 41217 150.78
18 Missouri MO 5911605 178413.92 33.13 68886 85.82
19 Maryland MD 5633597 25314.54 222.54 9774 576.39
20 Wisconsin WI 5627967 140662.25 40.01 54310 103.63
21 Minnesota MN 5220393 206188.95 25.32 79610 65.57
22 Colorado CO 4939456 268628.39 18.39 103718 47.62
23 Alabama AL 4661900 131426.36 35.47 50744 91.87
24 South Carolina SC 4479800 77981.95 57.45 30109 148.79
25 Louisiana LA 4410796 112825.06 39.09 43562 101.25
26 Kentucky KY 4269245 102895.05 41.49 39728 107.46
27 Oregon OR 3790060 268631.09 14.11 95997 39.48
28 Oklahoma OK 3642361 177846.71 20.48 68667 53.04
29 Connecticut CT 3501252 12548.49 279.02 4845 722.65
30 Iowa IA 3002555 144700.05 20.75 55869 53.74
31 Mississippi MS 2938618 121488.57 24.19 46907 62.65
32 Arkansas AR 2855390 134856 21.17 52068 54.84
33 Kansas KS 2802134 211899.88 13.22 81815 34.25
34 Utah UT 2736424 212751.98 12.86 82144 33.31
35 Nevada NV 2600167 284448.03 9.14 109826 23.68
36 New Mexico NM 1984356 314310.6 6.31 121356 16.35
37 West Virginia WV 1814468 62361.73 29.1 24078 75.36
38 Nebraska NE 1783432 199097.57 8.96 76872 23.2
39 Idaho ID 1523816 214313.75 7.11 82747 18.42
40 Maine ME 1316456 79932.21 16.47 30862 42.66
41 New Hampshire NH 1315809 23227.01 56.65 8968 146.72
42 Hawaii HI 1288198 16635.49 77.44 6423 200.56
43 Rhode Island RI 1050788 2706.54 388.24 1045 1005.54
44 Montana MT 967440 376977.95 2.57 145552 6.65
45 Delaware DE 873092 5060.84 172.52 1954 446.82
46 South Dakota SD 804194 196541.25 4.09 75885 10.6
47 Alaska AK 686293 1481346 0.46 571951 1.2
48 North Dakota ND 641481 178647.02 3.59 68976 9.3
49 Vermont VT 621270 23957.39 25.93 9250 67.16
50 Wyoming WY 532668 251487.85 2.12 97100 5.49
library("maps")
library("ggplot2")
library("RColorBrewer")
library("mapproj")
# Load the raw sighting reports and build a per-state table of sighting
# counts normalised by population.
# data from infochimps, http://www.infochimps.com/datasets/60000-documented-ufo-sightings-with-text-descriptions-and-metada
tsvfile <- "chimps_16154-2010-10-20_14-33-35/ufo_awesome.tsv"
# The file is tab-separated free text, so quote and comment handling are
# switched off: otherwise an apostrophe, a double quote or a "#" inside a field
# would swallow the following separators or truncate the line.
ufo <- read.table(
  tsvfile,
  sep = "\t",
  quote = "",
  comment.char = "",
  fill = TRUE,
  stringsAsFactors = FALSE
)
# Keep the second ", "-separated piece of V3 as the state abbreviation
# (presumably V3 looks like "City, ST" -- confirm against the data file).
# Values without a second piece become NA.
ufo$state <- vapply(
  strsplit(as.character(ufo$V3), ", "),
  function(parts) parts[2],
  character(1),
  USE.NAMES = FALSE
)
# Abbreviations that are not in state.abb yield NA here.
ufo$statename <- tolower(state.name[match(ufo$state, state.abb)])
# this file is given in the gist:
popfile <- "statepops.csv"
statePops <- read.csv(popfile, header = FALSE, stringsAsFactors = FALSE)
# Columns 3 and 4 are kept: V3 is matched against state.abb below and the
# fourth column is renamed to "population".
statePops <- statePops[, 3:4]
statePops$region <- tolower(state.name[match(statePops$V3, state.abb)])
names(statePops)[2] <- "population"
# Count sightings per lower-case state name (table() drops NA names).
stateTable <- table(ufo$statename)
stateDF <- as.data.frame(stateTable, stringsAsFactors = FALSE)
names(stateDF) <- c("region", "UFO")
stateDF <- merge(stateDF, statePops, by = "region")
# Sightings per 10,000 residents.
stateDF$UFOpercapita <- stateDF$UFO * 10000 / stateDF$population
# set up a theme for the map
# Start from theme_bw() and blank out the line, rect, strip text, axis text
# and axis title elements.
new_theme_empty <- theme_bw() +
  theme(
    line = element_blank(),
    rect = element_blank(),
    strip.text = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    # Build the margin with the public grid constructor instead of forging a
    # "unit" object by hand, which depends on grid's internal representation.
    plot.margin = grid::unit(c(0, 0, -1, -1), "lines")
  )
# Load the "state" map outlines from the maps package as a data.frame
stateShapes <- map_data("state")
# Attach each polygon's per-capita sighting rate by matching on the lower-case
# region name; regions missing from stateDF get NA.
stateShapes$UFO <- stateDF$UFOpercapita[match(stateShapes$region, stateDF$region)]
# make the map!
# Choropleth of the state polygons filled by the UFO column of stateShapes.
myPalette <- brewer.pal(9, "PuRd")
mapPlot <- ggplot(
  stateShapes,
  aes(x = long, y = lat, group = group, fill = UFO)
) +
  geom_polygon(colour = "BLACK") +
  # Argument spelled out in full (was a partial match on `project`); lat0 is
  # forwarded through `...` as the projection parameter.
  coord_map(projection = "conic", lat0 = 30) +
  new_theme_empty +
  scale_fill_gradientn(
    name = "UFO sightings per 10,000 residents",
    colours = myPalette
  ) +
  ggtitle("UFO sightings, 1995-2010")
print(mapPlot)
# make the word cloud!
# Word cloud of the most frequent terms in column 6 of the rows whose parsed
# state is "WA".
library(tm)
library(wordcloud)
# which() drops rows whose state is NA instead of returning all-NA rows.
WAtext <- as.character(ufo[which(ufo$state == "WA"), 6])
WAtext <- WAtext[!is.na(WAtext)]
# One document per text entry. VectorSource takes the character vector
# directly; DataframeSource needs doc_id/text columns in current tm releases.
WA.corpus <- VCorpus(VectorSource(WAtext))
WA.corpus <- tm_map(WA.corpus, removePunctuation)
# Plain functions must be wrapped so the documents stay tm text documents.
WA.corpus <- tm_map(WA.corpus, content_transformer(tolower))
WA.corpus <- tm_map(WA.corpus, removeWords, stopwords("english"))
tdm <- TermDocumentMatrix(WA.corpus)
m <- as.matrix(tdm)
# Total count per term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)
pal <- brewer.pal(9, "BuGn")
# Drop the first two colours of the palette.
pal <- pal[-(1:2)]
wordcloud(
  d$word,
  d$freq,
  scale = c(5, .5),
  min.freq = 3,
  max.words = 60,
  random.order = TRUE,
  rot.per = 0.15,
  colors = pal,
  vfont = c("sans serif", "plain")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment