Skip to content

Instantly share code, notes, and snippets.

@felixhaass
Last active August 29, 2015 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save felixhaass/11282831 to your computer and use it in GitHub Desktop.
Save felixhaass/11282831 to your computer and use it in GitHub Desktop.
This code reproduces the data and plots in this blog post on each Game of Thrones character's screen time on the show in seasons 1 to 3.
# Read the raw screen-time list. Each line is expected to look like
#   "<rank>. <name> = <minutes>:<seconds> (<seasons>; <episode count>)"
got <- readLines("GoT_screentime_full.txt", encoding = "UTF-8")

# Drop blank lines (e.g. a trailing newline) so they do not turn into all-NA
# rows further down.
got <- got[grepl("[^[:space:]]", got)]

# Split every line at once on the field delimiters " = ", ". ", "(", ":",
# "; " and ")". The closing parenthesis is escaped so the pattern does not
# depend on how the regex engine treats an unmatched ")".
pieces <- strsplit(got, split = " = |\\. |\\(|\\:|; |\\)")

# Piece 1 is the rank, which is discarded; pieces 2 to 6 are name, minutes,
# seconds, seasons and episode count. Indexing 2:6 pads short lines with NA.
# vapply() returns a 5 x n matrix (5 x 0 for empty input), hence the transpose.
fields <- t(vapply(pieces, function(p) p[2:6], character(5)))

# Every column is still character at this point; numeric conversion happens
# in a later step. Note the seconds piece keeps a trailing blank because the
# split happens on "(" rather than on " (".
gotdf <- as.data.frame(fields, stringsAsFactors = FALSE)
names(gotdf) <- c("name", "minutes", "seconds", "seasons", "episode_count")
# fix data types: minutes, seconds and episode_count were parsed as text.
# lapply() converts column by column; apply() would first coerce the data
# frame to a matrix and collapses to a plain vector when there is one row.
numeric_cols <- c("minutes", "seconds", "episode_count")
gotdf[numeric_cols] <- lapply(gotdf[numeric_cols], as.numeric)

# calculate screentime in seconds
gotdf$screentime_seconds <- (gotdf$minutes * 60) + gotdf$seconds
# extract family name from 'name' variable: the last space-separated word.
# Beware: gives first name, if no family name provided
# Vectorised over the whole column, so an empty data frame is handled too
# (a 1:nrow() loop would iterate over c(1, 0) in that case).
name_parts <- strsplit(as.character(gotdf$name), " ")
gotdf$family <- vapply(name_parts, function(p) tail(p, 1), character(1))

# Generate a Lannister-Stark-Other factor variable
# (stored as a character column with the values "Stark", "Lannister", "Other")
gotdf$sl_dummy <- ifelse(
  gotdf$family == "Stark",
  "Stark",
  ifelse(gotdf$family == "Lannister", "Lannister", "Other")
)
# Sex of each character ("m" / "f"), coded by hand.
# The values are assigned by position, so they must stay in the same order
# as the rows of gotdf; ten values per line, rows 1-65.
gotdf$sex <- c(
  "m", "f", "m", "f", "m", "f", "f", "m", "f", "m",  #  1-10
  "m", "m", "m", "m", "m", "m", "m", "m", "m", "f",  # 11-20
  "f", "f", "m", "m", "m", "m", "f", "f", "f", "f",  # 21-30
  "m", "m", "m", "m", "m", "m", "m", "f", "m", "m",  # 31-40
  "f", "m", "m", "m", "f", "m", "m", "m", "m", "m",  # 41-50
  "m", "f", "f", "m", "m", "m", "m", "m", "m", "f",  # 51-60
  "m", "m", "m", "m", "m"                            # 61-65
)
# extract info in how many seasons character appeared
# 'seasons' holds a comma-separated list such as "1,2,3". The Balon Greyjoy
# entry in the raw data is mistyped with a semicolon ("2;3"), so ";" is
# accepted as a separator as well; this replaces the former hard-coded
# correction of that single row.
season_lists <- strsplit(gotdf$seasons, "[,;]")

# One count per row, computed for the whole column at once, so an empty data
# frame is handled too.
gotdf$season_count <- vapply(season_lists, length, integer(1))
#########
# Plots #
#########
library(Cairo)

# Render one PNG through Cairo.
#   file   - output file name
#   draw   - zero-argument function that issues the plotting calls
#   width, height - device size, passed on to CairoPNG()
# The device is closed via on.exit(), so it does not stay open when a
# plotting call fails. Returns the file name invisibly.
save_png <- function(file, draw, width = 800, height = 400) {
  CairoPNG(file, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  draw()
  invisible(file)
}

# plot screentime as a function of episode count
save_png("st_epc.png", function() {
  plot(screentime_seconds ~ episode_count, data = gotdf,
       xlab = "Episode Count", ylab = "Screen time (seconds)")
})

# Lannister vs. Stark vs. Other screentime boxplot
save_png("lan_stark_other.png", function() {
  boxplot(screentime_seconds ~ sl_dummy, data = gotdf,
          xlab = "Family", ylab = "Screen time (seconds)")
})

# male vs. female character screentime
save_png("got_sex.png", function() {
  boxplot(screentime_seconds ~ sex, data = gotdf,
          xlab = "Sex", ylab = "Screen time (seconds)")
})
1. Tyrion Lannister = 166:15 (1,2,3; 28)
2. Daenerys Targaryen = 127:35 (1,2,3; 25)
3. Jon Snow = 126:41 (1,2,3; 24)
4. Arya Stark = 100:11 (1,2,3; 27)
5. Eddard Stark = 94:35 (1; 9)
6. Catelyn Stark = 91:14 (1,2,3; 25)
7. Cersei Lannister = 90:02 (1,2,3; 27)
8. Robb Stark = 89:16 (1,2,3; 22)
9. Sansa Stark = 85:42 (1,2,3; 24)
10. Theon Greyjoy = 84:11 (1,2,3; 23)
11. Jorah Mormont = 72:18 (1,2,3; 24)
12. Jaime Lannister = 71:25 (1,2,3; 19)
13. Samwell Tarly = 61:48 (1,2,3; 19)
14. Petyr Baelish = 60:00 (1,2,3; 19)
15. Bran Stark = 57:56 (1,2,3; 21)
16. Varys = 57:17 (1,2,3; 18)
17. Joffrey Baratheon = 54:22 (1,2,3; 23)
18. Tywin Lannister = 49:39 (1,2,3; 19)
19. Davos Seaworth = 44:00 (2,3; 10)
20. Ygritte = 42:55 (2,3; 12)
21. Shae = 40:45 (1,2,3; 16)
22. Brienne of Tarth = 39:38 (2,3; 14)
23. Gendry = 39:14 (1,2,3; 17)
24. Sandor Clegane = 39:03 (1,2,3; 21)
25. Bronn = 36:54 (1,2,3; 16)
26. Stannis Baratheon = 36:45 (2,3; 12)
27. Margaery Tyrell = 32:51 (2,3; 10)
28. Talisa Maegyr = 32:25 (2,3; 12)
29. Melisandre = 32:22 (2,3; 10)
30. Osha = 30:53 (1,2,3; 14)
31. Robert Baratheon = 29:38 (1; 7)
32. Grand Maester Pycelle = 28:38 (1,2,3; 15)
33. Khal Drogo = 27:20 (1,2; 10)
34. Barristan Selmy = 25:43 (1,3; 13)
35. Renly Baratheon = 25:26 (1,2; 8)
36. Maester Luwin = 24:22 (1,2; 14)
37. Jeor Mormont = 23:42 (1,2,3; 12)
38. Ros = 22:12 (1,2,3; 15)
39. Loras Tyrell = 21:54 (1,2,3; 11)
40. Viserys Targaryen = 20:15 (1; 5)
41. Gilly = 18:08 (2,3; 9)
42. Rodrik Cassel = 16:46 (1,2; 12)
43. Hot Pie = 15:47 (1,2,3; 10)
44. Thoros of Myr = 15:35 (3; 6)
45. Olenna Tyrell = 14:54 (3; 5)
46. Yoren = 13:48 (1,2; 7)
47. Ramsay Snow = 13:43 (3; 6)
48. Roose Bolton = 13:41 (2,3; 11)
49. Edmure Tully = 13:08 (3; 5)
50. Walder Frey = 12:26 (1,3; 3)
51. Brynden Tully = 12:00 (3; 5)
52. Yara Greyjoy = 11:50 (2,3; 5)
53. Missandei = 11:39 (3; 8)
54. Beric Dondarrion = 11:07 (1,3; 5)
55. Tormund Giantsbane = 10:27 (3; 7)
56. Daario Naharis = 10:12 (3; 3)
57. Jaqen H’ghar = 8:15 (1,2; 7)
58. Balon Greyjoy = 7:58 (2,3; 3)
59. Qyburn = 7:16 (3; 4)
60. Lysa Arryn = 7:03 (1; 4)
61. Benjen Stark = 7:00 (1; 3)
62. Maester Aemon = 6:49 (1,3; 4)
63. Mance Rayder = 6:22 (3; 3)
64. Syrio Forel = 6:15 (1; 3)
65. Robin Arryn = 4:37 (1; 4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment