Last active
January 6, 2020 14:55
-
-
Save jillymackay/06b27e982b4f412349fc0973f224e0c5 to your computer and use it in GitHub Desktop.
DBER-Animal Welfare code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualisations for paper | |
# Figure: the most frequent lemmatised words in the abstracts, drawn as a
# horizontal bar chart with the most frequent word at the top.
# Expects `awabstracts_un` to hold one row per token, with the raw token in
# `word` and its lemma in `lemword`.
awabstracts_un %>%
  # tidytext's `stop_words` is keyed on `word`; naming the key makes the join
  # explicit and silences the "Joining, by = ..." message.
  anti_join(stop_words, by = "word") %>%
  # Drop missing lemmas (a real NA or the literal string "NA") *before*
  # ranking, so a placeholder can never occupy one of the 20 slots.
  filter(!is.na(lemword), lemword != "NA") %>%
  count(lemword, sort = TRUE) %>%
  # Rank on the count column explicitly; top_n() keeps ties, so slightly more
  # than 20 rows may come back.
  top_n(20, n) %>%
  # Rows are sorted by descending count, so a descending rank places the most
  # frequent word at the top once the axes are flipped. seq_len() is safe
  # when no rows remain, unlike nrow(.):1.
  mutate(text_order = rev(seq_len(nrow(.)))) %>%
  ggplot(aes(reorder(lemword, text_order), n)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Word",
    y = "Frequency in animal welfare education abstracts (n = 188)"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  ) +
  coord_flip()
# Figure: number of publications per year, one histogram bin per year.
# NOTE(review): `fill` is mapped to Year.Published while a manual (discrete)
# fill scale is supplied. If Year.Published is numeric this pairing may have
# no visible effect -- confirm the intended colouring.
ggplot(
  data = awpapers,
  mapping = aes(x = Year.Published, fill = Year.Published)
) +
  geom_histogram(binwidth = 1) +
  # One labelled tick per year across the search window.
  scale_x_continuous(breaks = seq(from = 1978, to = 2017, by = 1)) +
  scale_fill_manual(values = wes_palette("GrandBudapest2")) +
  labs(
    x = "Publication Year",
    y = "Number of Publications"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )
# Figure: number of publications per source, bars ordered from the most to
# the least frequent source.
ggplot(
  data = by.pub,
  mapping = aes(
    # Reorder the sources by how many rows each one has, largest first.
    x = fct_reorder(Source.Short, Source.Short, .fun = length, .desc = TRUE)
  )
) +
  geom_bar() +
  scale_y_continuous() +
  labs(
    x = "Source Name",
    y = "Number of Publications"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Animal Welfare Methods" | |
author: "Jill MacKay" | |
date: "1 February 2018" | |
output: html_document | |
--- | |
# Overview of literature at present
A Web of Science search in October 2017 reported 406 articles over a 39-year period featuring the topic name "animal welfare" AND "education". Upon review of the abstracts and text of the publications, 200 were excluded for not specifically discussing animal welfare in an educational context, e.g. they proposed that further education would improve animal welfare, or explored the impact of education on attitudes to animal welfare. This excluded a large number of studies about consumer choice behaviour with regard to animal products. Studies were retained where they explored students' perceptions of animal welfare, as participants in these studies were being recruited specifically because of their student role. A further three studies were excluded for being duplicate records. Excluding book reviews, news items and editorial materials, there were 188 publications from 1990 to 2017 relating to education and animal welfare.
## The R Environment | |
```{r, echo = FALSE, message = FALSE} | |
library(tidyverse) | |
library(tidytext) | |
library(wesanderson) | |
library(wordcloud) | |
library(textstem) | |
library(tm) | |
``` | |
## The Data | |
```{r} | |
# Read the screened Web of Science export, give the two-letter field tags
# readable names, and drop the contact/address columns.
awpapers <- readxl::read_excel(
  "/WoS lit Search 13.10.17.1.xlsx",
  sheet = "The 188"
) %>%
  rename(
    # Authors and document type
    Publication.Type    = "PT (Publication Type?)",
    Book.Grp.Authors    = GP,
    Author.Full         = AF,
    Group.Authors       = CA,
    Doc.Type            = DT,
    # Conference details
    Conference.Date     = CY,
    Conf.Location       = CL,
    Conf.Sponsors       = SP,
    Conf.Host           = HO,
    # Keywords
    Keywords.Author     = DE,
    Keywords.Plus       = ID,
    # Addresses and author identifiers
    Author.Address      = C1,
    Reprint.Address     = RP,
    Contact.Email       = EM,
    ResearchID          = RI,
    OrcID               = OI,
    # Funding
    Funding.Agency      = FU,
    Funding.Text        = FX,
    # Citations and usage
    Cited.Refs          = CR,
    Cited.Refs.Count    = NR,
    Times.Cited.Core    = TC,
    Times.Cited         = Z9,
    Usage.180Days       = U1,
    Usage.Since2013     = U2,
    # Publisher and serial identifiers
    Publisher           = PU,
    Publisher.City      = PI,
    Publisher.Address   = PA,
    ISSN                = SN,
    eiSSN               = EI,
    ISBN                = BN,
    Source.Abbrev       = J9,
    ISO.Source.Abbrev   = JI,
    # Issue and pagination
    Date.Published      = PD,
    Year.Published      = PY,
    Volume              = VL,
    Issue               = IS,
    Part.Number         = PN,
    Supplement          = SU,
    Special.Issue       = SI,
    Meeting.Abstract    = MA,
    Pg.Start            = BP,
    Pg.End              = EP,
    Article.Number      = AR,
    DOI                 = DI,
    BkDOI               = D2,
    Page.Count          = PG,
    # Classification and record bookkeeping
    WoS.Cats            = WC,
    Res.Areas           = SC,
    Doc.Delivery.Number = GA,
    Accession.Number    = UT,
    PubMedID            = PM,
    Open.Access.Journal = OA,
    Highly.Cited        = HC,
    Hot.Paper           = HP,
    Date.Exported       = DA
  ) %>%
  # The contact and address fields are not carried into the analysis.
  select(-c(Contact.Email, Author.Address, Reprint.Address))
``` | |
## What Do The Abstracts Say? | |
While subjective analysis of the content of these papers is the most valuable aspect of this review, it will start with an objective exploration of the information contained within the metadata. The analysis used R version 3.4.2 (Short Summer) from the R Foundation for Statistical Computing, together with RStudio. The `textstem` package was used to lemmatise abstracts. Lemmatisation is a form of language processing which stems words with reference to their grammatical origin, e.g. 'running', 'runs' and 'ran' would be shortened to 'run', while 'runner' would be retained as independent of 'run'. Then the text mining package (`tm`) was used to strip data from the abstracts of these papers and explore the most frequent words through the use of document term matrices.
```{r} | |
# Lemmatise every abstract, then build a cleaned tm corpus and its
# document-term matrix to inspect the most frequent terms.
awpapers$LemAbstracts <- lemmatize_strings(awpapers$Abstract)

Abstract.corpus <- Corpus(VectorSource(awpapers$LemAbstracts)) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  # tolower() is a base function, not a tm transformation. Wrapping it in
  # content_transformer() keeps the documents as corpus documents whichever
  # corpus class Corpus() returns, so DocumentTermMatrix() below still works.
  tm_map(content_transformer(tolower)) %>%
  # Stop-word matching is case-sensitive, so it must follow the lower-casing.
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stripWhitespace)

Abstract.dtm <- DocumentTermMatrix(Abstract.corpus)

# Print a summary of the matrix and the terms that occur at least 20 times.
inspect(Abstract.dtm)
findFreqTerms(Abstract.dtm, lowfreq = 20)
# One row per abstract, labelled with a zero-padded paper id ("P001", ...).
# The ids are derived from the number of abstracts rather than a hard-coded
# 1:188, so the chunk still runs if the screened set changes size.
awabstracts <- tibble(text = awpapers$Abstract) %>%
  mutate(paper.id = str_c("P", str_pad(seq_along(text), 3, "left", "0")))

# One row per token: `word` is the raw token and `lemword` its lemma.
awabstracts_un <- awabstracts %>%
  unnest_tokens(word, text) %>%
  mutate(
    paper.id = as.factor(paper.id),
    lemword = lemmatize_strings(word)
  )
# Bar chart of the most frequent lemmatised words in the abstracts, with the
# most frequent word at the top.
aw_word_freq <- awabstracts_un %>%
  # tidytext's `stop_words` is keyed on `word`; naming the key makes the join
  # explicit and silences the "Joining, by = ..." message.
  anti_join(stop_words, by = "word") %>%
  # Drop missing lemmas (a real NA or the literal string "NA") *before*
  # ranking, so a placeholder can never occupy one of the 20 slots.
  filter(!is.na(lemword), lemword != "NA") %>%
  count(lemword, sort = TRUE) %>%
  # Rank on the count column explicitly; top_n() keeps ties, so slightly more
  # than 20 rows may come back.
  top_n(20, n) %>%
  # Rows are sorted by descending count, so a descending rank places the most
  # frequent word at the top once the axes are flipped. seq_len() is safe
  # when no rows remain, unlike nrow(.):1.
  mutate(text_order = rev(seq_len(nrow(.)))) %>%
  ggplot(aes(reorder(lemword, text_order), n)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Word",
    y = "Frequency in animal welfare education abstracts (n = 188)"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  ) +
  coord_flip()

aw_word_freq
``` | |
## Publications by Time | |
```{r} | |
# Histogram of publications per year, one bin per year.
ByYear <- awpapers %>%
  ggplot(aes(x = Year.Published)) +
  geom_histogram(binwidth = 1) +
  # Remember to check via summary(awpapers$Year.Published) that the x scale
  # is right.
  scale_x_continuous(breaks = seq(from = 1978, to = 2017, by = 1)) +
  labs(
    title = "Publications by year (n = 188) in Web of Science Search \n Topics = 'Animal Welfare' AND 'Education'",
    x = "Publication Year",
    y = "Number of Publications"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

ByYear
``` | |
## Publications by Source | |
```{r} | |
# Records that name a source, grouped by that source.
by.pub <- awpapers %>%
  filter(Source.Abbrev != "") %>%
  group_by(Source.Abbrev)

# Turn the source into a factor whose levels run from the most to the least
# frequent source.
by.pub$Source.Abbrev <- factor(
  by.pub$Source.Abbrev,
  levels = names(sort(table(by.pub$Source.Abbrev), decreasing = TRUE))
)

# Check how many levels you want to retain
table(by.pub$Source.Abbrev)

# Check this looks right
table(
  fct_lump(
    by.pub$Source.Abbrev,
    n = 13,
    other_level = "Journals with <3 Publications"
  )
)

# Rebuild by.pub with the lumped source attached as Source.Short. The
# fct_lump() call reads Source.Abbrev from the by.pub built above (same
# filter, so the rows line up) before by.pub is overwritten.
by.pub <- filter(awpapers, Source.Abbrev != "") %>%
  mutate(
    Source.Short = fct_lump(
      by.pub$Source.Abbrev,
      n = 13,
      other_level = "Journals With <3 Publications"
    )
  ) %>%
  group_by(Source.Short)
# Bar chart of publications per (lumped) source, most frequent source first.
BySource <- by.pub %>%
  ggplot(
    aes(
      # Reorder the sources by how many rows each one has, largest first.
      x = fct_reorder(Source.Short, Source.Short, .fun = length, .desc = TRUE)
    )
  ) +
  geom_bar() +
  scale_y_continuous() +
  labs(
    title = "Publications by source title (n = 188) in Web of Science Search \n Topics = 'Animal Welfare' AND 'Education'",
    x = "Source Name",
    y = "N Publications"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

BySource
``` | |
## What Are The Papers About | |
```{r} | |
# Tabulate how often each hand-coded category occurs.
summary(as.factor(awpapers[["Animal Category"]]))
summary(as.factor(awpapers[["People Category"]]))
summary(as.factor(awpapers[["Paper Purpose"]]))

# Collect the three category codings with the year and source of each paper.
tb_cats <- with(
  awpapers,
  tibble(
    animal = `Animal Category`,
    audience = `People Category`,
    purpose = `Paper Purpose`,
    Year.Published = Year.Published,
    Source.Abbrev = Source.Abbrev
  )
)
# Level sets for the three category codings.
# NOTE(review): "NA" is listed as an ordinary level, so it is presumably a
# literal label in the spreadsheet rather than a missing value -- confirm.
lv.a <- c(
  "Captive Wild Animals", "Companion Animals", "Equine", "General",
  "Laboratory animals", "NA", "Production Animals", "Wild Animals"
)
lv.ppl <- c(
  "Adolescents", "Children", "Industry", "NA", "Public", "teachers",
  "university", "Veterinary"
)
lv.p <- c(
  "Animal-Welfare", "Attitudes", "Call to Action", "NA",
  "Pedagogy Practice Review"
)

# Convert each coding column to a factor with its level set.
# The previous code piped the whole tibble into as_factor(); forcats provides
# no data-frame method and no levels argument, so the columns were never
# converted. factor(levels = ) is applied per column instead.
# Any value not listed in the level set becomes NA.
tb_cats <- tb_cats %>%
  mutate(
    animal = factor(animal, levels = lv.a),
    audience = factor(audience, levels = lv.ppl),
    purpose = factor(purpose, levels = lv.p)
  )
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment