jillymackay/Methods.Rmd

## gistfile1.txt
# Visualisations for paper


# Bar chart of the most frequent lemmatised words across all abstracts.
# Pipeline: drop stop words -> count each lemma -> keep the 20 largest counts
# -> record the rank order -> discard the "NA" lemma -> plot horizontally.
awabstracts_un %>%
  anti_join(stop_words) %>%
  count(lemword, sort = TRUE) %>%
  top_n(20) %>%
  # Rows arrive sorted by descending count, so the first row gets the highest
  # text_order and ends up at the top once the axes are flipped.
  mutate(text_order = nrow(.):1) %>%
  filter(lemword != "NA") %>%
  ggplot(aes(x = reorder(lemword, text_order), y = n)) +
  geom_col() +
  labs(
    x = "Word",
    y = "Frequency in animal welfare education abstracts (n = 188)"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  ) +
  coord_flip()


# Histogram of publications per year, with one fill colour per year.
# Year.Published is placed on a continuous x scale below, so it is wrapped in
# factor() for the fill: a manual fill scale is discrete and rejects a
# continuous variable. "GrandBudapest2" has fewer colours than there are
# years, so the palette is interpolated to one colour per distinct year.
awpapers %>%
  ggplot(aes(x = Year.Published, fill = factor(Year.Published))) +
  geom_histogram(binwidth = 1) +
  labs(x = "Publication Year", y = "Number of Publications") +
  scale_fill_manual(
    values = wes_palette(
      "GrandBudapest2",
      n = n_distinct(awpapers$Year.Published),
      type = "continuous"
    )
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  # The tick range is hard-coded; check it against the years in the data.
  scale_x_continuous(breaks = seq(1978, 2017, 1))


# Bar chart of publication counts per (lumped) source, most frequent first.
ggplot(
  by.pub,
  # Reordering Source.Short by the length of its own groups sorts the bars by
  # how many rows each source has; .desc = TRUE puts the largest first.
  aes(x = fct_reorder(Source.Short, Source.Short, .fun = length, .desc = TRUE))
) +
  geom_bar() +
  labs(x = "Source Name", y = "Number of Publications") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  scale_y_continuous()


## Methods.Rmd
---
title: "Animal Welfare Methods"
author: "Jill MacKay"
date: "1 February 2018"
output: html_document
---

# Overview of literature at present

A Web of Science search in October 2017 reported 406 articles over a 39-year period featuring the topic name "animal welfare" AND "education". Upon review of the abstracts and text of the publications, 200 were excluded for not specifically discussing animal welfare in an educational context, e.g. they proposed that further education would improve animal welfare, or explored the impact of education on attitudes to animal welfare. This excluded a large number of studies about consumer choice behaviour with regard to animal products. Studies were retained where they explored students' perceptions of animal welfare, as participants in these studies were being recruited specifically because of their student role. A further three studies were excluded for being duplicate records. Excluding book reviews, news items and editorial materials, there were 188 publications from 1990 to 2017 relating to education and animal welfare.

## The R Environment
```{r, echo = FALSE, message = FALSE}
library(tidyverse)
library(tidytext)
library(wesanderson)
library(wordcloud)
library(textstem)
library(tm)
```

## The Data
```{r}
# Read the "The 188" sheet of the search export.
awpapers <- readxl::read_excel(
  path = "/WoS lit Search 13.10.17.1.xlsx",
  sheet = "The 188"
)

# Replace the short export column tags with descriptive names, then drop the
# e-mail and address columns.
awpapers <- awpapers %>%
  rename(
    # Publication, author and conference fields
    Publication.Type = "PT (Publication Type?)",
    Book.Grp.Authors = GP,
    Author.Full = AF,
    Group.Authors = CA,
    Doc.Type = DT,
    Conference.Date = CY,
    Conf.Location = CL,
    Conf.Sponsors = SP,
    Conf.Host = HO,
    # Keywords, contact details and identifiers
    Keywords.Author = DE,
    Keywords.Plus = ID,
    Author.Address = C1,
    Reprint.Address = RP,
    Contact.Email = EM,
    ResearchID = RI,
    OrcID = OI,
    # Funding, citations and usage
    Funding.Agency = FU,
    Funding.Text = FX,
    Cited.Refs = CR,
    Cited.Refs.Count = NR,
    Times.Cited.Core = TC,
    Times.Cited = Z9,
    Usage.180Days = U1,
    Usage.Since2013 = U2,
    # Publisher and source
    Publisher = PU,
    Publisher.City = PI,
    Publisher.Address = PA,
    ISSN = SN,
    eiSSN = EI,
    ISBN = BN,
    Source.Abbrev = J9,
    ISO.Source.Abbrev = JI,
    # Date, volume and pagination
    Date.Published = PD,
    Year.Published = PY,
    Volume = VL,
    Issue = IS,
    Part.Number = PN,
    Supplement = SU,
    Special.Issue = SI,
    Meeting.Abstract = MA,
    Pg.Start = BP,
    Pg.End = EP,
    Article.Number = AR,
    DOI = DI,
    BkDOI = D2,
    Page.Count = PG,
    # Categories, record identifiers and flags
    WoS.Cats = WC,
    Res.Areas = SC,
    Doc.Delivery.Number = GA,
    Accession.Number = UT,
    PubMedID = PM,
    Open.Access.Journal = OA,
    Highly.Cited = HC,
    Hot.Paper = HP,
    Date.Exported = DA
  ) %>%
  select(-c(Contact.Email, Author.Address, Reprint.Address))

```


## What Do The Abstracts Say?
While subjective analysis of the content of these papers is the most valuable aspect of this review, it starts with an objective exploration of the information contained within the metadata. All analyses were run in R Version 3.4.2 (Short Summer) from the R Foundation for Statistical Computing, using R Studio. The `textstem` package was used to lemmatise abstracts. Lemmatisation is a form of language processing which stems words with reference to their grammatical origin, e.g. 'running', 'runs' and 'ran' would be shortened to 'run', while 'runner' would be retained as independent of 'run'. Then the text mining package (`tm`) was used to strip data from the abstracts of these papers and explore the most frequent words through the use of document term matrices.

```{r}
# Lemmatise every abstract and keep the result alongside the original text.
awpapers$LemAbstracts <- lemmatize_strings(awpapers$Abstract)

# Build a corpus from the lemmatised abstracts and clean it step by step.
Abstract.corpus <- Corpus(VectorSource(awpapers$LemAbstracts)) %>%
  tm_map(removePunctuation) %>%
  tm_map(removeNumbers) %>%
  # tolower() is a base R function rather than a tm transformation; wrapping
  # it in content_transformer() makes it modify only the document content.
  tm_map(content_transformer(tolower)) %>%
  # Stop words are removed after lower-casing so capitalised ones match too.
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stripWhitespace)

# Document-term matrix of the cleaned corpus, plus two quick checks:
# a printed overview and the terms that occur at least 20 times.
Abstract.dtm <- DocumentTermMatrix(Abstract.corpus)
inspect(Abstract.dtm)
findFreqTerms(Abstract.dtm, 20)


# One row per abstract with a zero-padded id ("P001", "P002", ...).
# The id is derived from the row position instead of a hard-coded 1:188, so
# the chunk keeps working if the number of papers changes.
awabstracts <- tibble(text = awpapers$Abstract) %>%
  mutate(paper.id = str_c("P", str_pad(row_number(), 3, "left", "0")))

# One row per word: split each abstract into tokens, turn the id into a
# factor, and add the lemmatised form of each word.
awabstracts_un <- awabstracts %>%
  unnest_tokens(word, text) %>%
  mutate(
    paper.id = as.factor(paper.id),
    lemword = lemmatize_strings(word)
  )

# Bar chart of the most frequent lemmatised words, stored as aw_word_freq.
# Pipeline: drop stop words -> count each lemma -> keep the 20 largest counts
# -> record the rank order -> discard the "NA" lemma -> plot horizontally.
aw_word_freq <- awabstracts_un %>%
  anti_join(stop_words) %>%
  count(lemword, sort = TRUE) %>%
  top_n(20) %>%
  # Rows arrive sorted by descending count, so the first row gets the highest
  # text_order and ends up at the top once the axes are flipped.
  mutate(text_order = nrow(.):1) %>%
  filter(lemword != "NA") %>%
  ggplot(aes(x = reorder(lemword, text_order), y = n)) +
  geom_col() +
  labs(
    x = "Word",
    y = "Frequency in animal welfare education abstracts (n = 188)"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    legend.position = "none"
  ) +
  coord_flip()

# Print the plot into the rendered document.
aw_word_freq

```

## Publications by Time

```{r}
# Histogram of publications per year (one bin per year), stored as ByYear and
# then printed.
ByYear <- awpapers %>%
  ggplot(aes(x = Year.Published)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Publications by year (n = 188) in Web of Science Search \n Topics = 'Animal Welfare' AND 'Education'",
    x = "Publication Year",
    y = "Number of Publications"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  # The tick range is hard-coded: check summary(awpapers$Year.Published) to
  # confirm it still covers the data.
  scale_x_continuous(breaks = seq(from = 1978, to = 2017, by = 1))
ByYear

```


## Publications by Source

```{r}
# Papers with a non-empty source, plus Source.Short: the 13 most frequent
# sources kept as they are and every other source lumped into one level.
# Source.Short is computed from the column inside the pipeline rather than
# from a separately built copy of the data, so it cannot drift out of row
# alignment, and the lumped level has one spelling throughout.
by.pub <- awpapers %>%
  # filter() keeps only rows where the test is TRUE, so rows with a missing
  # source are dropped along with the empty strings.
  filter(Source.Abbrev != "") %>%
  mutate(
    Source.Short = fct_lump(
      fct_infreq(Source.Abbrev), # levels ordered from most to least frequent
      n = 13,
      other_level = "Journals With <3 Publications"
    )
  ) %>%
  # Grouping is kept so by.pub has the same shape as before for later code.
  group_by(Source.Short)

# Check how many levels you want to retain
sort(table(by.pub$Source.Abbrev), decreasing = TRUE)
# Check this looks right
table(by.pub$Source.Short)


# Bar chart of publication counts per (lumped) source, most frequent first,
# stored as BySource and then printed.
BySource <- by.pub %>%
  ggplot(
    # Reordering Source.Short by the length of its own groups sorts the bars
    # by how many rows each source has; .desc = TRUE puts the largest first.
    aes(x = fct_reorder(Source.Short, Source.Short, .fun = length, .desc = TRUE))
  ) +
  geom_bar() +
  labs(
    title = "Publications by source title (n = 188) in Web of Science Search \n Topics = 'Animal Welfare' AND 'Education'",
    x = "Source Name",
    y = "N Publications"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  ) +
  scale_y_continuous()
BySource
```

## What Are The Papers About

```{r}
# Frequency tables of the three category columns.
summary(as.factor(awpapers$`Animal Category`))
summary(as.factor(awpapers$`People Category`))
summary(as.factor(awpapers$`Paper Purpose`))

# Narrow table holding just the categories plus year and source.
tb_cats <- tibble(
  animal = awpapers$`Animal Category`,
  audience = awpapers$`People Category`,
  purpose = awpapers$`Paper Purpose`,
  Year.Published = awpapers$Year.Published,
  Source.Abbrev = awpapers$Source.Abbrev
)

# Levels for each category. "NA" here is the literal string, not a missing
# value.
lv.a <- c("Captive Wild Animals", "Companion Animals", "Equine", "General", "Laboratory animals", "NA", "Production Animals", "Wild Animals")
lv.ppl <- c("Adolescents", "Children", "Industry", "NA", "Public", "teachers", "university", "Veterinary")
lv.p <- c("Animal-Welfare", "Attitudes", "Call to Action", "NA", "Pedagogy Practice Review")

# as_factor() converts the object given as its first argument, so piping the
# whole tibble into it fails. Convert each column inside mutate() instead.
# Any value that is not listed in the level vector becomes NA.
tb_cats <- tb_cats %>%
  mutate(
    animal = factor(animal, levels = lv.a),
    audience = factor(audience, levels = lv.ppl),
    purpose = factor(purpose, levels = lv.p)
  )

```
	# Visualisations for paper


	# Bar chart of the most frequent lemmatised words across all abstracts.
	# Pipeline: drop stop words -> count each lemma -> keep the 20 largest counts
	# -> record the rank order -> discard the "NA" lemma -> plot horizontally.
	awabstracts_un %>%
	  anti_join(stop_words) %>%
	  count(lemword, sort = TRUE) %>%
	  top_n(20) %>%
	  # Rows arrive sorted by descending count, so the first row gets the highest
	  # text_order and ends up at the top once the axes are flipped.
	  mutate(text_order = nrow(.):1) %>%
	  filter(lemword != "NA") %>%
	  ggplot(aes(x = reorder(lemword, text_order), y = n)) +
	  geom_col() +
	  labs(
	    x = "Word",
	    y = "Frequency in animal welfare education abstracts (n = 188)"
	  ) +
	  theme_bw() +
	  theme(
	    panel.grid = element_blank(),
	    panel.background = element_blank(),
	    legend.position = "none"
	  ) +
	  coord_flip()


	# Histogram of publications per year, with one fill colour per year.
	# Year.Published is placed on a continuous x scale below, so it is wrapped in
	# factor() for the fill: a manual fill scale is discrete and rejects a
	# continuous variable. "GrandBudapest2" has fewer colours than there are
	# years, so the palette is interpolated to one colour per distinct year.
	awpapers %>%
	  ggplot(aes(x = Year.Published, fill = factor(Year.Published))) +
	  geom_histogram(binwidth = 1) +
	  labs(x = "Publication Year", y = "Number of Publications") +
	  scale_fill_manual(
	    values = wes_palette(
	      "GrandBudapest2",
	      n = n_distinct(awpapers$Year.Published),
	      type = "continuous"
	    )
	  ) +
	  theme_classic() +
	  theme(
	    axis.text.x = element_text(angle = 90),
	    panel.grid = element_blank(),
	    plot.title = element_text(hjust = 0.5),
	    legend.position = "none"
	  ) +
	  # The tick range is hard-coded; check it against the years in the data.
	  scale_x_continuous(breaks = seq(1978, 2017, 1))


	# Bar chart of publication counts per (lumped) source, most frequent first.
	ggplot(
	  by.pub,
	  # Reordering Source.Short by the length of its own groups sorts the bars by
	  # how many rows each source has; .desc = TRUE puts the largest first.
	  aes(x = fct_reorder(Source.Short, Source.Short, .fun = length, .desc = TRUE))
	) +
	  geom_bar() +
	  labs(x = "Source Name", y = "Number of Publications") +
	  theme_classic() +
	  theme(
	    axis.text.x = element_text(angle = 90),
	    panel.grid = element_blank(),
	    plot.title = element_text(hjust = 0.5),
	    legend.position = "none"
	  ) +
	  scale_y_continuous()
	---
	title: "Animal Welfare Methods"
	author: "Jill MacKay"
	date: "1 February 2018"
	output: html_document
	---

	# Overview of literature at present

	A Web of Science search in October 2017 reported 406 articles over a 39-year period featuring the topic name "animal welfare" AND "education". Upon review of the abstracts and text of the publications, 200 were excluded for not specifically discussing animal welfare in an educational context, e.g. they proposed that further education would improve animal welfare, or explored the impact of education on attitudes to animal welfare. This excluded a large number of studies about consumer choice behaviour with regard to animal products. Studies were retained where they explored students' perceptions of animal welfare, as participants in these studies were being recruited specifically because of their student role. A further three studies were excluded for being duplicate records. Excluding book reviews, news items and editorial materials, there were 188 publications from 1990 to 2017 relating to education and animal welfare.

	## The R Environment
	```{r, echo = FALSE, message = FALSE}
	library(tidyverse)
	library(tidytext)
	library(wesanderson)
	library(wordcloud)
	library(textstem)
	library(tm)
	```

	## The Data
	```{r}
	# Read the "The 188" sheet of the search export.
	awpapers <- readxl::read_excel(
	  path = "/WoS lit Search 13.10.17.1.xlsx",
	  sheet = "The 188"
	)

	# Replace the short export column tags with descriptive names, then drop the
	# e-mail and address columns.
	awpapers <- awpapers %>%
	  rename(
	    # Publication, author and conference fields
	    Publication.Type = "PT (Publication Type?)",
	    Book.Grp.Authors = GP,
	    Author.Full = AF,
	    Group.Authors = CA,
	    Doc.Type = DT,
	    Conference.Date = CY,
	    Conf.Location = CL,
	    Conf.Sponsors = SP,
	    Conf.Host = HO,
	    # Keywords, contact details and identifiers
	    Keywords.Author = DE,
	    Keywords.Plus = ID,
	    Author.Address = C1,
	    Reprint.Address = RP,
	    Contact.Email = EM,
	    ResearchID = RI,
	    OrcID = OI,
	    # Funding, citations and usage
	    Funding.Agency = FU,
	    Funding.Text = FX,
	    Cited.Refs = CR,
	    Cited.Refs.Count = NR,
	    Times.Cited.Core = TC,
	    Times.Cited = Z9,
	    Usage.180Days = U1,
	    Usage.Since2013 = U2,
	    # Publisher and source
	    Publisher = PU,
	    Publisher.City = PI,
	    Publisher.Address = PA,
	    ISSN = SN,
	    eiSSN = EI,
	    ISBN = BN,
	    Source.Abbrev = J9,
	    ISO.Source.Abbrev = JI,
	    # Date, volume and pagination
	    Date.Published = PD,
	    Year.Published = PY,
	    Volume = VL,
	    Issue = IS,
	    Part.Number = PN,
	    Supplement = SU,
	    Special.Issue = SI,
	    Meeting.Abstract = MA,
	    Pg.Start = BP,
	    Pg.End = EP,
	    Article.Number = AR,
	    DOI = DI,
	    BkDOI = D2,
	    Page.Count = PG,
	    # Categories, record identifiers and flags
	    WoS.Cats = WC,
	    Res.Areas = SC,
	    Doc.Delivery.Number = GA,
	    Accession.Number = UT,
	    PubMedID = PM,
	    Open.Access.Journal = OA,
	    Highly.Cited = HC,
	    Hot.Paper = HP,
	    Date.Exported = DA
	  ) %>%
	  select(-c(Contact.Email, Author.Address, Reprint.Address))

	```


	## What Do The Abstracts Say?
	While subjective analysis of the content of these papers is the most valuable aspect of this review, it starts with an objective exploration of the information contained within the metadata. All analyses were run in R Version 3.4.2 (Short Summer) from the R Foundation for Statistical Computing, using R Studio. The `textstem` package was used to lemmatise abstracts. Lemmatisation is a form of language processing which stems words with reference to their grammatical origin, e.g. 'running', 'runs' and 'ran' would be shortened to 'run', while 'runner' would be retained as independent of 'run'. Then the text mining package (`tm`) was used to strip data from the abstracts of these papers and explore the most frequent words through the use of document term matrices.

	```{r}
	# Lemmatise every abstract and keep the result alongside the original text.
	awpapers$LemAbstracts <- lemmatize_strings(awpapers$Abstract)

	# Build a corpus from the lemmatised abstracts and clean it step by step.
	Abstract.corpus <- Corpus(VectorSource(awpapers$LemAbstracts)) %>%
	  tm_map(removePunctuation) %>%
	  tm_map(removeNumbers) %>%
	  # tolower() is a base R function rather than a tm transformation; wrapping
	  # it in content_transformer() makes it modify only the document content.
	  tm_map(content_transformer(tolower)) %>%
	  # Stop words are removed after lower-casing so capitalised ones match too.
	  tm_map(removeWords, stopwords("english")) %>%
	  tm_map(stripWhitespace)

	# Document-term matrix of the cleaned corpus, plus two quick checks:
	# a printed overview and the terms that occur at least 20 times.
	Abstract.dtm <- DocumentTermMatrix(Abstract.corpus)
	inspect(Abstract.dtm)
	findFreqTerms(Abstract.dtm, 20)






	# One row per abstract with a zero-padded id ("P001", "P002", ...).
	# The id is derived from the row position instead of a hard-coded 1:188, so
	# the chunk keeps working if the number of papers changes.
	awabstracts <- tibble(text = awpapers$Abstract) %>%
	  mutate(paper.id = str_c("P", str_pad(row_number(), 3, "left", "0")))

	# One row per word: split each abstract into tokens, turn the id into a
	# factor, and add the lemmatised form of each word.
	awabstracts_un <- awabstracts %>%
	  unnest_tokens(word, text) %>%
	  mutate(
	    paper.id = as.factor(paper.id),
	    lemword = lemmatize_strings(word)
	  )

	# Bar chart of the most frequent lemmatised words, stored as aw_word_freq.
	# Pipeline: drop stop words -> count each lemma -> keep the 20 largest counts
	# -> record the rank order -> discard the "NA" lemma -> plot horizontally.
	aw_word_freq <- awabstracts_un %>%
	  anti_join(stop_words) %>%
	  count(lemword, sort = TRUE) %>%
	  top_n(20) %>%
	  # Rows arrive sorted by descending count, so the first row gets the highest
	  # text_order and ends up at the top once the axes are flipped.
	  mutate(text_order = nrow(.):1) %>%
	  filter(lemword != "NA") %>%
	  ggplot(aes(x = reorder(lemword, text_order), y = n)) +
	  geom_col() +
	  labs(
	    x = "Word",
	    y = "Frequency in animal welfare education abstracts (n = 188)"
	  ) +
	  theme_bw() +
	  theme(
	    panel.grid = element_blank(),
	    panel.background = element_blank(),
	    legend.position = "none"
	  ) +
	  coord_flip()

	# Print the plot into the rendered document.
	aw_word_freq

	```

	## Publications by Time

	```{r}
	# Histogram of publications per year (one bin per year), stored as ByYear and
	# then printed.
	ByYear <- awpapers %>%
	  ggplot(aes(x = Year.Published)) +
	  geom_histogram(binwidth = 1) +
	  labs(
	    title = "Publications by year (n = 188) in Web of Science Search \n Topics = 'Animal Welfare' AND 'Education'",
	    x = "Publication Year",
	    y = "Number of Publications"
	  ) +
	  theme_bw() +
	  theme(
	    axis.text.x = element_text(angle = 90),
	    panel.grid = element_blank(),
	    plot.title = element_text(hjust = 0.5),
	    legend.position = "none"
	  ) +
	  # The tick range is hard-coded: check summary(awpapers$Year.Published) to
	  # confirm it still covers the data.
	  scale_x_continuous(breaks = seq(from = 1978, to = 2017, by = 1))
	ByYear

	```


	## Publications by Source

	```{r}
	# Papers with a non-empty source, plus Source.Short: the 13 most frequent
	# sources kept as they are and every other source lumped into one level.
	# Source.Short is computed from the column inside the pipeline rather than
	# from a separately built copy of the data, so it cannot drift out of row
	# alignment, and the lumped level has one spelling throughout.
	by.pub <- awpapers %>%
	  # filter() keeps only rows where the test is TRUE, so rows with a missing
	  # source are dropped along with the empty strings.
	  filter(Source.Abbrev != "") %>%
	  mutate(
	    Source.Short = fct_lump(
	      fct_infreq(Source.Abbrev), # levels ordered from most to least frequent
	      n = 13,
	      other_level = "Journals With <3 Publications"
	    )
	  ) %>%
	  # Grouping is kept so by.pub has the same shape as before for later code.
	  group_by(Source.Short)

	# Check how many levels you want to retain
	sort(table(by.pub$Source.Abbrev), decreasing = TRUE)
	# Check this looks right
	table(by.pub$Source.Short)



	# Bar chart of publication counts per (lumped) source, most frequent first,
	# stored as BySource and then printed.
	BySource <- by.pub %>%
	  ggplot(
	    # Reordering Source.Short by the length of its own groups sorts the bars
	    # by how many rows each source has; .desc = TRUE puts the largest first.
	    aes(x = fct_reorder(Source.Short, Source.Short, .fun = length, .desc = TRUE))
	  ) +
	  geom_bar() +
	  labs(
	    title = "Publications by source title (n = 188) in Web of Science Search \n Topics = 'Animal Welfare' AND 'Education'",
	    x = "Source Name",
	    y = "N Publications"
	  ) +
	  theme_classic() +
	  theme(
	    axis.text.x = element_text(angle = 90),
	    panel.grid = element_blank(),
	    plot.title = element_text(hjust = 0.5),
	    legend.position = "none"
	  ) +
	  scale_y_continuous()
	BySource
	```

	## What Are The Papers About

	```{r}
	# Frequency tables of the three category columns.
	summary(as.factor(awpapers$`Animal Category`))
	summary(as.factor(awpapers$`People Category`))
	summary(as.factor(awpapers$`Paper Purpose`))

	# Narrow table holding just the categories plus year and source.
	tb_cats <- tibble(
	  animal = awpapers$`Animal Category`,
	  audience = awpapers$`People Category`,
	  purpose = awpapers$`Paper Purpose`,
	  Year.Published = awpapers$Year.Published,
	  Source.Abbrev = awpapers$Source.Abbrev
	)

	# Levels for each category. "NA" here is the literal string, not a missing
	# value.
	lv.a <- c("Captive Wild Animals", "Companion Animals", "Equine", "General", "Laboratory animals", "NA", "Production Animals", "Wild Animals")
	lv.ppl <- c("Adolescents", "Children", "Industry", "NA", "Public", "teachers", "university", "Veterinary")
	lv.p <- c("Animal-Welfare", "Attitudes", "Call to Action", "NA", "Pedagogy Practice Review")

	# as_factor() converts the object given as its first argument, so piping the
	# whole tibble into it fails. Convert each column inside mutate() instead.
	# Any value that is not listed in the level vector becomes NA.
	tb_cats <- tb_cats %>%
	  mutate(
	    animal = factor(animal, levels = lv.a),
	    audience = factor(audience, levels = lv.ppl),
	    purpose = factor(purpose, levels = lv.p)
	  )

	```