Skip to content

Instantly share code, notes, and snippets.

@berkorbay
Last active April 26, 2023 14:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save berkorbay/f394bf2ea8775cf7b5316aeafd27c482 to your computer and use it in GitHub Desktop.
Save berkorbay/f394bf2ea8775cf7b5316aeafd27c482 to your computer and use it in GitHub Desktop.
YOK Foreign Students Data Starter Code for Tidyverse (dplyr + ggplot2) and Quarto
---
title: "Yabancı Öğrenci Raporu (Örnek)"
author: "MEF MGMT 553 2022-2023 Bahar Dönemi"
format: pptx
editor: visual
execute:
  echo: false
  message: false
  warning: false
---
```{r,echo=FALSE,message=FALSE,warning=FALSE}
## Setup: load the tidyverse, read the Excel file, tidy the data frame
library(tidyverse)

# The workbook is read with a path relative to this document so the report
# renders on any machine, not only where a specific home directory exists.
raw_df <- readxl::read_excel("foreign_students_by_nationality_2021_2022.xlsx")

# Columns are renamed by position (1..7), so the sheet's own header text does
# not matter as long as the column order is unchanged.
raw_df <- raw_df %>%
  rename(
    univ_name = 1,
    univ_type = 2,
    city_name = 3,
    country = 4,
    male = 5,
    female = 6,
    total = 7
  )

# Coerce the count columns to numeric; values that cannot be parsed become NA
# and are dropped later by the complete-case filters.
the_df <- raw_df %>%
  mutate(across(c(male, female, total), as.numeric))
```
```{r}
## Example chart: the 10 countries sending the most foreign students
## Prepare the data: one row per country, largest total first
df1 <- the_df %>%
  drop_na() %>% # keep only rows with no missing (NA) values
  group_by(country) %>% # one group per country
  summarise(total = sum(total)) %>% # total students per country
  arrange(desc(total)) # sort from largest to smallest

## Draw the bar chart for the first 10 rows of the sorted table
ggplot(
  data = slice_head(df1, n = 10),
  mapping = aes(x = reorder(country, -total), y = total)
) +
  geom_col() +
  labs(
    x = "",
    y = "Öğrenci Sayısı",
    title = "En Fazla Yabancı Öğrencinin Geldiği Ülkeler",
    subtitle = "Kaynak: 2021-2022 YÖK Verileri"
  ) +
  theme_minimal() +
  # tilt the country names so long labels do not overlap
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
```{r}
## Define the plotting function
#' Plot the universities hosting the most students from one country
#'
#' @param the_df Data frame with the columns `univ_name`, `univ_type`,
#'   `country`, `male`, `female` and `total`.
#' @param the_country Character vector; its first element selects the rows
#'   (`country == the_country[1]`) and the value is used as the plot title.
#' @return A ggplot object: horizontal stacked bars (male / female) per
#'   university, built from the first 20 long-format rows, i.e. the 10
#'   universities with the highest totals (each university contributes one
#'   `male` and one `female` row after pivoting).
generate_country_plot <- function(the_df, the_country) {
  country_df <- the_df %>%
    filter(complete.cases(.) & country == the_country[1]) %>%
    arrange(desc(total)) %>%
    select(univ_name, male, female, total, univ_type) %>%
    # male / female become rows (columns `name` and `value`) so they can stack
    pivot_longer(., -c(univ_name, univ_type, total)) %>%
    arrange(desc(total))

  ggplot(
    country_df %>% slice(1:20),
    aes(x = reorder(univ_name, total), y = value, fill = name)
  ) +
    geom_bar(stat = "identity", position = "stack") +
    coord_flip() +
    theme_minimal() +
    # labs() names aesthetics, not screen positions: the student counts are the
    # y aesthetic even though coord_flip() draws them horizontally, so the
    # count label belongs on `y` and the university axis stays untitled.
    labs(
      title = paste0(the_country),
      subtitle = "Ülkeden gelen öğrencilerin en fazla yerleştiği 10 üniversite.",
      y = "Öğrenci Sayısı",
      x = "",
      fill = ""
    ) +
    theme(legend.position = "top")
}
```
```{r,output='asis'}
# Emit one section per top country: a heading, the student count, then the plot.
# The loop is bounded by the number of rows in df1 so that a data set with
# fewer than 10 countries does not index past the end of the table.
for (i in seq_len(min(10, nrow(df1)))) {
  # df1 holds one row per country, already sorted by total (descending)
  the_country <- df1$country[[i]]
  total_students <- df1$total[[i]]

  cat(paste0("## ", the_country), "\n\n")
  cat(paste0("Öğrenci sayısı: ", total_students, "\n\n"))
  # Plots are not auto-printed inside a for loop, so print explicitly
  knitr::knit_print(generate_country_plot(the_df, the_country))
  cat("\n\n")
}
```
---
title: "Yabancı Öğrenci Raporu (Üniversiteler)"
author: "MEF MGMT 553 2022-2023 Bahar Dönemi"
format: html
editor: visual
execute:
  echo: false
  message: false
  warning: false
---
```{r,echo=FALSE,message=FALSE,warning=FALSE}
## Setup: load the tidyverse, read the Excel file, tidy the data frame
library(tidyverse)

# Read the workbook and give the first seven columns stable names by position
raw_df <- readxl::read_excel("foreign_students_by_nationality_2021_2022.xlsx") %>%
  rename(
    univ_name = 1,
    univ_type = 2,
    city_name = 3,
    country = 4,
    male = 5,
    female = 6,
    total = 7
  )

# Count columns are coerced to numeric
the_df <- mutate(raw_df, across(c(male, female, total), as.numeric))

# Number of universities shown in the overview chart and in the per-university sections
n_results <- 20
```
```{r}
## Example chart: the n_results universities (univ_type "VAKIF") with the most
## foreign students
## Prepare the data: one row per university, largest total first
df1 <- the_df %>%
  filter(univ_type == "VAKIF") %>%
  filter(complete.cases(.)) %>% # keep only rows with no missing (NA) values
  group_by(univ_name, univ_type) %>% # one group per university (type kept for fill)
  # .groups = "drop" returns an ungrouped table directly, replacing the
  # repeated ungroup() calls and the regrouping message from summarise()
  summarise(total = sum(total), .groups = "drop") %>%
  arrange(desc(total)) # sort from largest to smallest

ggplot(
  df1 %>% slice(1:n_results),
  aes(
    x = reorder(univ_name, total),
    y = total,
    fill = univ_type
  )
) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    title = "En Fazla Yabancı Öğrencinin Bulunduğu Üniversiteler",
    subtitle = "Kaynak: 2021-2022 YÖK Verileri",
    y = "Öğrenci Sayısı",
    x = ""
  ) +
  coord_flip() # horizontal bars so university names stay readable
```
```{r}
## Define the plotting function
#' Plot the main countries of origin for one university
#'
#' @param the_df Data frame with the columns `univ_name`, `country`, `male`,
#'   `female` and `total`.
#' @param the_university Character vector; its first element selects the rows
#'   (`univ_name == the_university[1]`) and the value is used as the plot title.
#' @return A ggplot object: horizontal stacked bars per country where `male`
#'   and `female` are expressed as shares of the university's summed `total`,
#'   built from the first 20 long-format rows (two rows per country).
generate_university_plot <- function(the_df, the_university) {
  # Rows of the requested university, with counts turned into shares
  shares_df <- the_df %>%
    drop_na() %>%
    filter(univ_name == the_university[1]) %>%
    arrange(desc(total)) %>%
    mutate(
      male = male / sum(total),
      female = female / sum(total)
    )

  # Long format: one row per (country, gender), largest countries first
  university_df <- shares_df %>%
    select(country, male, female, total) %>%
    pivot_longer(cols = -c(country, total)) %>%
    arrange(desc(total))

  top_rows <- slice(university_df, 1:20)

  ggplot(top_rows, aes(x = reorder(country, total), y = value, fill = name)) +
    geom_col(position = "stack") +
    scale_y_continuous(labels = scales::percent) +
    coord_flip() +
    labs(
      title = paste0(the_university),
      subtitle = "Üniversiteye gelen öğrencilerin en fazla geldikleri 10 ülke.",
      y = "Öğrenci Yüzdesi",
      x = "",
      fill = ""
    ) +
    theme_minimal() +
    theme(legend.position = "top")
}
```
```{r,output='asis',eval=TRUE}
# Emit one section per university: a heading, the student count, then the plot.
# Only rows with univ_type "VAKIF" are considered, as in the original loop.
vakif_df <- df1 %>% filter(univ_type == "VAKIF")

# Bounded by the number of available universities so that the loop never
# indexes past the end of the table when fewer than n_results rows exist.
for (i in seq_len(min(n_results, nrow(vakif_df)))) {
  the_university <- vakif_df$univ_name[[i]]
  total_students <- vakif_df$total[[i]]

  cat(paste0("## ", the_university), "\n\n")
  cat(paste0("Öğrenci sayısı: ", total_students, "\n\n"))
  # Plots are not auto-printed inside a for loop, so print explicitly
  knitr::knit_print(generate_university_plot(the_df, the_university))
  cat("\n\n")
}
```
# One-time setup: install.packages(c("tidyverse", "readxl"))
library(tidyverse)

# Data file:
# https://github.com/berkorbay/datasets/raw/master/foreign_students/foreign_students_by_nationality_2021_2022.xlsx
# Read the workbook and give the first seven columns stable names by position
raw_df <- readxl::read_excel("foreign_students_by_nationality_2021_2022.xlsx") %>%
  rename(
    univ_name = 1,
    univ_type = 2,
    city_name = 3,
    country = 4,
    male = 5,
    female = 6,
    total = 7
  )

# Count columns are coerced to numeric
the_df <- mutate(raw_df, across(c(male, female, total), as.numeric))

# Total students per country, largest first (printed at top level)
the_df %>%
  drop_na() %>%
  group_by(country) %>%
  summarise(total = sum(total)) %>%
  arrange(desc(total))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment