Last active
January 27, 2020 03:20
-
-
Save alexklapheke/874157b173c822478ae5cfa0094fae39 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript
# Exploratory analysis of the Kickstarter scrape dataset. Every plot produced
# by this script is written to test.pdf.

library(ggplot2)
library(dplyr)
library(lubridate)
library(moments)

# Route all subsequent plots to a single PDF file.
pdf("test.pdf", width = 7, height = 5)

# Read text columns as character (not factor) so category values can be
# rewritten below regardless of the R version's stringsAsFactors default.
kickstarter <- read.csv(
  "DSI_kickstarterscrape_dataset.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Fix HTML entity issue.
# NOTE(review): the previous line compared against and assigned the identical
# string "Film & Video", which is a no-op. Presumably the raw data contains the
# escaped form "Film &amp; Video" -- confirm against the CSV. Decoding "&amp;"
# everywhere is harmless when no value contains it.
kickstarter$category <- gsub("&amp;", "&", kickstarter$category, fixed = TRUE)

# Format dates (Fri, 19 Aug 2011 19:28:17 -0000)
# %a and %b are matched against the current locale's day and month names.
kickstarter$funded.date <- as.Date(
  kickstarter$funded.date,
  format = "%a, %d %b %Y %T %z"
)

# Calculate starting date: funded date minus the duration rounded to whole days
kickstarter$started.date <-
  kickstarter$funded.date - days(round(kickstarter$duration))
# What is the mean (total) pledge that projects get? (not per backer)
# Your answer may vary by +/- 5%
mean(kickstarter$pledged, na.rm = TRUE)

# Create a histogram that shows the distribution for number of
# backers. What is the skew of the distribution?
# The x axis is log10-transformed.
ggplot(kickstarter) +
  geom_histogram(aes(backers)) +
  scale_x_continuous(trans = "log10") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Distribution of backers",
    x = "Backers (log10)",
    y = "Number of projects"
  )
skewness(kickstarter$backers, na.rm = TRUE)

# Is the ‘duration’ variable normally distributed?
ggplot(kickstarter) +
  geom_histogram(aes(duration))

# shapiro.test() accepts at most 5000 observations, so test a random sample.
# Missing values are dropped first so they do not shrink the sample, and the
# sample size is capped at the number of available values so that datasets
# with fewer than 5000 rows do not make the draw fail. The draw is random, so
# the test statistic varies between runs.
durations <- kickstarter$duration[!is.na(kickstarter$duration)]
sample_size <- min(5000L, length(durations))
shapiro.test(durations[sample.int(length(durations), sample_size)])
# Box plot of pledged amounts per category on a log10 y axis. Rows with a
# missing pledged value are excluded before plotting.
# The legend label is set on `color`, the aesthetic the boxes are actually
# mapped to; labelling `fill` had no effect on this plot.
ggplot(kickstarter[!is.na(kickstarter$pledged), ]) +
  geom_boxplot(aes(x = category, y = pledged, color = category)) +
  scale_y_continuous(trans = "log10") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Distribution of pledge amounts by category",
    x = "Category",
    y = "Amount (log10 $)",
    color = "Category"
  )
# Presentation graphs

# Median pledged amount per category (missing pledges ignored).
kickstarter_median <- kickstarter %>%
  group_by(category) %>%
  summarise(median = median(pledged, na.rm = TRUE))

ggplot(kickstarter_median) +
  geom_bar(aes(x = category, y = median, fill = category), stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Median pledge amounts by category",
    x = "Category",
    y = "Amount ($)",
    fill = "Category"
  )

# Month number (1-12) of the computed start date; later plots in this script
# read this column as well.
kickstarter$start.month <- month(kickstarter$started.date)

# Median pledged amount per start month (missing pledges ignored).
kickstarter_month <- kickstarter %>%
  group_by(start.month) %>%
  summarise(median = median(pledged, na.rm = TRUE))

# One axis break per month, matching the success-rate-by-month plot.
ggplot(kickstarter_month) +
  geom_bar(aes(x = start.month, y = median), stat = "identity") +
  scale_x_continuous(breaks = seq_len(12)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Amount pledged by month project started",
    x = "Month",
    y = "Amount pledged ($)"
  )
# Success rates by various metrics

# All plots below exclude projects whose status is "live". The subset is
# computed once instead of once per plot. filter() also drops rows whose
# status is NA, which base logical indexing would have turned into all-NA rows.
not_live <- kickstarter %>%
  filter(status != "live")

# position = "fill" stacks each bar to height 1, so the bars show the share of
# each status rather than raw counts.
ggplot(not_live) +
  geom_histogram(aes(x = duration, fill = status), position = "fill") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Success rate by duration",
    x = "Duration (days)",
    y = "Success rate",
    fill = "Status"
  )

ggplot(not_live) +
  geom_bar(aes(x = start.month, fill = status), stat = "count", position = "fill") +
  scale_x_continuous(breaks = seq_len(12)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Success rate by month project started",
    x = "Month",
    y = "Success rate",
    fill = "Status"
  )

# Goal is shown on a log10 axis with breaks at powers of ten from 1 to 10^7.
ggplot(not_live) +
  geom_histogram(aes(x = goal, fill = status), position = "fill", bins = 20) +
  scale_x_continuous(trans = "log10", breaks = 10^seq(0, 7)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Success rate by goal",
    x = "Goal (log10 $)",
    y = "Success rate",
    fill = "Status"
  )

ggplot(not_live) +
  geom_bar(aes(x = category, fill = status), stat = "count", position = "fill") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Success rate by category",
    x = "Category",
    y = "Success rate",
    fill = "Status"
  )

# Median goal per category among the non-live projects (missing goals ignored).
kickstarter_median_goal <- not_live %>%
  group_by(category) %>%
  summarise(median = median(goal, na.rm = TRUE))

ggplot(kickstarter_median_goal) +
  geom_bar(aes(x = category, y = median, fill = category), stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Median goal by category",
    x = "Category",
    y = "Goal ($)",
    fill = "Category"
  )

# Close the PDF device opened at the top of the script so the file is flushed
# and finalised; invisible() suppresses the device name dev.off() would print.
invisible(dev.off())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment