This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
text title paper automated_readability_index coleman_liau_index flesch_kincaid_grade_level flesch_readability_ease gunning_fog_index n_chars n_polysyllable_words n_sents n_syllables n_unique_words n_words smog_index | |
0 four us states are suing the trump administrat... Four states sue Trump administration over 'un-... https://www.theguardian.com 58.698156 15.299608 46.986883 -45.888247 51.064935 4108 136 7 1284 365 770 28.309659 | |
1 sean spicer, the press secretary for the trump... Sean Spicer holds first Trump administration p... https://www.theguardian.com 18.558549 15.390357 15.867059 31.768431 17.921569 466 14 3 147 66 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
articles = get_articles(the_guardian, breitbart, title_topic="Trump") | |
articles["text"] = articles.text.map(clean_text) | |
articles["text"] = preprocess_articles(articles.text) | |
def get_readability_stats(parsed_articles): | |
stats_list = [] | |
for ix, article in enumerate(parsed_articles): | |
doc = tcy.Doc(article) | |
readability_stats = tcy.text_stats.readability_stats(doc) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readxl) | |
library(janitor) | |
library(ggraph) | |
library(igraph) | |
# Read the Eurovision Song Contest workbook and normalise its column names
# with janitor::clean_names().
df <- clean_names(
  readxl::read_excel("/eurovision_song_contest_1975_2016.xlsx")
)
# Keep columns 1-7 only, reordered so that columns 5 and 6 come first.
df <- df[, c(5:6, 1:4, 7)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import spacy | |
from sklearn.decomposition import PCA | |
# Run a small animal-themed vocabulary through spaCy and collect word vectors.
# NOTE(review): the "en" shortcut name may not be available in newer spaCy
# releases — confirm against the installed version.
nlp = spacy.load("en")
animals = "dog cat hamster lion tiger elephant cheetah monkey gorilla antelope rabbit mouse rat zoo home pet fluffy wild domesticated"
animal_tokens = nlp(animals)
# One row per token that has a vector; tokens without one are skipped.
animal_vectors = np.vstack(
    [token.vector for token in animal_tokens if token.has_vector]
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
library(janitor) | |
library(broom) | |
library(lubridate) | |
# Location of the by-country land temperature CSV. The data set is available at
# https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data
# NOTE(review): this is an absolute path into one user's Downloads folder —
# adjust it before running on another machine.
temp_loc <- "/Users/conormcdonald/Downloads/GlobalLandTemperatures/GlobalLandTemperaturesByCountry.csv"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summary statistics of imdb_score (min, max, mean, median, sd), reshaped to
# long form with one row per statistic (columns: stat, value).
# A plain summarise() replaces the deprecated summarise_each()/funs() pair and
# produces the same column names.
imdb_score_summary <- df %>%
  summarise(min = min(imdb_score),
            max = max(imdb_score),
            mean = mean(imdb_score),
            median = median(imdb_score),
            sd = sd(imdb_score)) %>%
  gather(stat, value)
df %>% | |
ggplot(aes(imdb_score)) + | |
geom_histogram(bins = 50, colour = "black", fill = "red", alpha = 0.4) + | |
theme_minimal() + | |
annotation_custom(tableGrob(imdb_score_summary, rows = NULL), | |
xmin = 2.5, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(here) | |
library(broom) | |
library(corrr) | |
library(forcats) | |
library(stringr) | |
library(lubridate) | |
library(gridExtra) | |
# Load the movie metadata CSV; the relative path is resolved against the
# current working directory.
df <- read_csv("movie_metadata.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df %>% map_df(~sum(is.na(.))) %>% glimpse() | |
Observations: 1 | |
Variables: 28 | |
$ color <int> 19 | |
$ director_name <int> 104 | |
$ num_critic_for_reviews <int> 50 | |
$ duration <int> 15 | |
$ director_facebook_likes <int> 104 | |
$ actor_3_facebook_likes <int> 23 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Express the release year as an age relative to 2017, then drop the raw year.
num_df <- mutate(num_df, years_since_release = 2017 - title_year)
num_df <- select(num_df, -title_year)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mean IMDB score per release year, with an empirical 95% band taken from the
# 2.5th and 97.5th percentiles of the scores within each year.
df %>%
  group_by(title_year) %>%
  summarise(mean_rating = mean(imdb_score),
            upper_rating = quantile(imdb_score, 0.975),
            # 0.025 mirrors the 0.975 upper bound; the previous 0.0275 made
            # the band asymmetric.
            lower_rating = quantile(imdb_score, 0.025)) %>%
  ggplot(aes(title_year, mean_rating)) +
  geom_line(colour = "dodger blue") +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red", alpha = 0.6, se = FALSE) +
  # ymin takes the lower bound and ymax the upper bound (previously swapped).
  geom_errorbar(aes(ymin = lower_rating, ymax = upper_rating)) +
OlderNewer