# Source: GitHub gist abresler/33d47c835c0ea86808049244ee422cdb
# Last active: August 25, 2017 12:40
# NOTE: GitHub flagged this file as containing bidirectional Unicode text that
# may be interpreted or compiled differently than it appears; review it in an
# editor that reveals hidden Unicode characters.
### Code for Introduction to Programming with gdeltr2
## asbcllc.com/blog/2017/august/intro_to_programming_with_gdeltr2/index.html
# Package Install ---------------------------------------------------------
# devtools is required for the GitHub installs further down; purrr is
# installed and attached up front.
install.packages("devtools")
install.packages("purrr")
library(purrr)

# CRAN packages used by this tutorial. Each package is listed exactly once
# (the earlier list repeated "tidyr", which installed it twice).
packages_to_install <-
  c(
    "dplyr", "rlang", "tidyr", "stringr", "lubridate", "purrrlyr", "plotly",
    "wordcloud2", "readr", "ggplot2", "tidyverse", "tibble", "hrbrthemes",
    "ggthemes", "jsonlite"
  )

# install.packages() accepts a character vector, so a single call installs
# the whole set; no per-package mapping is needed for this side effect.
install.packages(packages_to_install)
# github install ----------------------------------------------------------
## Install from github
# These three packages are installed from their GitHub repositories
# using devtools::install_github().
devtools::install_github("hafen/trelliscopejs")
devtools::install_github("jbkunst/highcharter")
devtools::install_github("abresler/gdeltr2")
# strings -----------------------------------------------------------------
### Start
## Strings
# Single and double quotes delimit the same kind of string, so this is TRUE.
"My Text" == 'My Text'
# String comparison is case-sensitive, so this is FALSE.
"My Text" == 'my text'
# Objects -----------------------------------------------------------------
### Objects
# Single-value (length-one) character objects.
my_favorite_team <-
  "Brooklyn Nets"
favoriteNBAPlayerEver <-
  "Mitch Richmond"

# Typing an object's name at the top level prints its value.
my_favorite_team
favoriteNBAPlayerEver

# Three parallel vectors: element i of each describes the same family member.
my_family <-
  c("Alex", "Liz", "Chase", "Theo")
their_type <-
  c("Adult", "Adult", "Toy Poodle", "Baby")
their_age <-
  c(33, 32, 2, 0)

my_family
their_type
their_age

# c() produces one atomic vector, so combining character and numeric vectors
# coerces the ages to character strings ("33", "32", ...).
all_objects <- c(my_family, their_type, their_age)
all_objects
# Data Frames -------------------------------------------------------------
## Data Frame
library(dplyr)

# Combine the three parallel vectors into one table with a column per vector.
# tibble() is used instead of data_frame(), which is deprecated; dplyr
# re-exports tibble(), so no extra library() call is needed.
df_bresler_family <-
  tibble(name = my_family, type = their_type, age = their_age)

# Print the table to the console, then open it in the data viewer.
df_bresler_family
View(df_bresler_family)
### Working with the API
# api ---------------------------------------------------------------------
# Attach gdeltr2, which provides the get_*_ft_api() functions called below.
library(gdeltr2)
## Terms
# Search terms grouped by topic. A phrase wrapped in inner double quotes
# (inside a single-quoted R string) is sent with the quotes included.
# NOTE(review): presumably the API treats a quoted phrase as an exact match --
# confirm against the GDELT API documentation.
sports_terms <-
  c(
    '"Brooklyn Nets"', "Caris LeVert", '"Kyrie Irving" Trade',
    '"Luka Doncic"', 'NBA "Draft Prospect"', '"Jarrett Allen"'
  )

# "City Council" was previously misspelled "City Counsel", which searched for
# the wrong phrase.
political_terms <-
  c('"Bill Perkins"', '"New York City" "City Council"')

finance_real_estate_terms <-
  c(
    "Eastdil", "Condo Bubble", '"JBG Smith"', '"CPPIB"', "Anbang",
    "WeWork", '"Goldman Sachs"', 'Blackstone "Real Estate"'
  )

other_terms <-
  c(
    "Supergoop", '"LNG"', 'Maryland "High School Football"',
    '"Jared Kushner"', '"Eddie Huang"'
  )

# All topic groups combined into the single vector passed to the API calls.
my_terms <-
  c(sports_terms, political_terms, finance_real_estate_terms, other_terms)
# domains -----------------------------------------------------------------
# Web domains to query, grouped by subject area.
news_domains <-
  c("nypost.com", "washingtonpost.com", "wsj.com", "gothamgazette.com")

sports_domains <-
  c("espn.com", "netsdaily.com")

finance_real_estate_domains <-
  c(
    "realdeal.com", "zerohedge.com", "institutionalinvestor.com",
    "pionline.com", "curbed.com", "archdaily.com"
  )

random_domains <-
  c("tmz.com", "snopes.com", "alphr.com", "oilprice.com")

# All domain groups combined into the single vector passed to the API calls.
my_domains <-
  c(news_domains, sports_domains, finance_real_estate_domains, random_domains)
# GKG ---------------------------------------------------------------------
# Fetch the GKG codebook; its `idGKGTheme` column is sampled from below.
df_gkg <-
  get_gdelt_codebook_ft_api(code_book = "gkg")

# Hand-picked GKG theme codes.
my_themes <-
  c(
    # stories about China's currency -- a good way to find stories about
    # China's economy
    "ECON_WORLDCURRENCIES_CHINESE_YUAN",
    "ECON_BUBBLE", # articles about economic bubbles
    "TAX_FNCACT_BROKER", # articles about brokers of things
    "ECON_HOUSING_PRICES", # articles about housing prices
    "ECON_BITCOIN", # articles about bitcoin
    "ELECTION_FRAUD", # articles about election fraud
    "SOC_POINTSOFINTEREST_GOVERNMENT_BUILDINGS", # articles about government buildings
    "WB_1277_BANKRUPTCY_AND_LIQUIDATION", # articles about bankruptcy
    "WB_639_REPRODUCTIVE_MATERNAL_AND_CHILD_HEALTH", # articles about pregnancy and child health
    "WB_2151_CHILD_DEVELOPMENT", # articles about child development
    "TAX_FNCACT_BUILDER" # articles about builders
  )

# Fix the RNG seed so the three randomly drawn themes are reproducible.
set.seed(1234)
random_themes <-
  df_gkg %>%
  pull(idGKGTheme) %>%
  sample(3)

# Final theme list: the hand-picked themes followed by the random ones.
my_themes <-
  c(my_themes, random_themes)
# OCR ---------------------------------------------------------------------
# Text strings passed to the API's `images_ocr` argument.
# NOTE(review): presumably matched against text recognized inside images --
# confirm against the GDELT API documentation.
my_ocr <-
  c(
    "Brooklyn Nets",
    "Panerai",
    "Four Seasons",
    "NBA",
    "Goldman Sachs",
    "Philadelphia Eagles",
    "Supergoop",
    "Boston Celtics",
    "Big Baller Brand",
    "BBB",
    "Boston Properties"
  )
# imagetags ---------------------------------------------------------------
# Fetch the image-tag codebook and open it in the data viewer for browsing.
df_imagetags <-
  get_gdelt_codebook_ft_api(code_book = "imagetags")
View(df_imagetags)

# Image tags passed to the API's `images_tag` argument.
my_image_tags <-
  c(
    "Toy Poodle", "poodle", "commercial building", "basketball player",
    "supermodel"
  )
# Image Web ---------------------------------------------------------------
# Fetch the image-web codebook and open it in the data viewer for browsing.
df_imageweb <-
  get_gdelt_codebook_ft_api(code_book = "imageweb")
View(df_imageweb)

# Image web tags passed to the API's `images_web_tag` argument.
my_image_web <-
  c(
    "Jared Kushner", "Empire State Building", "New York City",
    "Ivanka Trump", "Tesla Model 3", "Jeremy Lin", "NBA", "Brooklyn Nets"
  )
# other_parameters --------------------------------------------------------
# Default look-back window, used by the Artlist query.
my_timespan <-
  "5 days"

# Fetch the country codebook and open it in the data viewer for browsing.
df_countries <-
  get_gdelt_codebook_ft_api(code_book = "countries")
View(df_countries)

# Layout options forwarded as `trelliscope_parameters` to the API calls.
# NOTE(review): `path = NULL` presumably lets the display be written to a
# default/temporary location -- confirm against the gdeltr2 documentation.
my_trelliscope_parameters <-
  list(
    rows = 1,
    columns = 2,
    path = NULL
  )
# Artlist -----------------------------------------------------------------
# Query the full-text API in "Artlist" mode for every term, domain, image
# tag, OCR string and GKG theme defined above, over `my_timespan`.
get_data_ft_v2_api(
  terms = my_terms,
  domains = my_domains,
  images_web_tag = my_image_web,
  images_tag = my_image_tags,
  images_ocr = my_ocr,
  gkg_themes = my_themes,
  modes = "Artlist",
  timespans = my_timespan,
  trelliscope_parameters = my_trelliscope_parameters
)

# NOTE(review): `trelliscopeImage` is never assigned in this script;
# presumably get_data_ft_v2_api() creates it as a side effect -- confirm.
trelliscopeImage
# Timeline ----------------------------------------------------------------
# Same query inputs as the Artlist call, but in "TimelineVolInfo" mode and
# over a 12-week window.
get_data_ft_v2_api(
  terms = my_terms,
  domains = my_domains,
  images_web_tag = my_image_web,
  images_tag = my_image_tags,
  images_ocr = my_ocr,
  gkg_themes = my_themes,
  modes = "TimelineVolInfo",
  timespans = "12 Weeks",
  trelliscope_parameters = my_trelliscope_parameters
)

# NOTE(review): `trelliscopeHighcharter` is never assigned in this script;
# presumably get_data_ft_v2_api() creates it as a side effect -- confirm.
trelliscopeHighcharter
# wordclouds --------------------------------------------------------------
# Same query inputs again, requesting four word-cloud modes over a two-week
# window. The layout is one panel per page (1 x 1) rather than the shared
# 1 x 2 layout in `my_trelliscope_parameters`.
get_data_ft_v2_api(
  terms = my_terms,
  domains = my_domains,
  images_web_tag = my_image_web,
  images_tag = my_image_tags,
  images_ocr = my_ocr,
  gkg_themes = my_themes,
  modes = c(
    "WordCloudEnglish",
    "WordCloudTheme",
    "WordCloudImageTags",
    "WordCloudImageWebTags"
  ),
  timespans = "2 weeks",
  trelliscope_parameters = list(
    rows = 1,
    columns = 1,
    path = NULL
  )
)

# NOTE(review): `trelliscopeWordcloud` is never assigned in this script;
# presumably get_data_ft_v2_api() creates it as a side effect -- confirm.
trelliscopeWordcloud
# (End of gist. The GitHub page footer -- "Sign up for free to join this
# conversation on GitHub. Already have an account? Sign in to comment" --
# is not part of the script.)