Skip to content

Instantly share code, notes, and snippets.

@abresler
Last active August 25, 2017 12:40
Show Gist options
  • Save abresler/33d47c835c0ea86808049244ee422cdb to your computer and use it in GitHub Desktop.
Save abresler/33d47c835c0ea86808049244ee422cdb to your computer and use it in GitHub Desktop.
### Code for Introduction to Programming with gdeltr2
## asbcllc.com/blog/2017/august/intro_to_programming_with_gdeltr2/index.html
# Package Install ---------------------------------------------------------
# Install the CRAN packages used in this walkthrough. Every call below
# downloads from the configured CRAN mirror, so network access is required.
install.packages("devtools")
install.packages("purrr")
library(purrr)
# Each package appears exactly once ("tidyr" used to be listed twice, which
# triggered a redundant second install).
packages_to_install <-
  c(
    "dplyr", "rlang", "tidyr", "stringr", "lubridate", "purrrlyr", "plotly",
    "wordcloud2", "readr", "ggplot2", "tidyverse", "tibble", "hrbrthemes",
    "ggthemes", "jsonlite"
  )
# install.packages() is called only for its side effect, so iterate with
# walk(); map() would collect and auto-print a list of NULL results.
purrr::walk(packages_to_install, install.packages)
# github install ----------------------------------------------------------
## Install from github
# The three repositories are installed one at a time, in the order listed.
# invisible() keeps the list returned by lapply() from being printed.
invisible(
  lapply(
    c("hafen/trelliscopejs", "jbkunst/highcharter", "abresler/gdeltr2"),
    devtools::install_github
  )
)
# strings -----------------------------------------------------------------
### Start
## Strings
# Single and double quotes delimit the same kind of character string, so
# this comparison evaluates to TRUE.
"My Text" == 'My Text'
# Comparison with == is case-sensitive, so this one evaluates to FALSE.
"My Text" == 'my text'
# Objects -----------------------------------------------------------------
### Objects
## A single value is bound to a name with `<-`; typing the name prints it.
my_favorite_team <- "Brooklyn Nets"
favoriteNBAPlayerEver <- "Mitch Richmond"
my_favorite_team
favoriteNBAPlayerEver
## Three vectors of equal length: two character, one numeric.
my_family <- c("Alex", "Liz", "Chase", "Theo")
their_type <- c("Adult", "Adult", "Toy Poodle", "Baby")
their_age <- c(33, 32, 2, 0)
my_family
their_type
their_age
## c() on mixed character and numeric input coerces every element to
## character, so the ages end up as strings in the combined vector.
all_objects <- c(my_family, their_type, their_age)
all_objects
# Data Frames -------------------------------------------------------------
## Data Frame
library(dplyr)
# Combine the three vectors into one table with a column per vector.
# tibble() is the supported replacement for the deprecated data_frame();
# dplyr re-exports it, so no additional library() call is needed.
df_bresler_family <-
  tibble(name = my_family, type = their_type, age = their_age)
df_bresler_family
# Open the table in the data viewer.
View(df_bresler_family)
### Working with the API
# api ---------------------------------------------------------------------
library(gdeltr2)
## Terms
# Search terms grouped by topic. Some terms embed double quotes inside the
# string (e.g. '"Brooklyn Nets"'); presumably the API treats a quoted span as
# an exact phrase -- confirm against the GDELT full-text API documentation.
sports_terms <-
  c(
    '"Brooklyn Nets"', "Caris LeVert", '"Kyrie Irving" Trade', '"Luka Doncic"',
    'NBA "Draft Prospect"', '"Jarrett Allen"'
  )
# Fixed spelling: the phrase was previously "City Counsel", which does not
# name the New York City legislative body this term is evidently aimed at.
political_terms <-
  c('"Bill Perkins"', '"New York City" "City Council"')
finance_real_estate_terms <-
  c(
    "Eastdil", "Condo Bubble", '"JBG Smith"', '"CPPIB"', "Anbang",
    "WeWork", '"Goldman Sachs"', 'Blackstone "Real Estate"'
  )
other_terms <-
  c(
    "Supergoop", '"LNG"', 'Maryland "High School Football"',
    '"Jared Kushner"', '"Eddie Huang"'
  )
# Every topic group concatenated into the single vector used for queries.
my_terms <-
  c(sports_terms, political_terms, finance_real_estate_terms, other_terms)
# domains -----------------------------------------------------------------
# Web domains grouped by subject; my_domains concatenates all four groups
# in the order news, sports, finance / real estate, random.
news_domains <- c(
  "nypost.com", "washingtonpost.com", "wsj.com", "gothamgazette.com"
)
sports_domains <- c("espn.com", "netsdaily.com")
finance_real_estate_domains <- c(
  "realdeal.com", "zerohedge.com", "institutionalinvestor.com",
  "pionline.com", "curbed.com", "archdaily.com"
)
random_domains <- c("tmz.com", "snopes.com", "alphr.com", "oilprice.com")
my_domains <- c(
  news_domains,
  sports_domains,
  finance_real_estate_domains,
  random_domains
)
# GKG ---------------------------------------------------------------------
# Fetch the "gkg" codebook from gdeltr2; the code below reads its
# `idGKGTheme` column (presumably one theme id per row -- confirm).
df_gkg <- get_gdelt_codebook_ft_api(code_book = "gkg")
# Hand-picked themes of interest.
my_themes <- c(
  "ECON_WORLDCURRENCIES_CHINESE_YUAN", # stories about China's currency -- a good way to find stories about China's economy
  "ECON_BUBBLE", # articles about economic bubbles
  "TAX_FNCACT_BROKER", # articles about brokers of things
  "ECON_HOUSING_PRICES", # articles about housing prices
  "ECON_BITCOIN", # articles about bitcoin
  "ELECTION_FRAUD", # articles about election fraud
  "SOC_POINTSOFINTEREST_GOVERNMENT_BUILDINGS", # articles about government buildings
  "WB_1277_BANKRUPTCY_AND_LIQUIDATION", # articles about bankruptcy
  "WB_639_REPRODUCTIVE_MATERNAL_AND_CHILD_HEALTH", # articles about pregnancy and child health
  "WB_2151_CHILD_DEVELOPMENT", # articles about child development
  "TAX_FNCACT_BUILDER" # articles about builders
)
# Seed the RNG so the draw is reproducible for a given codebook, pick three
# theme ids at random, and append them to the hand-picked list.
set.seed(1234)
random_themes <- sample(pull(df_gkg, idGKGTheme), 3)
my_themes <- c(my_themes, random_themes)
# OCR ---------------------------------------------------------------------
# Strings passed as `images_ocr` in the get_data_ft_v2_api() calls below.
my_ocr <- c(
  "Brooklyn Nets", "Panerai", "Four Seasons", "NBA", "Goldman Sachs",
  "Philadelphia Eagles", "Supergoop", "Boston Celtics", "Big Baller Brand",
  "BBB", "Boston Properties"
)
# imagetags ---------------------------------------------------------------
# Fetch the "imagetags" codebook and open it in the data viewer so the
# available tags can be browsed before choosing some.
df_imagetags <- get_gdelt_codebook_ft_api(code_book = "imagetags")
View(df_imagetags)
# Tags passed as `images_tag` in the get_data_ft_v2_api() calls below.
my_image_tags <- c(
  "Toy Poodle",
  "poodle",
  "commercial building",
  "basketball player",
  "supermodel"
)
# Image Web ---------------------------------------------------------------
# Fetch the "imageweb" codebook and open it in the data viewer.
df_imageweb <- get_gdelt_codebook_ft_api(code_book = "imageweb")
View(df_imageweb)
# Tags passed as `images_web_tag` in the get_data_ft_v2_api() calls below.
my_image_web <- c(
  "Jared Kushner",
  "Empire State Building",
  "New York City",
  "Ivanka Trump",
  "Tesla Model 3",
  "Jeremy Lin",
  "NBA",
  "Brooklyn Nets"
)
# other_parameters --------------------------------------------------------
# Time window used by the Artlist query below.
my_timespan <- "5 days"
# Fetch the "countries" codebook and open it in the data viewer.
df_countries <- get_gdelt_codebook_ft_api(code_book = "countries")
View(df_countries)
# Settings handed to get_data_ft_v2_api() as `trelliscope_parameters`.
# NOTE(review): presumably `path = NULL` leaves the output location to the
# package default -- confirm in the gdeltr2 documentation.
my_trelliscope_parameters <- list(rows = 1, columns = 2, path = NULL)
# Artlist -----------------------------------------------------------------
# Query the API in "Artlist" mode with every search input defined above,
# over the window held in my_timespan.
get_data_ft_v2_api(
  terms                  = my_terms,
  domains                = my_domains,
  images_web_tag         = my_image_web,
  images_tag             = my_image_tags,
  images_ocr             = my_ocr,
  gkg_themes             = my_themes,
  modes                  = "Artlist",
  timespans              = my_timespan,
  trelliscope_parameters = my_trelliscope_parameters
)
# NOTE(review): `trelliscopeImage` is never assigned in this script;
# presumably the call above creates it in the calling environment -- confirm.
trelliscopeImage
# Timeline -----------------------------------------------------------------
# Same search inputs as the Artlist query, but in "TimelineVolInfo" mode and
# over a fixed "12 Weeks" window instead of my_timespan.
get_data_ft_v2_api(
  terms                  = my_terms,
  domains                = my_domains,
  images_web_tag         = my_image_web,
  images_tag             = my_image_tags,
  images_ocr             = my_ocr,
  gkg_themes             = my_themes,
  modes                  = "TimelineVolInfo",
  timespans              = "12 Weeks",
  trelliscope_parameters = my_trelliscope_parameters
)
# NOTE(review): `trelliscopeHighcharter` is never assigned in this script;
# presumably the call above creates it in the calling environment -- confirm.
trelliscopeHighcharter
# wordclouds --------------------------------------------------------------
# Request four word-cloud modes in a single call over a "2 weeks" window.
# The trelliscope settings are given inline here with one column rather
# than reusing my_trelliscope_parameters (which asks for two).
get_data_ft_v2_api(
  terms          = my_terms,
  domains        = my_domains,
  images_web_tag = my_image_web,
  images_tag     = my_image_tags,
  images_ocr     = my_ocr,
  gkg_themes     = my_themes,
  modes          = c(
    "WordCloudEnglish",
    "WordCloudTheme",
    "WordCloudImageTags",
    "WordCloudImageWebTags"
  ),
  timespans      = "2 weeks",
  trelliscope_parameters = list(rows = 1, columns = 1, path = NULL)
)
# NOTE(review): `trelliscopeWordcloud` is never assigned in this script;
# presumably the call above creates it in the calling environment -- confirm.
trelliscopeWordcloud
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment