Skip to content

Instantly share code, notes, and snippets.

@wimsy
Last active March 16, 2018 12:14
Show Gist options
  • Save wimsy/8952c75305a138875e745cc57a1e8030 to your computer and use it in GitHub Desktop.
Save wimsy/8952c75305a138875e745cc57a1e8030 to your computer and use it in GitHub Desktop.
R functions to authenticate with and query the Azure Active Directory reporting API for sign-in data.
# aad_reporting_api_signins.R
# Functions to authenticate with Azure AD by OAuth and query sign-in data from
# Microsoft's Azure AD Reporting API: https://docs.microsoft.com/en-us/azure/active-directory/active-directory-reporting-api-getting-started-azure-portal
# To get a client_id and client_secret, you must register an AAD application
# with appropriate permissions: https://docs.microsoft.com/en-us/azure/active-directory/active-directory-reporting-api-prerequisites-azure-portal
#
# Example usage:
#
# signins <- query_days(ymd("2018-03-01"), ymd("2018-03-07"), save_as_RDS = T)
#
# Will pull data between 3/1/18 and 3/7/18 into a dataframe named signins and
# save each day's data as an individual RDS file in the folder named by data_folder.
library(httr)
library(jsonlite)
library(tidyverse)
library(lubridate)
library(stringr)
library(purrr)
# ---- Configuration ----

# Azure AD tenant whose sign-in activity is queried.
tenant_name <- "<yourtenant>.onmicrosoft.com"

# Sign-in events endpoint of the reporting API for the tenant above.
aad_reporting_signin_endpoint_uri <- sprintf(
  "https://graph.windows.net/%s/activities/signinEvents?api-version=beta",
  tenant_name
)

# Resource the OAuth token is requested for.
resource_uri <- "https://graph.windows.net"

# Where you plan to save your data (note the trailing slash: file names are
# appended to this string directly).
data_folder <- "./Daily_Signin_Data/"
# Format a date or date-time as an ISO 8601 UTC timestamp
# ("YYYY-MM-DDTHH:MM:SSZ") for use in API filter expressions.
#
# datetime_value: a Date, POSIXct/POSIXlt, or a character string that
#   as.POSIXct() can parse. Dates are taken as midnight UTC; character
#   strings are read as UTC wall-clock times.
# Returns a character vector the same length as the input.
format_datetime <- function(datetime_value) {
  if (is.character(datetime_value)) {
    instant <- as.POSIXct(datetime_value, tz = "UTC")
  } else {
    instant <- as.POSIXct(datetime_value)
  }
  # The trailing "Z" asserts UTC, so the value must be rendered in UTC.
  # strftime() would render POSIXct values in the system time zone instead,
  # ignoring their tzone attribute.
  format(instant, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
}
# Acquire an OAuth 2.0 token for the reporting API using the
# client-credentials grant.
#
# client_id, client_secret: credentials of the registered AAD application.
#   By default they are read from the AAD_CLIENT_ID / AAD_CLIENT_SECRET
#   environment variables so that secrets need not live in this file; when a
#   variable is unset the original placeholder string is used.
# Returns the token object produced by httr::oauth2.0_token(); its
#   `credentials` element carries `token_type` and `access_token`.
# Stops with a descriptive message if the credentials contain an error.
get_aad_oauth_token <- function(
    client_id = Sys.getenv("AAD_CLIENT_ID", unset = "<your_client_id>"),
    client_secret = Sys.getenv("AAD_CLIENT_SECRET",
                               unset = "<your_client_secret>=")) {
  app_name <- "aadreport" # not important for authorization grant flow
  base_url <- paste0("https://login.windows.net/", tenant_name, "/oauth2")

  azure_endpoint <- oauth_endpoint(
    authorize = NULL,
    access = "token",
    base_url = base_url
  )

  # Create the app instance.
  myapp <- oauth_app(
    appname = app_name,
    key = client_id,
    secret = client_secret
  )

  # Step through the authorization chain:
  mytoken <- oauth2.0_token(
    endpoint = azure_endpoint,
    app = myapp,
    client_credentials = TRUE,
    user_params = list(resource = resource_uri),
    cache = FALSE
  )

  credentials <- mytoken$credentials
  if (("error" %in% names(credentials)) && (nchar(credentials$error) > 0)) {
    # error_codes may hold several values; collapse them so that a single
    # message is raised rather than one message per code.
    error_codes <- paste(unlist(credentials$error_codes), collapse = ", ")
    errorMsg <- paste(
      "Error while acquiring token.",
      paste("Error message:", credentials$error),
      paste("Error description:", credentials$error_description),
      paste("Error code:", error_codes),
      sep = "\n"
    )
    stop(errorMsg)
  }

  # Resource API can be accessed through "mytoken" at this point.
  mytoken
}
# Post-processing hook applied to every batch of sign-in records.
#
# df: the records as returned by the API (or as read back from disk).
# Returns the input unchanged; customise here to change column types,
# drop unwanted columns, and so on.
process_signin_data <- function(df) {
  processed <- df
  processed
}
# Run one query against the sign-in events endpoint and return every page
# of results as a single data frame.
#
# query_string: text appended verbatim to the endpoint URI, e.g.
#   "&$filter=...".
# Returns the row-bound result of all pages after process_signin_data().
# Stops if any request comes back with an HTTP error status.
query_aad_signins <- function(query_string) {
  mytoken <- get_aad_oauth_token()
  auth_header <- add_headers(
    Authorization = paste(
      mytoken$credentials$token_type,
      mytoken$credentials$access_token
    )
  )

  pages <- list()
  url <- paste0(aad_reporting_signin_endpoint_uri, query_string)

  # Follow @odata.nextLink until the service stops supplying one.
  while (length(url) > 0) {
    r <- GET(url, auth_header)
    # Fail loudly on HTTP errors; otherwise an error payload (which has no
    # `value` element) would silently yield an empty result.
    stop_for_status(r, task = "query Azure AD sign-in events")

    json <- content(r, as = "text", encoding = "UTF-8")
    response <- fromJSON(json, flatten = TRUE)

    # Collect pages and bind once at the end instead of re-copying the
    # accumulated result on every iteration.
    pages <- c(pages, list(process_signin_data(response$value)))
    message("Fetched page ", length(pages))

    url <- response$`@odata.nextLink`
  }

  bind_rows(pages)
}
# Query all sign-ins for a single day.
#
# Since the API limits you to 120,000 records per query, data is pulled one
# day at a time.
#
# query_date: the day to query (a Date).
# Returns the data frame produced by query_aad_signins().
query_day <- function(query_date) {
  day_start <- format_datetime(query_date)
  next_day_start <- format_datetime(query_date + days(1))

  # Half-open interval [day_start, next_day_start): an inclusive upper bound
  # of 23:59:59 would exclude any sign-in whose timestamp carries a
  # fractional second within the last second of the day.
  query_string <- paste0(
    "&$filter=",
    "signinDateTime+ge+",
    day_start,
    "+and+signinDateTime+lt+",
    next_day_start
  )

  query_aad_signins(query_string)
}
# Query a date range, one day at a time.
#
# start_date, end_date: first and last day to query (inclusive); anything
#   as.Date() accepts. end_date defaults to start_date (a single day).
# save_as_RDS: if TRUE, each day's data is also written with saveRDS() to
#   "<data_folder><YYYY-MM-DD>.Rda".
# Returns all days' records bound into one data frame.
query_days <- function(start_date, end_date = start_date, save_as_RDS = FALSE) {
  query_dates <- seq(as.Date(start_date), as.Date(end_date), by = "day")

  # Make sure the output folder exists before any data is downloaded, so a
  # missing folder cannot abort the run after the first day was fetched.
  if (save_as_RDS && !dir.exists(data_folder)) {
    dir.create(data_folder, recursive = TRUE)
  }

  daily <- vector("list", length(query_dates))
  # Index-based loop: iterating over the Date vector directly would strip
  # its class and hand back bare numbers.
  for (i in seq_along(query_dates)) {
    qd <- query_dates[i]
    message("Querying ", format(qd))

    r <- query_day(qd)
    daily[i] <- list(r)

    if (save_as_RDS) {
      filename <- paste0(data_folder, as.character(qd), ".Rda")
      saveRDS(r, filename)
    }
  }

  bind_rows(daily)
}
# Load previously saved daily sign-in files from data_folder.
#
# startdate, enddate: optional inclusive bounds (anything lubridate::ymd()
#   parses); NULL means unbounded on that side.
# Returns the row-bound contents of the selected files after
#   process_signin_data().
load_signin_data <- function(startdate = NULL,
                             enddate = NULL) {
  # Anchored regex: the pattern argument is a regular expression, not a glob.
  rds <- sort(list.files(data_folder, pattern = "\\.Rda$"))

  # File names are "<YYYY-MM-DD>.Rda", so plain string comparison orders
  # them chronologically.
  if (!is.null(startdate)) {
    first_file <- format(ymd(startdate), "%Y-%m-%d.Rda")
    rds <- rds[which(rds >= first_file)]
  }
  if (!is.null(enddate)) {
    last_file <- format(ymd(enddate), "%Y-%m-%d.Rda")
    rds <- rds[which(rds <= last_file)]
  }

  paths <- paste0(data_folder, rds)
  r <- map_df(paths, readRDS) %>%
    process_signin_data()

  # Optional notification: only attempted when a `pushover` function is
  # available in the session, so the load never fails because of it.
  if (exists("pushover", mode = "function")) {
    pushover(message = "Data loaded.")
  }

  r
}
@wimsy
Copy link
Author

wimsy commented Mar 16, 2018

The code works but I wouldn't say it's good.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment