Last active
March 16, 2018 12:14
-
-
Save wimsy/8952c75305a138875e745cc57a1e8030 to your computer and use it in GitHub Desktop.
R functions to authenticate with and query the Azure Active Directory reporting API for sign-in data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# aad_reporting_api_signins.R | |
# Functions to authenticate with Azure AD by OAuth and query sign-in data from
# Microsoft's Azure AD Reporting API: https://docs.microsoft.com/en-us/azure/active-directory/active-directory-reporting-api-getting-started-azure-portal | |
# To get a client_id and client_secret, you must register an AAD application | |
# with appropriate permissions: https://docs.microsoft.com/en-us/azure/active-directory/active-directory-reporting-api-prerequisites-azure-portal | |
# | |
# Example usage: | |
# | |
# signins <- query_days(ymd("2018-03-01"), ymd("2018-03-07"), save_as_RDS = T) | |
# | |
# Will pull data between 3/1/18 and 3/7/18 into a data frame named signins and
# save each day's data as an individual RDS file (with an .Rda extension) in
# the folder named by data_folder.
library(httr) | |
library(jsonlite) | |
library(tidyverse) | |
library(lubridate) | |
library(stringr) | |
library(purrr) | |
# ---- Configuration ----------------------------------------------------------

# Azure AD tenant whose sign-in events are queried (replace the placeholder).
tenant_name <- "<yourtenant>.onmicrosoft.com"

# Resource identifier sent with the token request; it is also the host of the
# reporting API, so the endpoint below is derived from it.
resource_uri <- "https://graph.windows.net"

# Full URI of the sign-in events endpoint (beta API version).
aad_reporting_signin_endpoint_uri <- paste0(
  resource_uri, "/",
  tenant_name,
  "/activities/signinEvents?api-version=beta"
)

# Where you plan to save your data. The trailing slash matters: file names are
# appended to this string with paste0().
data_folder <- "./Daily_Signin_Data/"
#' Format a date or date-time as an ISO 8601 UTC timestamp.
#'
#' @param datetime_value A `Date` or `POSIXct` value (or vector of them).
#' @return A character vector of the form "YYYY-MM-DDTHH:MM:SSZ".
format_datetime <- function(datetime_value) {
  # The trailing "Z" declares UTC, so the instant must be rendered in UTC.
  # strftime() formats POSIXct values in the session's local time zone and
  # ignores their tzone attribute, which produced mislabelled timestamps on
  # any machine not running in UTC. Dates convert to midnight UTC.
  format(
    as.POSIXct(datetime_value, tz = "UTC"),
    "%Y-%m-%dT%H:%M:%SZ",
    tz = "UTC"
  )
}
#' Request an OAuth 2.0 token for the reporting API.
#'
#' Uses the client-credentials flow of httr against the tenant's
#' login.windows.net endpoint. Reads the globals `tenant_name` and
#' `resource_uri`. The client id and secret are placeholders to be replaced
#' with the values of your registered AAD application.
#'
#' @return The token object returned by `oauth2.0_token()`.
get_aad_oauth_token <- function() {
  client_id <- "<your_client_id>"
  client_secret <- "<your_client_secret>="

  # No authorize URL is needed for this flow; only the token endpoint is used.
  token_endpoint <- oauth_endpoint(
    authorize = NULL,
    access = "token",
    base_url = paste0("https://login.windows.net/", tenant_name, "/oauth2")
  )

  # The app name is not important for the authorization grant flow.
  aad_app <- oauth_app(
    appname = "aadreport",
    key = client_id,
    secret = client_secret
  )

  token <- oauth2.0_token(
    endpoint = token_endpoint,
    app = aad_app,
    client_credentials = TRUE,
    user_params = list(resource = resource_uri),
    cache = FALSE
  )

  # The token service can report a failure inside the credentials payload;
  # turn that into an R error carrying the details it supplied.
  creds <- token$credentials
  has_error <- ("error" %in% names(creds)) && (nchar(creds$error) > 0)
  if (has_error) {
    stop(paste(
      "Error while acquiring token.",
      paste("Error message:", creds$error),
      paste("Error description:", creds$error_description),
      paste("Error code:", creds$error_codes),
      sep = "\n"
    ))
  }

  # The resource API can be accessed through this token from here on.
  token
}
#' Post-process a batch of sign-in records.
#'
#' Customisation hook: convert column types, drop unwanted columns, and so on.
#' As written it hands its input back unchanged.
#'
#' @param df Sign-in records (one page of API results or previously saved data).
#' @return `df`, unmodified.
process_signin_data <- function(df) {
  df
}
#' Query the sign-in events endpoint and follow pagination to the end.
#'
#' Acquires a fresh token, issues a GET against
#' `aad_reporting_signin_endpoint_uri` with `query_string` appended, and keeps
#' following `@odata.nextLink` until the response no longer carries one. Each
#' page's `value` is passed through `process_signin_data()`.
#'
#' @param query_string Text appended verbatim to the endpoint URI, e.g. an
#'   "&$filter=..." clause.
#' @return All pages combined into one data frame with `bind_rows()`.
query_aad_signins <- function(query_string) {
  mytoken <- get_aad_oauth_token()
  auth_header <- add_headers(
    Authorization = paste(
      mytoken$credentials$token_type,
      mytoken$credentials$access_token
    )
  )

  # Collect pages in a list and bind once at the end; binding inside the loop
  # re-copies everything fetched so far on every page.
  pages <- list()
  url <- paste0(aad_reporting_signin_endpoint_uri, query_string)

  while (length(url) > 0) {
    r <- GET(url, auth_header)
    # Fail loudly on HTTP errors. An error payload has neither `value` nor a
    # next link, so without this check the loop would end with partial or
    # empty data and no indication that anything went wrong.
    stop_for_status(r, task = "query Azure AD sign-in events")

    json <- content(r, as = "text", encoding = "UTF-8")
    response <- fromJSON(json, flatten = TRUE)

    page <- process_signin_data(response$value)
    if (length(page) > 0) {
      pages[[length(pages) + 1L]] <- page
    }
    message("Fetched page ", length(pages), " of sign-in data")

    # NULL (length 0) when there are no further pages, which ends the loop.
    url <- response$`@odata.nextLink`
  }

  bind_rows(pages)
}
#' Query all sign-in events for a single calendar day.
#'
#' Since the API limits you to 120,000 records per query, data is pulled one
#' day at a time.
#'
#' @param query_date The day to query, as a `Date`.
#' @return The value of `query_aad_signins()` for that day's filter.
query_day <- function(query_date) {
  # Inclusive window: 00:00:00 through 23:59:59 of query_date.
  start_string <- format_datetime(query_date)
  end_string <- format_datetime(query_date + days(1) - seconds(1))

  query_string <- paste0(
    "&$filter=",
    "signinDateTime+ge+", start_string,
    "+and+signinDateTime+le+", end_string
  )

  # Return the result directly; ending on an assignment returned it invisibly
  # and left an unused local behind.
  query_aad_signins(query_string)
}
#' Query sign-in events for every day in a date range.
#'
#' @param start_date First day to query, as a `Date`.
#' @param end_date Last day to query (inclusive); defaults to `start_date`.
#' @param save_as_RDS If `TRUE`, each day's data is also written with
#'   `saveRDS()` to `<data_folder><YYYY-MM-DD>.Rda`.
#' @return The data for all days combined with `bind_rows()`.
query_days <- function(start_date, end_date = start_date, save_as_RDS = FALSE) {
  query_dates <- seq(start_date, end_date, by = 1)

  # Make sure the output folder exists before spending time on API calls;
  # otherwise saveRDS() fails only after the first day has been downloaded.
  if (save_as_RDS && !dir.exists(data_folder)) {
    dir.create(data_folder, recursive = TRUE)
  }

  # One slot per day, bound once at the end instead of growing a data frame.
  daily <- vector("list", length(query_dates))

  # Iterate by index: `for (d in query_dates)` would strip the Date class.
  for (i in seq_along(query_dates)) {
    qd <- query_dates[i]
    print(qd)

    day_data <- query_day(qd)
    # Assigning NULL into a list slot would delete the slot, so guard it.
    if (!is.null(day_data)) {
      daily[[i]] <- day_data
    }

    if (save_as_RDS) {
      filename <- paste0(data_folder, as.character(qd), ".Rda")
      saveRDS(day_data, filename)
    }
  }

  bind_rows(daily)
}
#' Load previously saved daily sign-in files from `data_folder`.
#'
#' Files are expected to be named `<YYYY-MM-DD>.Rda`, as written by
#' `query_days(save_as_RDS = TRUE)`. The date range is applied by comparing
#' file names as strings, which orders correctly for that naming scheme.
#'
#' @param startdate Optional first day to load (anything `ymd()` can parse).
#' @param enddate Optional last day to load, inclusive.
#' @return The selected files combined into one data frame and passed through
#'   `process_signin_data()`.
load_signin_data <- function(startdate = NULL,
                             enddate = NULL) {
  # `pattern` is a regular expression, not a glob: match a literal ".Rda"
  # suffix so unrelated files cannot slip into the date comparison below.
  rds <- sort(list.files(data_folder, pattern = "\\.Rda$"))

  if (!is.null(startdate)) {
    first_file <- format(ymd(startdate), "%Y-%m-%d.Rda")
    rds <- rds[rds >= first_file]
  }
  if (!is.null(enddate)) {
    last_file <- format(ymd(enddate), "%Y-%m-%d.Rda")
    rds <- rds[rds <= last_file]
  }

  r <- map(paste0(data_folder, rds), readRDS) %>%
    bind_rows() %>%
    process_signin_data()

  # Optional notification: this file does not define or load `pushover`, so
  # only call it when the session happens to provide such a function.
  if (exists("pushover", mode = "function")) {
    pushover(message = "Data loaded.")
  }

  # Return the data; previously the notifier call was the last expression, so
  # the loaded data was never returned.
  r
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The code works but I wouldn't say it's good.