Skip to content

Instantly share code, notes, and snippets.

@cvengros
Created February 13, 2015 22:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cvengros/81319530e7bf25e8631f to your computer and use it in GitHub Desktop.
Save cvengros/81319530e7bf25e8631f to your computer and use it in GitHub Desktop.
sharepoint downloader
require 'csv'
require_relative 'sharepoint_downloader'
# variables
client_id = ENV['CLIENT_ID']
client_secret = ENV['CLIENT_SECRET']
refresh_token = ENV['REFRESH_TOKEN']
resource = ENV['RESOURCE']
site_url = 'https://boozallen.sharepoint.com'
source_id = 'b09a7990-05ea-4af9-81ef-edfab16c4e31'
properties = 'AccountName,BAHIMT,BAHAccountGroup,Department,BAHOfficeCity,BAHOfficeState,BAHOfficeCountry,JobTitle,BAHCluster,BAHCareerTrack,BAHCertifications,WorkEmail,BAHFCPrimary,BAHGroup,BAHIMT,BAHLanguages,JobTitle,BAHOffice,PreferredName,Group,BAHProposalExperience,BAHRC,BAHRolePrimary,BAHSkills,BAHFCPrimary'
csv_path = 'data.csv'
page_size = 100
downloader = SharepointDownloder.new(
:client_id => client_id,
:client_secret => client_secret,
:refresh_token => refresh_token,
:resource => resource,
:site_url => site_url,
:source_id => source_id,
:page_size => page_size,
)
downloader.download_all(csv_path, properties)
require 'rest_client'
require 'json'
require 'cgi'
require 'csv'
require_relative 'sharepoint_parser'
class SharepointDownloder
DEFAULT_PAGE_SIZE = 10
def initialize(options={})
# get the options
client_id = CGI.escape(options[:client_id])
client_secret = CGI.escape(options[:client_secret])
refresh_token = CGI.escape(options[:refresh_token])
resource = options[:resource]
@site_url = options[:site_url]
@source_id = options[:source_id]
@page_size = options[:page_size] || DEFAULT_PAGE_SIZE
# get the access token
access_token_url = "https://accounts.accesscontrol.windows.net/tokens/OAuth/2"
body = "grant_type=refresh_token&client_id=#{client_id}&client_secret=#{client_secret}&refresh_token=#{refresh_token}&resource=#{resource}"
post = RestClient.post(access_token_url, body)
response = JSON.parse(post, :symbolize_names => true)
@access_token = response[:access_token]
@headers = {
'Authorization' => "Bearer #{@access_token}",
'Content-Type' => 'application/json',
'Accept' => 'application/json'
}
end
def download_all(csv_path, properties)
# initialize
start_row = 1
# do the first request
page = get_page(properties, start_row)
# start a csv, write header row
CSV.open(csv_path, "wb", :force_quotes => true) do |csv|
headers = SharepointParser.get_headers(page)
csv << headers
SharepointParser.parse_rows(page) do |row|
csv << row
end
start_row += @page_size
loop do
# get the page and write it to csv
page = get_page(properties, start_row)
SharepointParser.parse_rows(page) do |row|
csv << row
end
# go to next page
start_row += @page_size
# check if next page available
if SharepointParser.page_size(page) < @page_size
break
end
end
end
end
def get_page(properties, start_row, options={})
escaped_properties = CGI.escape("'#{properties}'")
get_url = "#{@site_url}/_api/search/query?querytext=%27*%27&selectproperties=#{escaped_properties}&SourceId=%27#{@source_id}%27&startrow=#{start_row}&RowLimit=#{@page_size}"
response = RestClient::Request.execute(
:url => get_url,
:ssl_version => 'SSLv3',
:method => 'get',
:headers => @headers
)
JSON.parse(response)
end
end
require 'json'
class SharepointParser
def self.get_headers(page)
page['PrimaryQueryResult']['RelevantResults']['Table']['Rows'][0]['Cells'].map {|h| h['Key']}
end
def self.parse_rows(page)
# for each row in the page
page['PrimaryQueryResult']['RelevantResults']['Table']['Rows'].each do |r|
# get the values from each row and call the block
yield(r['Cells'].map {|v| v['Value']})
end
end
def self.page_size(page)
page['PrimaryQueryResult']['RelevantResults']['Table']['Rows'].size
end
def self.total_rows(page)
page['PrimaryQueryResult']['RelevantResults']['TotalRows']
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment