Skip to content

Instantly share code, notes, and snippets.

@aaronpmiller
Last active September 18, 2015 05:06
Show Gist options
  • Save aaronpmiller/6119644c4430ab8bc56e to your computer and use it in GitHub Desktop.
Save aaronpmiller/6119644c4430ab8bc56e to your computer and use it in GitHub Desktop.
Function Import-Excel {
<#
.Synopsis
Converts an Excel document into an array of objects with the columns as separate properties.
.Example
Import-Excel -Path .\Example.xlsx
This example would import the data stored in the Example.xlsx spreadsheet.
.Description
The Import-Excel cmdlet converts an Excel document into an array of objects whose property names are determined by the column headers and whose values are determined by the column data.
Additionally, you can specify whether this particular Excel file has any headers at all, in which case the objects will be given the property names based on their column. Likewise, if the document has headers, but one column does not, its data will be assigned a Column# property name.
.Parameter Path
Specifies the path to the Excel file to import. You can also pipe a path to Import-Excel.
.Parameter noHeaders
Specifies that the document being imported has no headers. Default value is False.
.Parameter displayProgress
If you want a progress dialogue of what's going on, will cause processing delays.
.Parameter ProgressID
ID of the Write-Progress cmdlet, will get a random int if not specified.
.Parameter ParentProgressID
ID of the parent Write-Progress cmdlet if one exists, will default to -1 if not specified.
.Outputs
Hashtable
.Notes
Name: Import-Excel
Author: Aaron Miller (credit to Jeremy Engel for the original function)
Date: 03/21/2014
.Link
Export-Excel
#>
[CmdletBinding()]
Param (
[Parameter(Mandatory=$true,ValueFromPipeline=$true)]
[ValidateScript({(Test-Path $_) -and ($_ -match ".xls[mx]?$")})]
[string]$Path,
[Parameter(Mandatory=$false)][string[]]$worksheetName,
[Parameter(Mandatory=$false)][switch]$noHeaders,
[Parameter(Mandatory=$false)][switch]$displayProgress,
[Parameter(Mandatory=$false)][int]$ProgressID=(Get-Random),
[Parameter(Mandatory=$false)][int]$ParentProgressID=-1
)
Write-Verbose "Getting Excel ComObject"
$excel = New-Object -ComObject Excel.Application -ErrorAction SilentlyContinue
if (!$excel) {
Write-Error "Please install Excel"
return
}
$Path = (Resolve-Path $Path).Path
$workbooks = $excel.Workbooks
Write-Verbose "Opening the workbook [$Path]"
$workbook = $workbooks.Open($Path)
$worksheetCount = $workbook.Worksheets.Count
$workbookData = @{}
if ($displayProgress) {$worksheetProgressSplat = @{Id=$ProgressID;ParentId=$ParentProgressID}}
for ($s=1;$s -le $worksheetCount;$s++) {
# Get the sheetname
$sheetName = $workbook.Worksheets.Item($s).Name
if (($worksheetName) -and ($sheetName -notin $worksheetName)) {
Write-Verbose "Skipping sheet: $sheetName [$s] of [$worksheetCount]"
} else {
if ($displayProgress) {
if ($worksheetCount -gt 1) {
$worksheetProgressSplat["PercentComplete"] = 100 * $s / $worksheetCount
}
Write-Progress -Activity "Importing Excel File $Path" -Status "Currently gathering data from sheet $sheetName [$s] of [$worksheetCount]" @worksheetProgressSplat
}
# Pull the row data out of the rows, this will be a two-dimensional array with cell range notation for access (e.g. base is 1 instead of 0)
$rowData = $workbook.Worksheets.Item($s).UsedRange.Rows.Value2
# We'll be using these count in for loops
$rowCount = $workbook.Worksheets.Item($s).UsedRange.Rows.Count
$columnCount = $workbook.Worksheets.Item($s).UsedRange.Columns.Count
# Typecasting and specifying defaults so things work smoothly
[string[]]$headers = @()
#[array]$content = @()
$content = New-Object System.Collections.ArrayList
Write-Verbose "Processing data for sheet#:$s [$sheetName]"
if ($rowData) {
Write-Verbose "Creating our headers"
for ($c=1;$c -le $columnCount;$c++){
# The header is typically the cell in the first row for each column
$header = $rowData[1,$c]
# Unless we do not have a header row or the cell was empty
if ($noHeaders -or (-not($header))) {
# Create a header name with a simple format
$header = "Column$c"
}
# Add the header to the header array for this sheet
$headers+=$header
Write-Verbose "Added header [$header]"
}
if ($displayProgress) {$rowProgressSplat = @{Id=$(Get-Random);ParentId=$ProgressID}}
Write-Verbose "Starting to pull our content out and assign it to the headers"
# If we don't have any headers start retrieving data on the first row, otherwise use the second row
$top = if ($noHeaders) {1} else {2}
# We're going to loop through all rows in the range
for ($r=$top;$r-le$rowCount;$r++) {
if ($displayProgress) {
if ($rowCount -gt 1) {
$rowProgressSplat["PercentComplete"] = 100 * $r / $rowCount
}
Write-Progress -Activity "Gathering data from sheet $sheetName" -Status "Adding data for row $r of $rowCount" @rowProgressSplat
}
# Creating a custom PSObject to hold / return our row data
$line = New-Object PSOBject
# We're going to keep track of how many cells are blank in the row
[int]$blanks=0
# Now we're going to loop across the columns in the row
for ($c=1;$c-le$columnCount;$c++) {
# The cell value is available in the two-dimensional array with cell range notation
$value = $rowdata[$r,$c]
# Adjust the index into the header since it is a typical 0-based array
$headerName = $headers[$c-1]
# For every cell we pull data from we're going to associate it with its header and add it to our $line for return
$line | Add-Member -MemberType NoteProperty -Name $headerName -Value $value
if (-not($value)) {
# If the cell didn't contain any data incriment the $blanks counter
$blanks++
} else {
Write-Verbose "Adding [$value] to column [$headerName] from row [$r]"
}
}
# Check if the row contained fewer blank values than columns (make sure we don't have a blank row if so we're going to ignore it)
if ($blanks -lt $columnCount) {[void]$content.Add($line)}
}
}
# Take the content we generated for the sheet (we specify a default empty array above if no content was returned) and add it to our $workbookData
Write-Verbose "Adding gathered content to the workbook"
$workbookData[$sheetName]=$content
}
}
# All done, let's clean up any remaining ComObjects, close our file, and quit Excel
Write-Verbose "Closing the workbook"
$workbook.Close($false)
Write-Verbose "Releasing the ComObjects for the current and all workbooks"
do { $o = [System.Runtime.InteropServices.Marshal]::ReleaseComObject($workbook) } while($o -gt -1)
do { $o = [System.Runtime.InteropServices.Marshal]::ReleaseComObject($workbooks) } while($o -gt -1)
Write-Verbose "Quitting Excel"
$excel.Quit()
Write-Verbose "Releasing the ExcelComObject"
do { $o = [System.Runtime.InteropServices.Marshal]::ReleaseComObject($excel) } while($o -gt -1)
# Time to return the data!
return $workbookData
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment