Last active
October 24, 2023 22:00
-
-
Save potatoqualitee/c22e2a213f5b05e47c0c to your computer and use it in GitHub Desktop.
Get Duplicates from PowerShell Datatable using LINQ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create Datatable | |
$dt = New-Object System.Data.Datatable "Music" | |
[void]$dt.Columns.Add("Artist") | |
[void]$dt.Columns.Add("Genre") | |
[void]$dt.Columns.Add("Album") | |
[void]$dt.Columns.Add("ReleaseYear") | |
# Add data | |
[void]$dt.Rows.Add("Poison","Glam Metal","Open Up and Say... Ahh!",1988) | |
[void]$dt.Rows.Add("Cinderella","Night Songs","Flesh & Blood",1986) | |
[void]$dt.Rows.Add("Eazy-E","Gangsta Rap","Eazy-Duz-It",1988) | |
[void]$dt.Rows.Add("Tim Smooth","Southern Rap","I Gotsta' Have It",1991) | |
# Add dupes | |
[void]$dt.Rows.Add("Poison","Glam Metal","Open Up and Say... Ahh!",1988) | |
[void]$dt.Rows.Add("Eazy-E","Gangsta Rap","Eazy-Duz-It",1988) | |
[void]$dt.Rows.Add("Eazy-E","Gangsta Rap","Eazy-Duz-It",1988) | |
# Perform the dupe check | |
[void][Reflection.Assembly]::LoadWithPartialName("System.Data.DataSetExtensions") | |
[void][Reflection.Assembly]::LoadWithPartialName("System.Data.Linq") | |
# Add a count column prior to making the dataset a LINQ list. | |
[void]$dt.Columns.Add("DupeCount") | |
$list = [System.Data.DataTableExtensions]::AsEnumerable($dt) | |
# Actually don't need! -> $list = [System.Linq.Enumerable]::ToList($enum) | |
# Group By | |
$groupbyquery = [System.Func[System.Data.DataRow, string]] { param($row) $row.Artist, $row.Album } | |
$groupby = [System.Linq.Enumerable]::GroupBy($list,$groupbyquery) | |
# Where, can probably use Any, too. | |
$wherequery = [System.Func[System.Object, bool]] { param($row) $row.Artist.count -gt 1 -and $row.Album -gt 1 } | |
$where = [System.Linq.Enumerable]::Where($groupby,$wherequery) | |
# Select distinct, and add a count | |
$selectquery = [system.func[System.Object,System.Object]] { param($row) | |
$null = $row.Item(0)["DupeCount"] = $row.count | |
$row.Item(0) | |
} | |
$select = [System.Linq.Enumerable]::Select($where,$selectquery) | |
# You could streamline it, too. | |
# [System.Linq.Enumerable]::Select([System.Linq.Enumerable]::GroupBy($list,$groupbyquery).Where({$wherequery}),$selectquery) | |
# Build & populate new dupetable | |
$dupetable = $dt.Clone() | |
foreach ($row in $select) { $dupetable.ImportRow($row) } | |
# Remove count column from original datatable | |
[void]$dt.Columns.Remove("DupeCount") | |
# Show your results. Orderby can probably be performed above, but whatever. | |
$dupetable | Select Artist, Album, Dupecount | Sort-Object Dupecount |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment