Last active
August 16, 2022 00:46
-
-
Save DBremen/85dc6a1a7d8903102cce to your computer and use it in GitHub Desktop.
VBA Excel to cross-join and clean data with multiple entries per cell separated by comma or line-breaks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Private Function isSaved() As Boolean | |
Dim lastSaved As String | |
On Error GoTo EHandler | |
s = ActiveWorkbook.BuiltinDocumentProperties("last save time") | |
isSaved = True | |
Exit Function | |
EHandler: | |
isSaved = False | |
End Function | |
Private Sub CrossJoinRangesWithoutDupes(colRanges() As Variant, destSheetName As String, srcSheetName As String) | |
Dim cn As ADODB.Connection | |
Dim sql As String | |
Dim sqlRanges() As String, startAddress As String, endAddress As String | |
Dim tempSheet As Worksheet | |
Dim lastCol As Long, col As Long, endRow As Long | |
Dim rs As ADODB.Recordset | |
lastCol = UBound(colRanges) | |
sql = "SELECT DISTINCT * FROM " | |
ReDim sqlRanges(1 To lastCol) | |
Set tempSheet = Sheets.Add | |
'copy each column to a tempSheet and create the sql dynamically | |
For col = 1 To lastCol | |
'add the columnheader | |
tempSheet.Cells(1, col) = Sheets(srcSheetName).Cells(1, col).Value | |
endRow = UBound(colRanges(col)) + 2 | |
startAddress = Cells(2, col).Address(False, False) | |
endAddress = Cells(endRow, col).Address(False, False) | |
tempSheet.Range(startAddress & ":" & endAddress) = WorksheetFunction.Transpose(colRanges(col)) | |
startAddress = Cells(1, col).Address(False, False) | |
sqlRanges(col) = "[" & tempSheet.Name & "$" & startAddress & ":" & endAddress & "]" | |
Next col | |
sql = sql + Join(sqlRanges, ",") | |
Set rs = New ADODB.Recordset | |
Set cn = New ADODB.Connection | |
With cn | |
.ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" & _ | |
"Data Source=" & ActiveWorkbook.FullName & ";" & _ | |
"Extended Properties=""Excel 12.0 XML;HDR=Yes""" | |
.Open | |
End With | |
rs.Open sql, cn | |
'append to outputSheet | |
endRow = Sheets(destSheetName).Cells(Rows.Count, 1).End(xlUp).Row | |
If Sheets(destSheetName).Range("A1").Value <> "" Then | |
endRow = endRow + 1 | |
End If | |
Sheets(destSheetName).Cells(endRow, 1).CopyFromRecordset rs | |
'delete the temp sheet | |
Application.DisplayAlerts = False | |
tempSheet.Delete | |
Application.DisplayAlerts = True | |
rs.Close | |
cn.Close | |
End Sub | |
Sub CleanData() | |
Dim colRanges() As Variant | |
Dim workingRange As Range, currCell As Range, currRow As Range | |
Dim currRowIndexIndex As Long, currColIndexIndex As Long, endRow As Long | |
Dim endCol As Long | |
Dim i As Integer | |
Dim activeSheetName As String | |
Dim hasMultiple As Boolean | |
Dim outputSheet As Worksheet | |
Dim objRegex As Object | |
If Not isSaved Then | |
MsgBox "Please save the WorkBook prior to running this macro", vbExclamation, "WorkBook not saved!" | |
Exit Sub | |
End If | |
Application.ScreenUpdating = False | |
'Define the working range | |
Set workingRange = Range("A1").CurrentRegion | |
activeSheetName = ActiveSheet.Name | |
Set outputSheet = Sheets.Add | |
outputSheet.Name = "Clean Data" | |
For Each currRow In workingRange.Rows | |
hasMultiple = False | |
ReDim colRanges(1 To currRow.Columns.Count) | |
currColIndex = 1 | |
For Each currCol In currRow.Columns | |
'split column by Alt Enter or comma | |
colRanges(currColIndex) = Split(currCol, vbLf) | |
'if no alt enter check for comma | |
If UBound(colRanges(currColIndex)) = 0 And InStr(currCol, ",") Then | |
colRanges(currColIndex) = Split(currCol, ",") | |
End If | |
'check if if the current row contains any columns with multiple entries | |
If UBound(colRanges(currColIndex)) > 0 Then | |
hasMultiple = True | |
End If | |
currColIndex = currColIndex + 1 | |
Next | |
'get rid of extra spaces | |
For i = 1 To UBound(colRanges) | |
For col = 0 To UBound(colRanges(i)) | |
colRanges(i)(col) = Trim(colRanges(i)(col)) | |
Next col | |
Next i | |
If Not hasMultiple Then | |
'output row as is | |
endRow = outputSheet.Cells(Rows.Count, 1).End(xlUp).Row | |
If outputSheet.Range("A1").Value <> "" Then | |
endRow = endRow + 1 | |
End If | |
endCol = UBound(colRanges) | |
outputSheet.Range(Cells(endRow, 1), Cells(endRow, endCol)) = WorksheetFunction.Transpose(colRanges) | |
Else | |
'output cross-join (without dupes) of columns | |
CrossJoinRangesWithoutDupes colRanges, "Clean Data", activeSheetName | |
End If | |
Next | |
End Sub |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment