Skip to content

Instantly share code, notes, and snippets.

@DBremen
Last active August 16, 2022 00:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DBremen/85dc6a1a7d8903102cce to your computer and use it in GitHub Desktop.
Save DBremen/85dc6a1a7d8903102cce to your computer and use it in GitHub Desktop.
VBA Excel to cross-join and clean data with multiple entries per cell separated by comma or line-breaks
Private Function isSaved() As Boolean
Dim lastSaved As String
On Error GoTo EHandler
s = ActiveWorkbook.BuiltinDocumentProperties("last save time")
isSaved = True
Exit Function
EHandler:
isSaved = False
End Function
Private Sub CrossJoinRangesWithoutDupes(colRanges() As Variant, destSheetName As String, srcSheetName As String)
Dim cn As ADODB.Connection
Dim sql As String
Dim sqlRanges() As String, startAddress As String, endAddress As String
Dim tempSheet As Worksheet
Dim lastCol As Long, col As Long, endRow As Long
Dim rs As ADODB.Recordset
lastCol = UBound(colRanges)
sql = "SELECT DISTINCT * FROM "
ReDim sqlRanges(1 To lastCol)
Set tempSheet = Sheets.Add
'copy each column to a tempSheet and create the sql dynamically
For col = 1 To lastCol
'add the columnheader
tempSheet.Cells(1, col) = Sheets(srcSheetName).Cells(1, col).Value
endRow = UBound(colRanges(col)) + 2
startAddress = Cells(2, col).Address(False, False)
endAddress = Cells(endRow, col).Address(False, False)
tempSheet.Range(startAddress & ":" & endAddress) = WorksheetFunction.Transpose(colRanges(col))
startAddress = Cells(1, col).Address(False, False)
sqlRanges(col) = "[" & tempSheet.Name & "$" & startAddress & ":" & endAddress & "]"
Next col
sql = sql + Join(sqlRanges, ",")
Set rs = New ADODB.Recordset
Set cn = New ADODB.Connection
With cn
.ConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" & _
"Data Source=" & ActiveWorkbook.FullName & ";" & _
"Extended Properties=""Excel 12.0 XML;HDR=Yes"""
.Open
End With
rs.Open sql, cn
'append to outputSheet
endRow = Sheets(destSheetName).Cells(Rows.Count, 1).End(xlUp).Row
If Sheets(destSheetName).Range("A1").Value <> "" Then
endRow = endRow + 1
End If
Sheets(destSheetName).Cells(endRow, 1).CopyFromRecordset rs
'delete the temp sheet
Application.DisplayAlerts = False
tempSheet.Delete
Application.DisplayAlerts = True
rs.Close
cn.Close
End Sub
Sub CleanData()
Dim colRanges() As Variant
Dim workingRange As Range, currCell As Range, currRow As Range
Dim currRowIndexIndex As Long, currColIndexIndex As Long, endRow As Long
Dim endCol As Long
Dim i As Integer
Dim activeSheetName As String
Dim hasMultiple As Boolean
Dim outputSheet As Worksheet
Dim objRegex As Object
If Not isSaved Then
MsgBox "Please save the WorkBook prior to running this macro", vbExclamation, "WorkBook not saved!"
Exit Sub
End If
Application.ScreenUpdating = False
'Define the working range
Set workingRange = Range("A1").CurrentRegion
activeSheetName = ActiveSheet.Name
Set outputSheet = Sheets.Add
outputSheet.Name = "Clean Data"
For Each currRow In workingRange.Rows
hasMultiple = False
ReDim colRanges(1 To currRow.Columns.Count)
currColIndex = 1
For Each currCol In currRow.Columns
'split column by Alt Enter or comma
colRanges(currColIndex) = Split(currCol, vbLf)
'if no alt enter check for comma
If UBound(colRanges(currColIndex)) = 0 And InStr(currCol, ",") Then
colRanges(currColIndex) = Split(currCol, ",")
End If
'check if if the current row contains any columns with multiple entries
If UBound(colRanges(currColIndex)) > 0 Then
hasMultiple = True
End If
currColIndex = currColIndex + 1
Next
'get rid of extra spaces
For i = 1 To UBound(colRanges)
For col = 0 To UBound(colRanges(i))
colRanges(i)(col) = Trim(colRanges(i)(col))
Next col
Next i
If Not hasMultiple Then
'output row as is
endRow = outputSheet.Cells(Rows.Count, 1).End(xlUp).Row
If outputSheet.Range("A1").Value <> "" Then
endRow = endRow + 1
End If
endCol = UBound(colRanges)
outputSheet.Range(Cells(endRow, 1), Cells(endRow, endCol)) = WorksheetFunction.Transpose(colRanges)
Else
'output cross-join (without dupes) of columns
CrossJoinRangesWithoutDupes colRanges, "Clean Data", activeSheetName
End If
Next
End Sub
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment