Skip to content

Instantly share code, notes, and snippets.

@chilismaug
Last active February 23, 2023 16:47
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chilismaug/a14f925f346686c1c1af1e495481be86 to your computer and use it in GitHub Desktop.
Save chilismaug/a14f925f346686c1c1af1e495481be86 to your computer and use it in GitHub Desktop.
vba word frequency app with dictionary and value sort
Option Explicit
Public txtFileName As String
Sub Mainline()
Call openDialog
Call GetWordsToOccurencesDict
End Sub
Sub GetWordsToOccurencesDict()
Dim wsWork As Worksheet
'run this to fetch text file contents
Dim wrdArr As Variant
Dim n As Long
Dim SingleWord As String 'Raw word
Dim SkipWords As String 'Words to be excluded
' Set up crude inline stop words list
SkipWords = "[the][a][at][of][is][to][for][this][that][by][be][and][are]"
Set wsWork = ThisWorkbook.Worksheets.Add(After:=ThisWorkbook.Worksheets(ThisWorkbook.Worksheets.Count))
wsWork.Name = "TEMP_WORKSHEET" & ThisWorkbook.Worksheets.Count
wsWork.Range("A1").Value = "WORD"
wsWork.Range("B1").Value = "OCCURENCE"
wsWork.Range("D1").Value = "TEXT FILE"
wsWork.Range("D2").Value = txtFileName
Dim dict As Object
Set dict = CreateObject("scripting.dictionary")
wrdArr = GetFileWords(txtFileName)
Dim wrdChunk As String
Dim keysArr As Variant
Dim k As String
Dim v As Long
Dim i As Long
k = ""
' pop them in a dict with counts
For n = 1 To UBound(wrdArr) - 1
' wrdChunk = RemovePunctuation(CStr(wrdArr(n)))
SingleWord = Trim(LCase(CStr(wrdArr(n))))
' skip Stop Words, remove punct, remaining words to the dict
If (InStr(SkipWords, SingleWord)) < 1 Then
filterPunctSpecialChars (SingleWord)
If Len(SingleWord) > 0 Then
If (dict.Exists(SingleWord)) Then
dict(SingleWord) = dict(SingleWord) + 1
Else
dict(SingleWord) = 1
End If
End If
End If
Next n
' Now lesseee the dict
' keysArr = dict.Keys
keysArr = DictToSortedArray(dict)
For i = 1 To dict.Count '-1
v = keysArr(i, 1)
k = keysArr(i, 2)
wsWork.Range("A" & i + 1).Value = k
wsWork.Range("B" & i + 1).Value = v
Next
wsWork.Columns("A:F").AutoFit
End Sub
Function GetFileWords(sPath As String) As Variant
Dim FSO As Object
Dim MyFile As Object
Dim MyString As String
Dim Arr As Variant
Set FSO = CreateObject("Scripting.FileSystemObject")
Set MyFile = FSO.OpenTextFile(sPath)
MyString = filterPunctSpecialChars(MyFile.ReadAll)
Arr = Split(MyString, " ")
GetFileWords = Arr
End Function
Function DictToSortedArray(D As Object) As Variant
'returns a 1-based 2-dimensional sorted array
'sorted by the keys
Dim A As Variant, i As Long, AL As Object, k As Variant
Dim thang As Variant, tmpKey As Variant, tmpVal As Variant
Dim j As Long 'j is next row after i
Set AL = CreateObject("System.Collections.ArrayList")
For Each k In D
AL.Add D(k) & "," & k
Next k
AL.Sort
AL.Reverse
ReDim A(1 To AL.Count, 1 To 2)
For i = 1 To AL.Count
thang = Split(AL(i - 1), ",")
A(i, 1) = thang(0)
A(i, 2) = thang(1)
Next i
Call Sort_2D_Array(A)
DictToSortedArray = A
End Function
Sub Sort_2D_Array(ByRef data As Variant)
Dim v As Variant
Dim i As Integer, j As Integer, ci As Integer
Dim r As Integer, c As Integer
Dim temp As Variant
'Bubble sort 1st column
ci = LBound(data, 2) '1st column index
For i = LBound(data) To UBound(data) - 1
For j = i + 1 To UBound(data)
If CInt(data(i, ci)) < CInt(data(j, ci)) Then
For c = LBound(data, 2) To UBound(data, 2)
temp = data(i, c)
data(i, c) = data(j, c)
data(j, c) = temp
Next
End If
Next
Next
End Sub
Sub openDialog()
Dim fd As Office.FileDialog
Set fd = Application.FileDialog(msoFileDialogFilePicker)
With fd
.AllowMultiSelect = False
' Set the title of the dialog box.
.Title = "Please select the file."
' Clear out the current filters, and add our own.
.Filters.Clear
.Filters.Add "Text Files", "*.txt"
.Filters.Add "All Files", "*.*"
' Dialog box .Show method returns True if user picked a file. If False, the user clicked Cancel.
If .Show = True Then
txtFileName = .SelectedItems(1) 'replace txtFileName with your textbox
End If
End With
End Sub
Public Function filterPunctSpecialChars(ByVal Txt As String) As String
With CreateObject("VBScript.RegExp")
.Global = True
.IgnoreCase = True
.Pattern = "[^a-zA-Z0-9]" 'let through all the printable everyday ascii characters
filterPunctSpecialChars = .Replace(Txt, " ")
End With
End Function
Public Function filterSpecialChars(ByVal Txt As String) As String
With CreateObject("VBScript.RegExp")
.Global = True
.IgnoreCase = True
.Pattern = "[^\n|a-zA-Z0-9]" ' here we ignore newline and let through all the printable everyday ascii characters
filterSpecialChars = .Replace(Txt, " ")
End With
End Function
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment