Last active
February 23, 2023 16:47
-
-
Save chilismaug/a14f925f346686c1c1af1e495481be86 to your computer and use it in GitHub Desktop.
vba word frequency app with dictionary and value sort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Option Explicit | |
Public txtFileName As String | |
Sub Mainline() | |
Call openDialog | |
Call GetWordsToOccurencesDict | |
End Sub | |
Sub GetWordsToOccurencesDict() | |
Dim wsWork As Worksheet | |
'run this to fetch text file contents | |
Dim wrdArr As Variant | |
Dim n As Long | |
Dim SingleWord As String 'Raw word | |
Dim SkipWords As String 'Words to be excluded | |
' Set up crude inline stop words list | |
SkipWords = "[the][a][at][of][is][to][for][this][that][by][be][and][are]" | |
Set wsWork = ThisWorkbook.Worksheets.Add(After:=ThisWorkbook.Worksheets(ThisWorkbook.Worksheets.Count)) | |
wsWork.Name = "TEMP_WORKSHEET" & ThisWorkbook.Worksheets.Count | |
wsWork.Range("A1").Value = "WORD" | |
wsWork.Range("B1").Value = "OCCURENCE" | |
wsWork.Range("D1").Value = "TEXT FILE" | |
wsWork.Range("D2").Value = txtFileName | |
Dim dict As Object | |
Set dict = CreateObject("scripting.dictionary") | |
wrdArr = GetFileWords(txtFileName) | |
Dim wrdChunk As String | |
Dim keysArr As Variant | |
Dim k As String | |
Dim v As Long | |
Dim i As Long | |
k = "" | |
' pop them in a dict with counts | |
For n = 1 To UBound(wrdArr) - 1 | |
' wrdChunk = RemovePunctuation(CStr(wrdArr(n))) | |
SingleWord = Trim(LCase(CStr(wrdArr(n)))) | |
' skip Stop Words, remove punct, remaining words to the dict | |
If (InStr(SkipWords, SingleWord)) < 1 Then | |
filterPunctSpecialChars (SingleWord) | |
If Len(SingleWord) > 0 Then | |
If (dict.Exists(SingleWord)) Then | |
dict(SingleWord) = dict(SingleWord) + 1 | |
Else | |
dict(SingleWord) = 1 | |
End If | |
End If | |
End If | |
Next n | |
' Now lesseee the dict | |
' keysArr = dict.Keys | |
keysArr = DictToSortedArray(dict) | |
For i = 1 To dict.Count '-1 | |
v = keysArr(i, 1) | |
k = keysArr(i, 2) | |
wsWork.Range("A" & i + 1).Value = k | |
wsWork.Range("B" & i + 1).Value = v | |
Next | |
wsWork.Columns("A:F").AutoFit | |
End Sub | |
Function GetFileWords(sPath As String) As Variant | |
Dim FSO As Object | |
Dim MyFile As Object | |
Dim MyString As String | |
Dim Arr As Variant | |
Set FSO = CreateObject("Scripting.FileSystemObject") | |
Set MyFile = FSO.OpenTextFile(sPath) | |
MyString = filterPunctSpecialChars(MyFile.ReadAll) | |
Arr = Split(MyString, " ") | |
GetFileWords = Arr | |
End Function | |
Function DictToSortedArray(D As Object) As Variant | |
'returns a 1-based 2-dimensional sorted array | |
'sorted by the keys | |
Dim A As Variant, i As Long, AL As Object, k As Variant | |
Dim thang As Variant, tmpKey As Variant, tmpVal As Variant | |
Dim j As Long 'j is next row after i | |
Set AL = CreateObject("System.Collections.ArrayList") | |
For Each k In D | |
AL.Add D(k) & "," & k | |
Next k | |
AL.Sort | |
AL.Reverse | |
ReDim A(1 To AL.Count, 1 To 2) | |
For i = 1 To AL.Count | |
thang = Split(AL(i - 1), ",") | |
A(i, 1) = thang(0) | |
A(i, 2) = thang(1) | |
Next i | |
Call Sort_2D_Array(A) | |
DictToSortedArray = A | |
End Function | |
Sub Sort_2D_Array(ByRef data As Variant) | |
Dim v As Variant | |
Dim i As Integer, j As Integer, ci As Integer | |
Dim r As Integer, c As Integer | |
Dim temp As Variant | |
'Bubble sort 1st column | |
ci = LBound(data, 2) '1st column index | |
For i = LBound(data) To UBound(data) - 1 | |
For j = i + 1 To UBound(data) | |
If CInt(data(i, ci)) < CInt(data(j, ci)) Then | |
For c = LBound(data, 2) To UBound(data, 2) | |
temp = data(i, c) | |
data(i, c) = data(j, c) | |
data(j, c) = temp | |
Next | |
End If | |
Next | |
Next | |
End Sub | |
Sub openDialog() | |
Dim fd As Office.FileDialog | |
Set fd = Application.FileDialog(msoFileDialogFilePicker) | |
With fd | |
.AllowMultiSelect = False | |
' Set the title of the dialog box. | |
.Title = "Please select the file." | |
' Clear out the current filters, and add our own. | |
.Filters.Clear | |
.Filters.Add "Text Files", "*.txt" | |
.Filters.Add "All Files", "*.*" | |
' Dialog box .Show method returns True if user picked a file. If False, the user clicked Cancel. | |
If .Show = True Then | |
txtFileName = .SelectedItems(1) 'replace txtFileName with your textbox | |
End If | |
End With | |
End Sub | |
Public Function filterPunctSpecialChars(ByVal Txt As String) As String | |
With CreateObject("VBScript.RegExp") | |
.Global = True | |
.IgnoreCase = True | |
.Pattern = "[^a-zA-Z0-9]" 'let through all the printable everyday ascii characters | |
filterPunctSpecialChars = .Replace(Txt, " ") | |
End With | |
End Function | |
Public Function filterSpecialChars(ByVal Txt As String) As String | |
With CreateObject("VBScript.RegExp") | |
.Global = True | |
.IgnoreCase = True | |
.Pattern = "[^\n|a-zA-Z0-9]" ' here we ignore newline and let through all the printable everyday ascii characters | |
filterSpecialChars = .Replace(Txt, " ") | |
End With | |
End Function | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment