Skip to content

Instantly share code, notes, and snippets.

@gmodeblog
Last active August 29, 2015 14:22
Show Gist options
  • Save gmodeblog/2dd5da62485e8f90a43c to your computer and use it in GitHub Desktop.
Save gmodeblog/2dd5da62485e8f90a43c to your computer and use it in GitHub Desktop.
' Entry point of the analysis macro.
'
' Reads the dictionary directory from cell B3 of the "設定&実行" sheet and a
' list of sources (URLs or file paths) from column C starting at row 3.
' For each source the text is extracted with the reader matching its type
' (HTML / jtd / pdf / doc(x) / anything else as plain text), every extracted
' string is parsed with NmcTagger, and StoreAnalysisInfo accumulates the
' parse results into a Scripting.Dictionary.
' Finally CreateResultSheet is called and, on the sheet that is active
' afterwards, one row per dictionary key is written (B: "count", C: key,
' D: "feature") starting at row 3, sorted by column B in descending order.
'
' NOTE(review): the layout of the dictionary entries (an object exposing
' Item("count") / Item("feature")) is defined by StoreAnalysisInfo, which is
' not visible here - confirm against that procedure.
Sub execute()
    Dim nmc_tagger As New NmcTagger
    Dim nmc_node As NmcNodeCollection
    Dim nmc_param As New NmcParam
    Dim FSO As Object
    Dim dict As Object
    Dim settings_sheet As Worksheet
    Dim result_sheet As Worksheet
    Dim path As String
    Dim file_extension_name As String
    Dim string_array() As String
    Dim doc As Variant          ' For Each over an array requires a Variant
    Dim dict_key As Variant     ' For Each over a Dictionary yields its keys
    ' Row counters must be Long: Integer overflows above 32767.
    Dim sr As Long, lr As Long
    Dim index As Long

    ' Initialization
    Set settings_sheet = Sheets("設定&実行")
    ' Keep activating the sheet as before in case helpers rely on ActiveSheet,
    ' but read the settings through the explicit reference from here on.
    settings_sheet.Activate
    nmc_param.DicDir = settings_sheet.Range("B3").Value
    Call nmc_tagger.Create(nmc_param)
    Set dict = CreateObject("Scripting.Dictionary")
    Set FSO = CreateObject("Scripting.FileSystemObject")

    sr = 3
    ' Last used row in column C, independent of the sheet's maximum row count.
    lr = settings_sheet.Cells(settings_sheet.Rows.Count, "C").End(xlUp).Row

    ' Collect and analyse the text of every listed source
    Do While sr <= lr
        path = settings_sheet.Range("C" & sr).Value

        ' Skip blank cells inside the list instead of handing "" to a reader
        If Len(path) > 0 Then
            ' Extract the text of the source
            If LCase(Left(path, 4)) = "http" Then
                string_array = GetHTMLText(path)
            Else
                ' Compare extensions case-insensitively (".PDF", ".Docx", ...)
                file_extension_name = LCase(FSO.GetExtensionName(path))
                Select Case file_extension_name
                    Case "jtd"
                        string_array = GetJTDText(path)
                    Case "pdf"
                        string_array = GetPDFText(path)
                    Case "doc", "docx"
                        string_array = GetWordText(path)
                    Case Else
                        ' Treat everything else as plain text
                        string_array = GetText(path)
                End Select
            End If

            ' Parse each extracted string and accumulate the results
            For Each doc In string_array
                Set nmc_node = nmc_tagger.Parse(doc)
                Call StoreAnalysisInfo(nmc_node, dict)
            Next doc
        End If

        sr = sr + 1
    Loop

    ' Create the result sheet and write the collected information.
    ' The output goes to whichever sheet is active after CreateResultSheet,
    ' exactly as the original ActiveSheet-based code did.
    Call CreateResultSheet
    Set result_sheet = ActiveSheet

    index = 3
    For Each dict_key In dict
        result_sheet.Range("B" & index) = dict.Item(dict_key).Item("count")
        result_sheet.Range("C" & index) = dict_key
        result_sheet.Range("D" & index) = dict.Item(dict_key).Item("feature")
        index = index + 1
    Next dict_key

    ' Sort by occurrence count, descending.
    ' index is one past the last written row, so the data ends at index - 1;
    ' skip the sort entirely when nothing was written.
    If index > 3 Then
        ' Header is stated explicitly because Sort otherwise reuses the
        ' setting of the previous sort; the range starts at the first data row.
        result_sheet.Range(result_sheet.Cells(3, 2), result_sheet.Cells(index - 1, 4)) _
            .Sort Key1:=result_sheet.Cells(3, 2), order1:=xlDescending, Header:=xlNo
    End If

    ' Release object references
    Set result_sheet = Nothing
    Set settings_sheet = Nothing
    Set dict = Nothing
    Set FSO = Nothing
End Sub
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment