Last active
August 29, 2015 14:22
-
-
Save gmodeblog/2dd5da62485e8f90a43c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
' Entry point of the macro.
'
' Reads a list of sources from the "設定&実行" sheet, extracts the text of each
' source, passes every extracted chunk to nmc_tagger.Parse, accumulates the
' parse results in a dictionary through StoreAnalysisInfo, and finally writes
' one row per dictionary key (count, key, feature) to the sheet that is active
' after CreateResultSheet returns, sorted by count in descending order.
'
' Inputs read from the "設定&実行" sheet:
'   B3       - value assigned to NmcParam.DicDir
'   C3..Cn   - one source per row: either a URL beginning with "http" or a
'              file path (.jtd, .pdf, .doc/.docx; anything else is read via
'              GetText)
Sub execute()
    ' First row that holds data, both on the input sheet and the result sheet.
    Const FIRST_DATA_ROW As Long = 3

    Dim nmc_tagger As New NmcTagger
    Dim nmc_node As NmcNodeCollection
    Dim nmc_param As New NmcParam
    Dim FSO As Object
    Dim dict As Object
    Dim settings_sheet As Object
    Dim result_sheet As Object
    Dim path As String
    Dim file_extension_name As String
    Dim string_array() As String
    Dim doc As Variant
    Dim Key As Variant
    ' Long, not Integer: worksheet row numbers exceed the Integer limit (32767).
    Dim source_row As Long
    Dim last_row As Long
    Dim result_row As Long

    ' Initialisation
    Sheets("設定&実行").Activate
    ' Keep a reference to the input sheet so later reads do not depend on
    ' which sheet happens to be active while the helpers run.
    Set settings_sheet = ActiveSheet
    nmc_param.DicDir = settings_sheet.Range("B3")
    Call nmc_tagger.Create(nmc_param)
    Set dict = CreateObject("Scripting.Dictionary")
    Set FSO = CreateObject("Scripting.FileSystemObject")

    ' Last used row of column C, probed from the bottom of the sheet.
    last_row = settings_sheet.Cells(settings_sheet.Rows.Count, "C").End(xlUp).Row

    ' Collect the text of every listed source and accumulate the analysis.
    For source_row = FIRST_DATA_ROW To last_row
        path = settings_sheet.Range("C" & source_row)

        ' Blank cells inside the list are skipped instead of being handed to
        ' the text extractors as an empty path.
        If Len(path) > 0 Then
            ' Extract the text of the source.
            If LCase$(Left$(path, 4)) = "http" Then
                string_array = GetHTMLText(path)
            Else
                ' Compare the extension case-insensitively so that e.g.
                ' ".PDF" or ".Docx" are routed to the matching reader.
                file_extension_name = LCase$(FSO.GetExtensionName(path))
                Select Case file_extension_name
                    Case "jtd"
                        string_array = GetJTDText(path)
                    Case "pdf"
                        string_array = GetPDFText(path)
                    Case "doc", "docx"
                        string_array = GetWordText(path)
                    Case Else
                        ' Everything else is treated as plain text.
                        string_array = GetText(path)
                End Select
            End If

            ' Parse each extracted chunk and merge the result into dict.
            For Each doc In string_array
                Set nmc_node = nmc_tagger.Parse(doc)
                Call StoreAnalysisInfo(nmc_node, dict)
            Next doc
        End If
    Next source_row

    ' Create the result sheet and write the collected information.
    ' NOTE(review): assumes CreateResultSheet leaves the result sheet active,
    ' as the original code wrote to ActiveSheet right after this call.
    Call CreateResultSheet
    Set result_sheet = ActiveSheet

    result_row = FIRST_DATA_ROW
    For Each Key In dict
        result_sheet.Range("B" & result_row) = dict.Item(Key).Item("count")
        result_sheet.Range("C" & result_row) = Key
        result_sheet.Range("D" & result_row) = dict.Item(Key).Item("feature")
        result_row = result_row + 1
    Next Key

    ' Sort by count (column B) in descending order. result_row points one past
    ' the last written row, so the range ends at result_row - 1; the sort is
    ' skipped entirely when nothing was written.
    If result_row > FIRST_DATA_ROW Then
        With result_sheet
            .Range(.Cells(FIRST_DATA_ROW, 2), .Cells(result_row - 1, 4)) _
                .Sort Key1:=.Cells(FIRST_DATA_ROW, 2), Order1:=xlDescending
        End With
    End If

    ' Release object references.
    Set nmc_node = Nothing
    Set result_sheet = Nothing
    Set settings_sheet = Nothing
    Set dict = Nothing
    Set FSO = Nothing
End Sub
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment