HonoShirai shihono

## spacy_morph_ja.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / spacy_morph_ja.ipynb
            
            
              Created
              June 23, 2024 08:27
            
              
                spacy_morph_ja.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## jdepp_make_install.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / jdepp_make_install.ipynb
            
            
              Created
              November 19, 2023 11:07
            
              
                jdepp_make_install.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## ndiff_fulwidth.py
import difflib
import unicodedata

def get_char_width_list(text):
    """text の文字ごとの幅をリストで返す
    半角の場合は1, 全角の場合は2
    """
    result = []
    for c in text:
        if unicodedata.east_asian_width(c) in ["F", "W"]:

## japanese_lm.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / japanese_lm.ipynb
            
            
              Created
              June 12, 2022 23:38
            
              
                japanese_lm.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## pytorch_ngram_lm.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / pytorch_ngram_lm.ipynb
            
            
              Created
              May 1, 2022 08:19
            
              
                pytorch_ngram_lm.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## kneser_ney_smoothing.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / kneser_ney_smoothing.ipynb
            
            
              Created
              April 17, 2022 05:47
            
              
                kneser_ney_smoothing.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## nltk_lm_examples.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / nltk_lm_examples.ipynb
            
            
              Created
              April 5, 2022 00:15
            
              
                nltk_lm_examples.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## torchtext_pentreebank_gt.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / torchtext_pentreebank_gt.ipynb
            
            
              Created
              February 28, 2022 00:19
            
              
                torchtext_pentreebank_gt.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## torchtext_ngram.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                shihono
                / torchtext_ngram.ipynb
            
            
              Created
              February 14, 2022 12:17
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## ngram_generate.py
import lzma
import sys
import glob

def load_xz_ngram_file(n=2):
    ngram_dict = {}
    ngram_freq = {}
    files = glob.glob("/path/to/nwc2010-ngrams/word/over999/{}gms/*.xz".format(n))
    for file in files:
        print(file)
	import difflib
	import unicodedata

	def get_char_width_list(text):
	"""text の文字ごとの幅をリストで返す
	半角の場合は1, 全角の場合は2
	"""
	result = []
	for c in text:
	if unicodedata.east_asian_width(c) in ["F", "W"]:
	import lzma
	import sys
	import glob

	def load_xz_ngram_file(n=2):
	ngram_dict = {}
	ngram_freq = {}
	files = glob.glob("/path/to/nwc2010-ngrams/word/over999/{}gms/*.xz".format(n))
	for file in files:
	print(file)