Nguyen Binh nguyenvulebinh

## wav2vec2_vlsp_test.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                nguyenvulebinh
                / wav2vec2_vlsp_test.ipynb
            
            
              Created
              May 27, 2023 04:49
            
              
                wav2vec2_vlsp_test.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## signal_processing.py
import torch
import math
from packaging import version
import random


def compute_amplitude(waveforms, lengths=None, amp_type="avg", scale="linear"):
    """Compute amplitude of a batch of waveforms.

    Arguments

## clean_envi_doc.py
import re

# Digits plus the upper/lower-case letters that are allowed to survive
# cleaning. Anything not listed here or in PUNCTUATION is treated as unknown.
CHARACTERS = (
    "0123456789aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍị"
    "ỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"
)

# Punctuation that is kept as-is. The backslash is written as "\\" because a
# bare "\|" is an invalid escape sequence in a normal string literal (it only
# works by accident and emits a warning on current Python versions). The
# resulting string value is unchanged: a single backslash followed by "|".
PUNCTUATION = ".,?!@%~`#$^&*()-_+=[]{}\\|:;\"'<>/"

# Every character remove_unk_char() leaves untouched (besides the space).
ALL_CHARS = CHARACTERS + PUNCTUATION

# Negated character class: matches any single character that is neither a
# space nor a member of ALL_CHARS. re.escape() neutralises the characters that
# would otherwise be special inside the brackets (e.g. "]", "^", "-", "\").
WORD_NORMALIZER = re.compile(r"[^ {}]".format(re.escape(ALL_CHARS)))


def remove_unk_char(text: str) -> str:
    """Replace every character outside the allowed set with a space.

    Each unknown character is substituted one-for-one with a single space, so
    the length of the string is preserved and runs of unknown characters
    become runs of spaces (they are not collapsed).

    :param text: input string to clean
    :return: ``text`` with all characters not in ``ALL_CHARS`` (and not a
        space) replaced by ``' '``
    """
    return WORD_NORMALIZER.sub(' ', text)

## parser_wiki_dump.py
import xml.sax
import os
import re
import subprocess


class WikiXmlHandler(xml.sax.handler.ContentHandler):
    """Content handler for Wiki XML data using SAX"""

    @staticmethod

## format_tone.py
import ftfy
import bogo
import re

map_char = {
    "à": ["a", "f"], "á": ["a", "s"], "â": ["aa", ""], "ã": ["a", "x"], "è": ["e", "f"], "é": ["e", "s"],
    "ê": ["ee", ""], "ì": ["i", "f"], "í": ["i", "s"], "ò": ["o", "f"], "ó": ["o", "s"], "ô": ["oo", ""],
    "õ": ["o", "x"], "ù": ["u", "f"], "ú": ["u", "s"], "ý": ["y", "s"], "ă": ["aw", ""], "ĩ": ["i", "x"],
    "ũ": ["u", "x"], "ơ": ["ow", ""], "ư": ["uw", ""], "ạ": ["a", "j"], "ả": ["a", "r"], "ấ": ["aa", "s"],
    "ầ": ["aa", "f"], "ẩ": ["aa", "r"], "ẫ": ["aa", "x"], "ậ": ["aa", "j"], "ắ": ["aw", "s"], "ằ": ["aw", "f"],

## analogies.py
import numpy as np
from annoy import AnnoyIndex


class PretrainedEmbeddings(object):
    def __init__(self, word_to_index, word_vectors):
        """
        Args
        :param word_to_index: dict mapping from word to integers
        :param word_vectors: list of numpy arrays

## flatten_all_spark_schema.py
import typing as T

import cytoolz.curried as tz
import pyspark
from pyspark.sql.functions import explode


def schema_to_columns(schema: pyspark.sql.types.StructType) -> T.List[T.List[str]]:

    columns = list()
	import torch
	import math
	from packaging import version
	import random


	def compute_amplitude(waveforms, lengths=None, amp_type="avg", scale="linear"):
	"""Compute amplitude of a batch of waveforms.

	Arguments
	import re

	CHARACTERS = "0123456789aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍị" \
	"ỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"
	PUNCTUATION = ".,?!@%~`#$^&*()-_+=[]{}\\|:;\"'<>/"
	ALL_CHARS = CHARACTERS + PUNCTUATION
	WORD_NORMALIZER = re.compile(r"[^ {}]".format(re.escape(ALL_CHARS)))

	def remove_unk_char(text):
	return WORD_NORMALIZER.sub(' ', text)
	import xml.sax
	import os
	import re
	import subprocess


	class WikiXmlHandler(xml.sax.handler.ContentHandler):
	"""Content handler for Wiki XML data using SAX"""

	@staticmethod
	import ftfy
	import bogo
	import re

	map_char = {
	"à": ["a", "f"], "á": ["a", "s"], "â": ["aa", ""], "ã": ["a", "x"], "è": ["e", "f"], "é": ["e", "s"],
	"ê": ["ee", ""], "ì": ["i", "f"], "í": ["i", "s"], "ò": ["o", "f"], "ó": ["o", "s"], "ô": ["oo", ""],
	"õ": ["o", "x"], "ù": ["u", "f"], "ú": ["u", "s"], "ý": ["y", "s"], "ă": ["aw", ""], "ĩ": ["i", "x"],
	"ũ": ["u", "x"], "ơ": ["ow", ""], "ư": ["uw", ""], "ạ": ["a", "j"], "ả": ["a", "r"], "ấ": ["aa", "s"],
	"ầ": ["aa", "f"], "ẩ": ["aa", "r"], "ẫ": ["aa", "x"], "ậ": ["aa", "j"], "ắ": ["aw", "s"], "ằ": ["aw", "f"],
	import numpy as np
	from annoy import AnnoyIndex


	class PretrainedEmbeddings(object):
	def __init__(self, word_to_index, word_vectors):
	"""
	Args
	:param word_to_index: dict mapping from word to integers
	:param word_vectors: list of numpy arrays
	import typing as T

	import cytoolz.curried as tz
	import pyspark
	from pyspark.sql.functions import explode


	def schema_to_columns(schema: pyspark.sql.types.StructType) -> T.List[T.List[str]]:

	columns = list()