Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import math | |
from packaging import version | |
import random | |
def compute_amplitude(waveforms, lengths=None, amp_type="avg", scale="linear"): | |
"""Compute amplitude of a batch of waveforms. | |
Arguments |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Digits plus the full Vietnamese alphabet: every base Latin letter and all
# diacritic variants (grave, acute, hook, tilde, dot-below, breve, circumflex, horn).
CHARACTERS = "0123456789aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍị" \
             "ỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"
# Punctuation characters that are preserved by normalization.
# FIX: the original wrote "\|", an invalid escape sequence (DeprecationWarning,
# SyntaxWarning on Python 3.12+). "\\|" yields the identical runtime value —
# a literal backslash followed by a pipe — without the warning.
PUNCTUATION = ".,?!@%~`#$^&*()-_+=[]{}\\|:;\"'<>/"
ALL_CHARS = CHARACTERS + PUNCTUATION

# Matches any single character that is neither a space nor in ALL_CHARS.
# Compiled once at module level; re.escape guards the regex metacharacters
# in PUNCTUATION.
WORD_NORMALIZER = re.compile(r"[^ {}]".format(re.escape(ALL_CHARS)))


def remove_unk_char(text):
    """Replace every character outside the allowed set with a space.

    :param text: input string
    :return: string of the same length, with each unknown character
        substituted by a single space
    """
    return WORD_NORMALIZER.sub(' ', text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.sax | |
import os | |
import re | |
import subprocess | |
class WikiXmlHandler(xml.sax.handler.ContentHandler): | |
"""Content handler for Wiki XML data using SAX""" | |
@staticmethod |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ftfy | |
import bogo | |
import re | |
map_char = { | |
"à": ["a", "f"], "á": ["a", "s"], "â": ["aa", ""], "ã": ["a", "x"], "è": ["e", "f"], "é": ["e", "s"], | |
"ê": ["ee", ""], "ì": ["i", "f"], "í": ["i", "s"], "ò": ["o", "f"], "ó": ["o", "s"], "ô": ["oo", ""], | |
"õ": ["o", "x"], "ù": ["u", "f"], "ú": ["u", "s"], "ý": ["y", "s"], "ă": ["aw", ""], "ĩ": ["i", "x"], | |
"ũ": ["u", "x"], "ơ": ["ow", ""], "ư": ["uw", ""], "ạ": ["a", "j"], "ả": ["a", "r"], "ấ": ["aa", "s"], | |
"ầ": ["aa", "f"], "ẩ": ["aa", "r"], "ẫ": ["aa", "x"], "ậ": ["aa", "j"], "ắ": ["aw", "s"], "ằ": ["aw", "f"], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from annoy import AnnoyIndex | |
class PretrainedEmbeddings(object): | |
def __init__(self, word_to_index, word_vectors): | |
""" | |
Args | |
:param word_to_index: dict mapping from word to integers | |
:param word_vectors: list of numpy arrays |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import typing as T | |
import cytoolz.curried as tz | |
import pyspark | |
from pyspark.sql.functions import explode | |
def schema_to_columns(schema: pyspark.sql.types.StructType) -> T.List[T.List[str]]: | |
columns = list() |