Goals: Add links that are reasonable and good explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod eagerly sought.
![Screenshot 2023-12-18 at 10 40 27 PM](https://private-user-images.githubusercontent.com/3837836/291468646-4c30ad72-76ee-4939-a5fb-16b570d38cf2.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjE0MzE1OTIsIm5iZiI6MTcyMTQzMTI5MiwicGF0aCI6Ii8zODM3ODM2LzI5MTQ2ODY0Ni00YzMwYWQ3Mi03NmVlLTQ5MzktYTVmYi0xNmI1NzBkMzhjZjIucG5nP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI0MDcxOSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNDA3MTlUMjMyMTMyWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9YWI4ZjAwODVmNmY0ZmE1Y2E0ZjRhNTYxYjY3MzY0Njk1ZjE1ZGUzNTRjZjE0OGUwOGU5MzIzODhmMTFlYjk1MCZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QmYWN0b3JfaWQ9MCZrZXlfaWQ9MCZyZXBvX2lkPTAifQ.y3ZUn020JWvh1jzw-dAhN5sjAeIliMVtSD4mYAyowCE)
from __future__ import print_function | |
import json | |
import os | |
import numpy as np | |
from gensim.models import Word2Vec | |
from gensim.utils import simple_preprocess | |
from keras.engine import Input | |
from keras.layers import Embedding, merge |
# https://nlpforhackers.io/named-entity-extraction/ | |
import os | |
import string | |
import collections | |
import pickle | |
from collections import Iterable | |
from nltk.tag import ClassifierBasedTagger | |
from nltk.chunk import ChunkParserI, conlltags2tree, tree2conlltags |
''' Script for downloading all GLUE data. | |
Note: for legal reasons, we are unable to host MRPC. | |
You can either use the version hosted by the SentEval team, which is already tokenized, | |
or you can download the original data from (https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi) and extract the data from it manually. | |
For Windows users, you can run the .msi file. For Mac and Linux users, consider an external library such as 'cabextract' (see below for an example). | |
You should then rename and place specific files in a folder (see below for an example). | |
mkdir MRPC | |
cabextract MSRParaphraseCorpus.msi -d MRPC |
""" Enhancing Intent Classification with the Universal Sentence Encoder: | |
https://medium.com/scalableminds/enhancing-intent-classification-with-the-universal-sentence-encoder-ecbcd7a3005c | |
""" | |
from rasa_nlu.featurizers import Featurizer | |
import tensorflow_hub as hub | |
import tensorflow as tf | |