Ryan Wesslen wesslen

## gist:587454dc0146a6ae21fc
"""
This is a batched LSTM forward and backward pass
"""
import numpy as np
import code

class LSTM:

  @staticmethod
  def init(input_size, hidden_size, fancy_forget_bias_init = 3):

## batch-lstm.R
###
### This is a batched LSTM forward and backward pass. Written by Andrej Karpathy (@karpathy)
### BSD License
### Re-written in R by @georgeblck
###

rm(list=ls(all=TRUE))

LSTM.init <- function(input_size, hidden_size, fancy_forget_bias_init = 3){
  # Initialize parameters of the LSTM (both weights and biases in one matrix)

## Dockerfile
# Dockerfile for prodigy: just place your Linux wheel (prodigy-0.1.0-cp36-cp36m-linux_x86_64.whl) in the same directory as
# this Dockerfile and run:
# > docker build . -t prodigy
# > docker run -it -p 8080:8080 -v ${PWD}:/work prodigy bash

FROM python:3.6
RUN mkdir /prodigy
WORKDIR /prodigy
COPY ./prodigy-0.1.0-cp36-cp36m-linux_x86_64.whl /prodigy
RUN pip install prodigy-0.1.0-cp36-cp36m-linux_x86_64.whl

## Dockerfile
FROM python:3.6-alpine
# Opted for alpine to get as lean a Docker image as possible
RUN apk add --no-cache openssl

ENV DOCKERIZE_VERSION v0.6.1
RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-alpine-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
    && tar -C /usr/local/bin -xzvf dockerize-alpine-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
    && rm dockerize-alpine-linux-amd64-$DOCKERIZE_VERSION.tar.gz

# Python deps for alpine

## prodigy_srs.py
"""See https://twitter.com/honnibal/status/1120020992636661767 """
import time
import srsly
from prodigy import recipe
from prodigy.components.db import connect
from prodigy.util import INPUT_HASH_ATTR, set_hashes
from prodigy.components.filters import filter_duplicates


def get_rank_priority(data):

## Install
pip install streamlit
pip install spacy
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
python -m spacy download de_core_news_sm

## streamlit_prodigy.py
"""
Example of a Streamlit app for an interactive Prodigy dataset viewer that also lets you
run simple training experiments for NER and text classification.

Requires the Prodigy annotation tool to be installed: https://prodi.gy
See here for details on Streamlit: https://streamlit.io.
"""
import streamlit as st
from prodigy.components.db import connect
from prodigy.models.ner import EntityRecognizer, merge_spans, guess_batch_size

## statistical_rethinking_emcee.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                wrgoldstein
                / statistical_rethinking_emcee.ipynb
            
            
              Last active
              January 23, 2022 22:09
            
              
                A cheat sheet explaining how to perform simple Bayesian modeling in python.
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## tokenizations_post.md

      
              1 file
            
          
              2 forks
            
          
              0 comments
            
          
              64 stars
            
          
                tamuhey
                / tokenizations_post.md
            
            
              Last active
              June 26, 2024 01:00
            
              
                How to calculate the alignment between BERT and spaCy tokens effectively and robustly
              
          
    How to calculate the alignment between BERT and spaCy tokens effectively and robustly


site: https://tamuhey.github.io/tokenizations/
Natural Language Processing (NLP) has made great progress in recent years because of neural networks, which allow us to solve various tasks with end-to-end architectures. However, many NLP systems still require language-specific pre- and post-processing, especially for tokenization. In this article, I describe an algorithm that simplifies one such process: calculating the correspondence between tokens produced by different tokenizers (e.g. BERT vs. spaCy). I also introduce Python and Rust libraries that implement this algorithm.
Here are the library and the demo site links:

repo: https://github.com/tamuhey/tokenizations


## dataset.jsonl
{"text":"Spam spam lovely spam!"}
{"text":"I like scrambled eggs."}
{"text":"I prefer spam!"}
	"""
	This is a batched LSTM forward and backward pass
	"""
	import numpy as np
	import code

	class LSTM:

	@staticmethod
	def init(input_size, hidden_size, fancy_forget_bias_init = 3):
	###
	### This is a batched LSTM forward and backward pass. Written by Andrej Karpathy (@karpathy)
	### BSD License
	### Re-written in R by @georgeblck
	###

	rm(list=ls(all=TRUE))

	LSTM.init <- function(input_size, hidden_size, fancy_forget_bias_init = 3){
	# Initialize parameters of the LSTM (both weights and biases in one matrix)
	# Dockerfile for prodigy: just place your Linux wheel (prodigy-0.1.0-cp36-cp36m-linux_x86_64.whl) in the same directory as
	# this Dockerfile and run:
	# > docker build . -t prodigy
	# > docker run -it -p 8080:8080 -v ${PWD}:/work prodigy bash

	FROM python:3.6
	RUN mkdir /prodigy
	WORKDIR /prodigy
	COPY ./prodigy-0.1.0-cp36-cp36m-linux_x86_64.whl /prodigy
	RUN pip install prodigy-0.1.0-cp36-cp36m-linux_x86_64.whl
	FROM python:3.6-alpine
	# Opted for alpine to get as lean a Docker image as possible
	RUN apk add --no-cache openssl

	ENV DOCKERIZE_VERSION v0.6.1
	RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-alpine-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
	&& tar -C /usr/local/bin -xzvf dockerize-alpine-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
	&& rm dockerize-alpine-linux-amd64-$DOCKERIZE_VERSION.tar.gz

	# Python deps for alpine
	"""See https://twitter.com/honnibal/status/1120020992636661767 """
	import time
	import srsly
	from prodigy import recipe
	from prodigy.components.db import connect
	from prodigy.util import INPUT_HASH_ATTR, set_hashes
	from prodigy.components.filters import filter_duplicates


	def get_rank_priority(data):
	pip install streamlit
	pip install spacy
	python -m spacy download en_core_web_sm
	python -m spacy download en_core_web_md
	python -m spacy download de_core_news_sm
	"""
	Example of a Streamlit app for an interactive Prodigy dataset viewer that also lets you
	run simple training experiments for NER and text classification.

	Requires the Prodigy annotation tool to be installed: https://prodi.gy
	See here for details on Streamlit: https://streamlit.io.
	"""
	import streamlit as st
	from prodigy.components.db import connect
	from prodigy.models.ner import EntityRecognizer, merge_spans, guess_batch_size
	{"text":"Spam spam lovely spam!"}
	{"text":"I like scrambled eggs."}
	{"text":"I prefer spam!"}