Goals: Add links that offer reasonable, clear explanations of how things work. No hype and, where possible, no vendor content. Practical first-hand accounts of models in production are eagerly sought.
![Screenshot 2023-12-18 at 10 40 27 PM](https://private-user-images.githubusercontent.com/3837836/291468646-4c30ad72-76ee-4939-a5fb-16b570d38cf2.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjAyMjMyMjQsIm5iZiI6MTcyMDIyMjkyNCwicGF0aCI6Ii8zODM3ODM2LzI5MTQ2ODY0Ni00YzMwYWQ3Mi03NmVlLTQ5MzktYTVmYi0xNmI1NzBkMzhjZjIucG5nP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI0MDcwNSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNDA3MDVUMjM0MjA0WiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9MmU4MjUyZGFkOTUwZjZiNTVlZWFkOWQzY2IwZDUwNjA4YWE0MGIwMDczYzhkNTg3MThlZjQ4ODA4NzIyNTIzZiZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QmYWN0b3JfaWQ9MCZrZXlfaWQ9MCZyZXBvX2lkPTAifQ.yLx7XinMXiXc8IyHutpvmuSKirL3o96qCEA4v2d4TAo)
```python
import os; import psutil; import timeit
from datasets import load_dataset
mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")
s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
```
""" | |
Example of a Streamlit app for an interactive Prodigy dataset viewer that also lets you
run simple training experiments for NER and text classification.
Requires the Prodigy annotation tool to be installed: https://prodi.gy
See here for details on Streamlit: https://streamlit.io.
""" | |
import streamlit as st | |
from prodigy.components.db import connect | |
from prodigy.models.ner import EntityRecognizer, merge_spans, guess_batch_size |