Goals: Add links to reasonable, good explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod are eagerly sought.
![Screenshot 2023-12-18 at 10 40 27 PM](https://private-user-images.githubusercontent.com/3837836/291468646-4c30ad72-76ee-4939-a5fb-16b570d38cf2.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjE3MjI0ODEsIm5iZiI6MTcyMTcyMjE4MSwicGF0aCI6Ii8zODM3ODM2LzI5MTQ2ODY0Ni00YzMwYWQ3Mi03NmVlLTQ5MzktYTVmYi0xNmI1NzBkMzhjZjIucG5nP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI0MDcyMyUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNDA3MjNUMDgwOTQxWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9NjUyOWU3OGVhNzdiMjkxZjNiOTI4MjgyNGQ1MGI3ZWYzMmJlMTYxOWFjMjJhM2U2MWIyMDcxN2I5NjY5MTIzNyZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QmYWN0b3JfaWQ9MCZrZXlfaWQ9MCZyZXBvX2lkPTAifQ._3t011oKRtwGDyBz3q6Dy0ZzV0aV21cNx82SMrQ9BKU)
""" | |
a simple script that reads tweets inside a json file, uses openai to compute embeddings and creates two files, metadata.tsv and output.tsv, which can be used to visualise the tweets and their embeddings in TensorFlow Projector (https://projector.tensorflow.org/) | |
""" | |
# obtain tweets.json from https://gist.github.com/gd3kr/948296cf675469f5028911f8eb276dbc | |
import pandas as pd | |
import json | |
from openai import OpenAI |
#!/usr/bin/env -S bash -c "docker run -p 8080:8080 -it --rm \$(docker build --progress plain -f \$0 . 2>&1 | tee /dev/stderr | grep -oP 'sha256:[0-9a-f]*')" | |
# syntax = docker/dockerfile:1.4.0 | |
FROM node:20 | |
WORKDIR /root | |
RUN npm install sqlite3 |
NAME OF BELLIGERENT Rating +/- Deviation (sorted by Rating - Deviation) | |
Turkistan Islamic Party 3587 +/- 113 | |
Dadullah Front 3581 +/- 111 | |
High Council of Afghanistan Islamic Emirate 3581 +/- 111 | |
Syrian Arab Republic 3429 +/- 161 | |
ISIL-YP 3250 +/- 166 | |
GPC 3233 +/- 166 | |
Saleh 3163 +/- 163 | |
Supreme Political Council 3159 +/- 163 |
//@ts-check | |
async function massBlock() { | |
const dummy = {}; | |
const progressPopup = await showProgressPopup(); | |
const thumbsUp = '\uD83D\uDC4D'; | |
async function showProgressPopup() { | |
const animationTimeMsec = 200; |
#!/usr/bin/env ruby | |
# Based on: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/main/Bookmarks-lookup/bookmarks_lookup.rb | |
# See: https://github.com/ryanfb/twitter-bookmarks-export | |
require 'json' | |
require 'typhoeus' | |
require 'twitter_oauth2' | |
# First, you will need to enable OAuth 2.0 in your App’s auth settings in the Developer Portal to get your client ID. | |
# Inside your terminal you will need to set an environment variable | |
# export CLIENT_ID='your-client-id' |
# This is a new feature, so make sure to update to the latest version of transformers! | |
# You will also need to pip install tensorflow_text | |
import tensorflow as tf | |
from transformers import TFAutoModel, TFBertTokenizer | |
class EndToEndModel(tf.keras.Model): | |
def __init__(self, checkpoint): | |
super().__init__() |
from typing import Optional, Iterable, cast, List | |
from thinc.api import get_current_ops, Ops | |
from thinc.types import Ragged, Ints1d | |
from spacy.pipeline.spancat import Suggester | |
from spacy.tokens import Doc | |
from spacy.util import registry | |
@registry.misc("ngram_digits_suggester.v1") |
import streamlit.components.v1 as components | |
_ = components.html( | |
""" | |
<script> | |
function loadScript(url) | |
{ | |
return new Promise(function(resolve, reject) { | |