Charly Wargnier CharlyWargnier

## normcore-llm.md

      
              1 file
            
          
              218 forks
            
          
              38 comments
            
          
              2781 stars
            
          
                veekaybee
                / normcore-llm.md
            
            
              Last active
              July 21, 2024 13:28
            
              
                Normcore LLM Reads
              
          
    Anti-hype LLM reading list

Goals: Add links to reasonable, clear explanations of how things work. No hype and, if possible, no vendor content. Practical first-hand accounts of models in production are eagerly sought.
Foundational Concepts


Pre-Transformer Models


## download_csv
        df = pd.read_csv("reuters-allcats.csv")
        csv = df.to_csv(index=False)
        b64 = base64.b64encode(csv.encode()).decode()
        href = f'<a href="data:file/csv;base64,{b64}" download="myfilename.csv">Try a sample first</a>'


        st.markdown("👆 Upload a .csv file.  " + href, unsafe_allow_html=True)

        st.stop()

## crawl_multiple_sites.py
from urllib.parse import urlsplit

import advertools as adv


# Home pages of the sites this script targets.
sites = [
    "https://www.who.int",
    "https://www.nytimes.com",
    "https://www.washingtonpost.com",
]

## lottie_animation_in_streamlit.py
import json

import requests  # pip install requests
import streamlit as st  # pip install streamlit
from streamlit_lottie import st_lottie  # pip install streamlit-lottie

# GitHub: https://github.com/andfanilo/streamlit-lottie
# Lottie Files: https://lottiefiles.com/

def load_lottiefile(filepath: str):

## bing_scrape_streamlit.py
import pandas as pd
from bs4 import BeautifulSoup
import requests as r
import streamlit as st

st.markdown('<h1 style="background-color: gainsboro; padding-left: 10px; padding-bottom: 20px;">Search Engine Scraper</h1>', unsafe_allow_html=True)
query = st.text_input('', help='Enter the search string and hit Enter/Return')
query = query.replace(" ", "+") #replacing the spaces in query result with +

if query: #Activates the code below on hitting Enter/Return in the search textbox

## wave_use_uploaded_data.py
import os
import time
from h2o_wave import main, app, Q, ui, data
import pandas as pd
import numpy as np


@app('/')
async def serve(q: Q):
    print(q.args)

## wave_plot_from_pandas.py
# Plot / Dataframe
# Examples of how to format pandas data when plotting
# Use the `tolist()` function on `df.columns` and `df.values` along with Wave's `data` class
# ---
from h2o_wave import site, data, ui, main
import pandas as pd
import numpy as np

# Page to hold our charts
page = site['/demo']

## title_optimizer.py
import streamlit as st
from ludwig.api import LudwigModel
import pandas as pd

@st.cache(show_spinner=False)
def load_model():
  """Load the trained Ludwig model from disk.

  The function is wrapped with Streamlit's cache decorator so the result of
  the first call is reused on later calls instead of reloading the model.
  (Previously the ``@`` was missing, so ``st.cache(...)`` was evaluated and
  thrown away, and the function was never actually cached.)

  Returns:
    The object returned by ``LudwigModel.load`` for the configured path.
  """
  # NOTE(review): if Streamlit warns that the cached object was mutated,
  # ``allow_output_mutation=True`` may be needed here -- confirm against the
  # installed Streamlit version.
  # Update with the path to the Ludwig trained model
  model = LudwigModel.load("results/experiment_run_1/model/")
  return model

## les-miserables.json
{
  "nodes": [
    {
      "id": "0",
      "name": "Myriel",
      "itemStyle": null,
      "symbolSize": 10,
      "x": null,
      "y": null,
      "attributes": {

## keyworksFromUrl.py
from urllib.parse import urlparse
import re

# Product page whose URL path is split into keyword tokens.
url = "https://www.amazon.com/SanDisk-128GB-microSDXC-Memory-Adapter/dp/B073JYC4XM/"

# Keep only the path component, then break it on "/" and "-".
# Collecting into a set removes duplicate tokens; the leading and trailing
# "/" in the path produce the empty-string token seen in the output.
path = urlparse(url).path
tokens = set(re.split("[/-]", path))
print(tokens)

# Output (a set, so element order is not significant):
# {'', 'B073JYC4XM', 'dp', '128GB', 'microSDXC', 'Memory', 'SanDisk', 'Adapter'}
	df = pd.read_csv("reuters-allcats.csv")
	csv = df.to_csv(index=False)
	b64 = base64.b64encode(csv.encode()).decode()
	href = f'<a href="data:file/csv;base64,{b64}" download="myfilename.csv">Try a sample first</a>'


	st.markdown("👆 Upload a .csv file. " + href, unsafe_allow_html=True)

	st.stop()
	from urllib.parse import urlsplit

	import advertools as adv


	sites = [
	'https://www.who.int',
	'https://www.nytimes.com',
	'https://www.washingtonpost.com',
	]
	import json

	import requests # pip install requests
	import streamlit as st # pip install streamlit
	from streamlit_lottie import st_lottie # pip install streamlit-lottie

	# GitHub: https://github.com/andfanilo/streamlit-lottie
	# Lottie Files: https://lottiefiles.com/

	def load_lottiefile(filepath: str):
	import pandas as pd
	from bs4 import BeautifulSoup
	import requests as r
	import streamlit as st

	st.markdown('<h1 style="background-color: gainsboro; padding-left: 10px; padding-bottom: 20px;">Search Engine Scraper</h1>', unsafe_allow_html=True)
	query = st.text_input('', help='Enter the search string and hit Enter/Return')
	query = query.replace(" ", "+") #replacing the spaces in query result with +

	if query: #Activates the code below on hitting Enter/Return in the search textbox
	import os
	import time
	from h2o_wave import main, app, Q, ui, data
	import pandas as pd
	import numpy as np


	@app('/')
	async def serve(q: Q):
	print(q.args)
	# Plot / Dataframe
	# Examples of how to format pandas data when plotting
	# Use the `tolist()` function on `df.columns` and `df.values` along with Wave's `data` class
	# ---
	from h2o_wave import site, data, ui, main
	import pandas as pd
	import numpy as np

	# Page to hold our charts
	page = site['/demo']
	import streamlit as st
	from ludwig.api import LudwigModel
	import pandas as pd

	st.cache(show_spinner=False)
	def load_model():
	#Update with the path to the Ludwig trained model
	model = LudwigModel.load("results/experiment_run_1/model/")
	return model
	{
	"nodes": [
	{
	"id": "0",
	"name": "Myriel",
	"itemStyle": null,
	"symbolSize": 10,
	"x": null,
	"y": null,
	"attributes": {
	from urllib.parse import urlparse
	import re

	url="https://www.amazon.com/SanDisk-128GB-microSDXC-Memory-Adapter/dp/B073JYC4XM/"

	print(set(re.split("[/-]", urlparse(url).path)))

	#output
	#{'', 'B073JYC4XM', 'dp', '128GB', 'microSDXC', 'Memory', 'SanDisk', 'Adapter'}