Knut Behrends knbknb

## hat.rkt
#lang racket

(require metapict)
(require metapict/pict)

;; Base struct `M-edge` with a single field, `turn`.
;; `#:transparent` lets instances print their field values and makes
;; `equal?` compare instances field by field.
(struct M-edge (turn) #:transparent)

;; Three subtypes of `M-edge`. Each inherits `turn` and declares no
;; fields of its own.
(struct X+ M-edge () #:transparent)
(struct X- M-edge () #:transparent)
(struct A+ M-edge () #:transparent)

## guess_keyword.R
library(tidyverse)
library(jsonlite)
library(shiny)

# get_json_data(date)
# score_letters(letters)
# read_keywords() on mac
# split_char(word) into words
# limit_words(words, str_length) to specific length and lower case
# top_words(word, words_in, top_n = 50) - combo of split_char, limit_words

## copilot_keyword.R
library(tidyverse)
library(stringr)
library(jsonlite)

# create a function to solve the Keyword game
# this game uses a 6 letter horizontal word at the
# intersection of 6 other words, where a missing letter from each of the vertical words
# accounts for one letter of the mystery 6 letter length horizontal word

# how to play

## rl-for-llms.md

      
              1 file
            
          
              26 forks
            
          
              11 comments
            
          
              543 stars
            
          
                yoavg
                / rl-for-llms.md
            
            
              Last active
              July 23, 2024 08:52
            
          
    Reinforcement Learning for Language Models

Yoav Goldberg, April 2023.
Why RL?

With the release of the ChatGPT model and followup large language models (LLMs), there was a lot of discussion of the importance of "RLHF training", that is, "reinforcement learning from human feedback".
I was puzzled for a while as to why RL (Reinforcement Learning) is better than learning from demonstrations (a.k.a. supervised learning) for training language models. Shouldn't learning from demonstrations (or, in language model terminology, "instruction fine tuning", i.e. learning to imitate human-written answers) be sufficient? I came up with a theoretical argument that was somewhat convincing. But I came to realize there is an additional argument which not only supports the case of RL training, but also requires it, in particular for models like ChatGPT. This additional argument is spelled out in (the first half of) a talk by John Schulman from OpenAI. This post pretty much

  
## ubuntu-custom-setup.sh
#!/usr/bin/env bash
# -E: the ERR trap is inherited by functions, command substitutions and
#     subshells; -e: exit when a command fails; -u: expanding an unset
#     variable is an error.
set -Eeu

# On a failing command: print the script name, the line number and the
# command that failed, then exit with that command's status.
trap 'STATUS=${?}; echo "${0}: Error on line ${LINENO}: ${BASH_COMMAND}"; exit ${STATUS}' ERR
# On every exit: remove the temp directory. The single quotes defer the
# expansion of ${tempDir} until the trap fires, so it can be defined below.
trap 'rm -rf ${tempDir}' EXIT

# Read-only settings; where they are used lies outside this excerpt.
readonly supportedUbuntuVersion="22.04"
readonly tempDir="/tmp/setup"
readonly devDir="${HOME}/dev"
readonly scriptsDir="${devDir}/scripts"

## mandeldrop.R
# Compile mandeldrop.cpp and load what it exports into this session;
# mandeldrop() below is expected to come from that file.
Rcpp::sourceCpp("mandeldrop.cpp")

# x and y limits, each a pair of numbers handed to mandeldrop() one
# element at a time.
xlims <- c(-0.65, 0.65)
ylims <- c(-0.6, 2.2)

# NOTE(review): the three trailing numbers look like output dimensions and
# an iteration cap -- confirm against mandeldrop.cpp.
m <- mandeldrop(
  xlims[[1]], xlims[[2]],
  ylims[[1]], ylims[[2]],
  1560, 2160, 512
)

# Colour palette from https://stackoverflow.com/q/48069990
# Eleven anchor colours, each given as red/green/blue fractions.
rainbow <- c(
  rgb(0.47, 0.11, 0.53),
  rgb(0.27, 0.18, 0.73),
  rgb(0.25, 0.39, 0.81),
  rgb(0.30, 0.57, 0.75),
  rgb(0.39, 0.67, 0.60),
  rgb(0.51, 0.73, 0.44),
  rgb(0.67, 0.74, 0.32),
  rgb(0.81, 0.71, 0.26),
  rgb(0.89, 0.60, 0.22),
  rgb(0.89, 0.39, 0.18),
  rgb(0.86, 0.13, 0.13)
)

# 100 interpolated colours, the same 100 in reverse order, then "black":
# 201 entries in total. local() keeps the helper out of the workspace.
cols <- local({
  ramp <- colorRampPalette(rainbow)(100)
  c(ramp, rev(ramp), "black")
})

## pluck_recursive.R
# @drob's version from https://twitter.com/drob/status/1501747414780239879
# Look for an element called `name` anywhere inside a nested list.
#
# lst:  the object to search; anything that is not a list yields NULL.
# name: the element name to look up with `[[`.
#
# If `lst` itself holds a non-NULL element `name`, that element is
# returned as is and the children are not searched. Otherwise every child
# is searched recursively and the hits are flattened into one unnamed
# vector (NULL when nothing matched).
pluck_recursive <- function(lst, name) {
  if (!is.list(lst)) {
    # Mirrors an `if` without `else`: the result is an invisible NULL.
    return(invisible(NULL))
  }

  direct_hit <- lst[[name]]
  if (!is.null(direct_hit)) {
    return(direct_hit)
  }

  nested_hits <- purrr::map(lst, pluck_recursive, name)
  unname(unlist(nested_hits))
}

## dplyr-summarise.R
# What's the most natural way to express this code in base R?
library(dplyr, warn.conflicts = FALSE)
# Group mtcars by `cyl`, then reduce each group to a single row holding
# the mean of `disp` and the number of rows in that group.
mtcars %>%
  group_by(cyl) %>%
  summarise(mean = mean(disp), n = n())
#> # A tibble: 3 x 3
#>     cyl  mean     n
#>   <dbl> <dbl> <int>
#> 1     4  105.    11
#> 2     6  183.     7

## cleanup-gitlab-pipelines.sh
#!/bin/bash
# Purpose: Bulk-delete GitLab pipelines older than a given date
# Author: github.com/chrishoerl
# GitLab API: v4
# Requirements: jq must be installed ($ sudo apt install jq)
# API example: https://gitlab.example.com/api/v4/projects
# API example: https://gitlab.example.com/api/v4/projects/<projectid>/pipelines
#
# NOTE: Script is just a dryrun. To really delete pipelines, simply uncomment line 49 to activate
#

## tidyselect.md

      
              1 file
            
          
              0 forks
            
          
              3 comments
            
          
              2 stars
            
          
                isteves
                / tidyselect.md
            
            
              Last active
              May 4, 2020 15:28
            
              
                Trying to grok tidyselect
              
          
    Trying to grok tidyselect

Key takeaways:

everything inside vars(...) is exactly the same as the stuff inside select(...)!!!
vars() is used for all scoped variants of dplyr verbs (I assume bc the variables need to “fit” into a single argument, .vars. In select(...), the ellipses take everything)
vars_select() is probably more of a developer-facing function (seen in select_helpers documentation)

Some "gotchas":
	#lang racket

	(require metapict)
	(require metapict/pict)

	;; Base struct `M-edge` with a single field, `turn`.
	;; `#:transparent` lets instances print their field values and makes
	;; `equal?` compare instances field by field.
	(struct M-edge (turn) #:transparent)

	;; Three subtypes of `M-edge`. Each inherits `turn` and declares no
	;; fields of its own.
	(struct X+ M-edge () #:transparent)
	(struct X- M-edge () #:transparent)
	(struct A+ M-edge () #:transparent)
	library(tidyverse)
	library(jsonlite)
	library(shiny)

	# get_json_data(date)
	# score_letters(letters)
	# read_keywords() on mac
	# split_char(word) into words
	# limit_words(words, str_length) to specific length and lower case
	# top_words(word, words_in, top_n = 50) - combo of split_char, limit_words
	library(tidyverse)
	library(stringr)
	library(jsonlite)

	# create a function to solve the Keyword game
	# this game uses a 6 letter horizontal word at the
	# intersection of 6 other words, where a missing letter from each of the vertical words
	# accounts for one letter of the mystery 6 letter length horizontal word

	# how to play
	#!/usr/bin/env bash
	# -E: the ERR trap is inherited by functions, command substitutions and
	#     subshells; -e: exit when a command fails; -u: expanding an unset
	#     variable is an error.
	set -Eeu

	# On a failing command: print the script name, the line number and the
	# command that failed, then exit with that command's status.
	trap 'STATUS=${?}; echo "${0}: Error on line ${LINENO}: ${BASH_COMMAND}"; exit ${STATUS}' ERR
	# On every exit: remove the temp directory. The single quotes defer the
	# expansion of ${tempDir} until the trap fires, so it can be defined below.
	trap 'rm -rf ${tempDir}' EXIT

	# Read-only settings; where they are used lies outside this excerpt.
	readonly supportedUbuntuVersion="22.04"
	readonly tempDir="/tmp/setup"
	readonly devDir="${HOME}/dev"
	readonly scriptsDir="${devDir}/scripts"
	# Compile mandeldrop.cpp and load what it exports into this session;
	# mandeldrop() below is expected to come from that file.
	Rcpp::sourceCpp("mandeldrop.cpp")

	# x and y limits, each a pair of numbers handed to mandeldrop() one
	# element at a time.
	xlims <- c(-0.65, 0.65)
	ylims <- c(-0.6, 2.2)

	# NOTE(review): the three trailing numbers look like output dimensions and
	# an iteration cap -- confirm against mandeldrop.cpp.
	m <- mandeldrop(
	  xlims[[1]], xlims[[2]],
	  ylims[[1]], ylims[[2]],
	  1560, 2160, 512
	)

	# Colour palette from https://stackoverflow.com/q/48069990
	# Eleven anchor colours, each given as red/green/blue fractions.
	rainbow <- c(
	  rgb(0.47, 0.11, 0.53),
	  rgb(0.27, 0.18, 0.73),
	  rgb(0.25, 0.39, 0.81),
	  rgb(0.30, 0.57, 0.75),
	  rgb(0.39, 0.67, 0.60),
	  rgb(0.51, 0.73, 0.44),
	  rgb(0.67, 0.74, 0.32),
	  rgb(0.81, 0.71, 0.26),
	  rgb(0.89, 0.60, 0.22),
	  rgb(0.89, 0.39, 0.18),
	  rgb(0.86, 0.13, 0.13)
	)

	# 100 interpolated colours, the same 100 in reverse order, then "black":
	# 201 entries in total. local() keeps the helper out of the workspace.
	cols <- local({
	  ramp <- colorRampPalette(rainbow)(100)
	  c(ramp, rev(ramp), "black")
	})
	# @drob's version from https://twitter.com/drob/status/1501747414780239879
	# Look for an element called `name` anywhere inside a nested list.
	#
	# lst:  the object to search; anything that is not a list yields NULL.
	# name: the element name to look up with `[[`.
	#
	# If `lst` itself holds a non-NULL element `name`, that element is
	# returned as is and the children are not searched. Otherwise every child
	# is searched recursively and the hits are flattened into one unnamed
	# vector (NULL when nothing matched).
	pluck_recursive <- function(lst, name) {
	  if (!is.list(lst)) {
	    # Mirrors an `if` without `else`: the result is an invisible NULL.
	    return(invisible(NULL))
	  }

	  direct_hit <- lst[[name]]
	  if (!is.null(direct_hit)) {
	    return(direct_hit)
	  }

	  nested_hits <- purrr::map(lst, pluck_recursive, name)
	  unname(unlist(nested_hits))
	}
	# What's the most natural way to express this code in base R?
	library(dplyr, warn.conflicts = FALSE)
	# Group mtcars by `cyl`, then reduce each group to a single row holding
	# the mean of `disp` and the number of rows in that group.
	mtcars %>%
	group_by(cyl) %>%
	summarise(mean = mean(disp), n = n())
	#> # A tibble: 3 x 3
	#> cyl mean n
	#> <dbl> <dbl> <int>
	#> 1 4 105. 11
	#> 2 6 183. 7
	#!/bin/bash
	# Purpose: Bulk-delete GitLab pipelines older than a given date
	# Author: github.com/chrishoerl
	# GitLab API: v4
	# Requirements: jq must be installed ($ sudo apt install jq)
	# API example: https://gitlab.example.com/api/v4/projects
	# API example: https://gitlab.example.com/api/v4/projects/<projectid>/pipelines
	#
	# NOTE: Script is just a dryrun. To really delete pipelines, simply uncomment line 49 to activate
	#