Juncheng Yang 1a1a11a

1a1a11a / download_llama.sh
Last active July 11, 2023 19:47
download_llama
#!/bin/bash
set -eux
PRESIGNED_URL="https://agi.gpt4.org/llama/LLaMA/*"
MODEL_SIZE="7B,13B,30B,65B" # edit this list with the model sizes you wish to download
TARGET_FOLDER="models/" # where all files should end up
mkdir -p "${TARGET_FOLDER}" || true
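
The preview stops here. As a rough sketch of how such a download script typically continues, the comma-separated MODEL_SIZE list is split and the '*' wildcard in PRESIGNED_URL is substituted with a concrete per-model file path. The loop below is an assumption rather than the gist's actual code, and "params.json" is only a placeholder file name.

# hypothetical continuation, not the gist's code
for m in ${MODEL_SIZE//,/ }; do
    mkdir -p "${TARGET_FOLDER}/${m}"
    # replace the '*' wildcard in the presigned URL with a per-model file path (placeholder name)
    wget "${PRESIGNED_URL/'*'/${m}/params.json}" -O "${TARGET_FOLDER}/${m}/params.json"
done
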
1a1a11a / dataseries_compilation.sh
Created June 19, 2023 01:21
dataseries compilation
# https://github.com/dataseries/DataSeries/tree/613a2b654b14934d2e097586e70fbc3cd20580c3
# requires Ubuntu 18.04
sudo apt update
sudo apt install -yqq cmake libboost-all-dev perl gnuplot libxml2 libxml2-dev libdbi-dev doxygen libbz2-dev;
git clone https://github.com/dataseries/Lintel.git
cd Lintel;
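
The snippet is cut off mid-build. A minimal sketch of how it could finish, assuming both Lintel and DataSeries use a standard in-source CMake build and install; the flags and -j parallelism are assumptions, not taken from the gist.

cmake . && make -j"$(nproc)" && sudo make install   # build and install Lintel (assumed steps)
cd ..
git clone https://github.com/dataseries/DataSeries.git
cd DataSeries
cmake . && make -j"$(nproc)" && sudo make install   # build and install DataSeries (assumed steps)
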
diff --git a/src/storage/seg/segevict.c b/src/storage/seg/segevict.c
index da940e3..66c4988 100644
--- a/src/storage/seg/segevict.c
+++ b/src/storage/seg/segevict.c
@@ -278,7 +278,8 @@ segevict_setup(evict_policy_e ev_policy, uintmax_t seg_mature_time)
     struct merge_opts *mopt = &evict_info.merge_opt;
     mopt->target_ratio = 1.0 / mopt->seg_n_merge;
     /* stop if the bytes on the merged seg is more than the threshold */
-    mopt->stop_ratio = mopt->target_ratio * (mopt->seg_n_merge - 1) + 0.05;
+    // mopt->stop_ratio = mopt->target_ratio * (mopt->seg_n_merge - 1) + 0.05;
1a1a11a / goBenchmark
Last active March 10, 2019 00:27
some benchmarks related to golang
package main

import (
    "fmt"
    "sync"
    "time"
)

func main() {
1a1a11a / datasets.md
Created August 8, 2016 14:35 — forked from mrflip/datasets.md
Overview of Datasets

== Overview of Datasets ==

The examples in this book use the "Chimpmark" datasets: a set of freely redistributable datasets, converted to simple standard formats, with traceable provenance and documented schemas. They are the same datasets used in the upcoming Chimpmark Challenge big-data benchmark. The datasets are:

  • Wikipedia English-language Article Corpus (wikipedia_corpus; 38 GB, 619 million records, 4 billion tokens): the full text of every English-language Wikipedia article, in

  • Wikipedia Pagelink Graph (wikipedia_pagelinks; ) --

  • Wikipedia Pageview Stats (wikipedia_pageviews; 2.3 TB, about 250 billion records (FIXME: verify num records)) -- hour-by-hour pageview