Harry Wang harrywang

## samsum-sample.jsonl
{"text":"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<<\/SYS>>\n\nInput:\nAmanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-) [\/INST]\n\nSummary: Amanda baked cookies and will bring Jerry some tomorrow."}
{"text":"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<<\/SYS>>\n\nInput:\nOlivia: Who are you voting for in this election? \r\nOliver: Liberals as always.\r\nOlivia: Me too!!\r\nOliver: Great [\/INST]\n\nSummary: Olivia and Olivier are voting for liberals in this election. "}
{"text":"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<<\/SYS>>\n\nInput:\nTim: Hi, what's up?\r\nKim: Bad mood tbh, I was going to do lots of stuff but ended up procrastinating\r\nTim: What did you plan on doing?\r\nKim: Oh you know, uni stuff and unfucking my room\r\nKim: Maybe tomorrow I'll move my ass and do everything\r\nKim: We were going to defrost a fridge so instead of shopping I'll eat some defrosted

## price-data.txt
make a comparison table to compare the api pricing between claude, mistral, openai, and gemini based on the following data:

Claude 3

MTok = million tokens. All Claude 3 models support vision and 200,000 token context windows.
Light & fast
Haiku
Input: $0.25 / MTok
Output: $1.25 / MTok
Hard-working

## gist:4d9cda28385b2019898b
/*
  This file must be valid JSON. But comments are allowed

  Please edit settings.json, not settings.json.template
*/
{
  // Name your instance!
  "title": "Etherpad",

  // favicon default name

## sentiment-analysis.py
import torch
import pandas as pd
import logging
import os

from dotenv import load_dotenv
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail
from datetime import datetime

## ask_iching.py
def consult_oracle():
    import random
    hex = ''

    for i in range(6):  # toss 6 times to get the hex

        toss = 0
        line = 0

        for i in range(3):  # each toss with three coins to generate one line

## ensemble-learning-hints.py
# partial code snippets shown as hints

# voting regressor
from sklearn.ensemble import VotingRegressor

voting_reg = VotingRegressor(
    estimators = [
        ('lin', lin_reg_pipeline),
        ('svm', svm_reg_pipeline),
        ('sgd', sgd_reg_pipeline),

## sgd-grid-search-example.py
# sgd regression pipeline with grid search
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV

param_grid = [
    {
        'sgd_reg__max_iter':[100000, 1000000], # if number is too small, you will get a warning
        'sgd_reg__tol':[1e-10, 1e-3],
        'sgd_reg__eta0':[0.001, 0.01]
    }

## feature-type-based-selection.py
# example to show how to select features based on type: numerical vs. categorical
import pandas as pd
# load the data
df = pd.read_csv("housing.csv")

# find numerical features and categorical features based on the type of feature
df_num = df.select_dtypes(exclude ='object')
df_cat = df.select_dtypes(include ='object')

# select numerical features and categorical features

## duplicates_pipeline.py
class DuplicatesPipeline(object):

    def __init__(self):
        """
        Initializes database connection and sessionmaker.
        Creates tables.
        """
        engine = db_connect()
        create_table(engine)
        self.Session = sessionmaker(bind=engine)

## items_v1.py
from scrapy.item import Item, Field


class QuoteItem(Item):
    quote_content = Field()
    tags = Field()
    author_name = Field()
    author_birthday = Field()
    author_bornlocation = Field()
    author_bio = Field()
	{"text":"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<<\/SYS>>\n\nInput:\nAmanda: I baked cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-) [\/INST]\n\nSummary: Amanda baked cookies and will bring Jerry some tomorrow."}
	{"text":"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<<\/SYS>>\n\nInput:\nOlivia: Who are you voting for in this election? \r\nOliver: Liberals as always.\r\nOlivia: Me too!!\r\nOliver: Great [\/INST]\n\nSummary: Olivia and Olivier are voting for liberals in this election. "}
	{"text":"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<<\/SYS>>\n\nInput:\nTim: Hi, what's up?\r\nKim: Bad mood tbh, I was going to do lots of stuff but ended up procrastinating\r\nTim: What did you plan on doing?\r\nKim: Oh you know, uni stuff and unfucking my room\r\nKim: Maybe tomorrow I'll move my ass and do everything\r\nKim: We were going to defrost a fridge so instead of shopping I'll eat some defrosted
	make a comparison table to compare the api pricing between claude, mistral, openai, and gemini based on the following data:

	Claude 3

	MTok = million tokens. All Claude 3 models support vision and 200,000 token context windows.
	Light & fast
	Haiku
	Input: $0.25 / MTok
	Output: $1.25 / MTok
	Hard-working
	/*
	This file must be valid JSON. But comments are allowed

	Please edit settings.json, not settings.json.template
	*/
	{
	// Name your instance!
	"title": "Etherpad",

	// favicon default name
	import torch
	import pandas as pd
	import logging
	import os

	from dotenv import load_dotenv
	from sendgrid import SendGridAPIClient
	from sendgrid.helpers.mail import Mail
	from datetime import datetime
	def consult_oracle():
	import random
	hex = ''

	for i in range(6): # toss 6 times to get the hex

	toss = 0
	line = 0

	for i in range(3): # each toss with three coins to generate one line
	# partial code snippets shown as hints

	# voting regressor
	from sklearn.ensemble import VotingRegressor

	voting_reg = VotingRegressor(
	estimators = [
	('lin', lin_reg_pipeline),
	('svm', svm_reg_pipeline),
	('sgd', sgd_reg_pipeline),
	# sgd regression pipeline with grid search
	from sklearn.linear_model import SGDRegressor
	from sklearn.model_selection import GridSearchCV

	param_grid = [
	{
	'sgd_reg__max_iter':[100000, 1000000], # if number is too small, you will get a warning
	'sgd_reg__tol':[1e-10, 1e-3],
	'sgd_reg__eta0':[0.001, 0.01]
	}
	# example to show how to select features based on type: numerical vs. categorical
	import pandas as pd
	# load the data
	df = pd.read_csv("housing.csv")

	# find numerical features and categorical features based on the type of feature
	df_num = df.select_dtypes(exclude ='object')
	df_cat = df.select_dtypes(include ='object')

	# select numerical features and categorical features
	class DuplicatesPipeline(object):

	def __init__(self):
	"""
	Initializes database connection and sessionmaker.
	Creates tables.
	"""
	engine = db_connect()
	create_table(engine)
	self.Session = sessionmaker(bind=engine)
	from scrapy.item import Item, Field


	class QuoteItem(Item):
	quote_content = Field()
	tags = Field()
	author_name = Field()
	author_birthday = Field()
	author_bornlocation = Field()
	author_bio = Field()