
Mehmet Öner Yalçın (oneryalcin)

oneryalcin / kor_example.py
Created May 18, 2023 12:47
Extracting Entities from Articles using KOR
from langchain.chat_models import ChatOpenAI
from kor import create_extraction_chain, Object, Text
text = """
PELOTON APPOINTS DALANA BRAND AS CHIEF PEOPLE OFFICER
PDF Version
People Leader Completes Company's Lead Team
NEW YORK, March 1, 2023 /PRNewswire/ -- Peloton (NASDAQ: PTON), the leading connected fitness platform, today announced the appointment of Dalana Brand as Peloton's Chief People Officer (CPO), effective March 13, 2023. As a seasoned executive with significant global leadership experience in multiple industries, Brand joins the team with a strong reputation for organizational transformation. She will report to CEO Barry McCarthy and serve as a member of the leadership team, leading the company's Global People Team.
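The preview cuts off the press release (and the rest of the script). Below is a minimal sketch of how a kor extraction chain is typically wired up for text like this; the schema fields, the example, and the chain.run usage are my assumptions about how the gist likely continues, not the gist's actual code.

# Hedged sketch, not the gist's code: define a kor schema and run the chain over `text`.
schema = Object(
    id="appointment",
    description="Executive appointment announced in a press release",
    attributes=[
        Text(id="person", description="Name of the person being appointed"),
        Text(id="role", description="The title or role they will hold"),
        Text(id="company", description="The company making the appointment"),
        Text(id="effective_date", description="When the appointment takes effect"),
    ],
    examples=[
        (
            "Acme Corp announced the appointment of Jane Doe as Chief Financial "
            "Officer, effective June 1, 2024.",
            [{"person": "Jane Doe", "role": "Chief Financial Officer",
              "company": "Acme Corp", "effective_date": "June 1, 2024"}],
        )
    ],
)

llm = ChatOpenAI(temperature=0)              # requires OPENAI_API_KEY in the environment
chain = create_extraction_chain(llm, schema)
print(chain.run(text))                       # parsed fields typically land under the "data" key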
oneryalcin / sse_fast_api.py
Last active March 22, 2024 14:36
Server-Sent Events (SSE) with FastAPI and (partially) LangChain
# I couldn't get chains to return generators, so I had to do a bit of low-level SSE. Hope this is useful.
# You'll probably use another vector store instead of OpenSearch, but if you want to mimic what I did here,
# please use the fork of `OpenSearchVectorSearch` at https://github.com/oneryalcin/langchain
import json
import os
import logging
from typing import List, Generator
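Only the imports survive in the preview. The core SSE trick the comments describe can be sketched with FastAPI's StreamingResponse and a generator that emits "data: ..." frames; the dummy token generator below is a stand-in for the gist's LangChain/OpenSearch plumbing, and the route name and payload shape are illustrative.

import asyncio
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


async def event_stream(question: str):
    # Stand-in for the real chain: stream tokens as they are produced.
    for token in ["This ", "is ", "a ", "streamed ", "answer."]:
        yield f"data: {json.dumps({'token': token})}\n\n"   # SSE frame format: "data: <payload>\n\n"
        await asyncio.sleep(0.05)
    yield "data: [DONE]\n\n"


@app.get("/chat")
async def chat(question: str = "What is SSE?"):
    return StreamingResponse(event_stream(question), media_type="text/event-stream")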
oneryalcin / kafkcat_hints.md
Last active November 16, 2022 14:57
Kafkacat notes

Note: these commands assume you add `--bootstrap-server` to all of them.

Select a few keys from the stream and convert them to CSV using jq

> kcat -C -t <TOPIC> -e -o beginning | jq -r '. | {id: .id, name: .name, tombstone: .tombstone} | [.[]] | @csv' > prod_companies.csv

Reset the consumer group to the beginning of the topic (make sure the consumer group is not in a Stable state, i.e. has no active members, at that point)
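The reset command itself is cut off in the preview. As a loose illustration of the same idea in Python (not the gist's CLI approach), confluent-kafka can rewind a consumer group by assigning its partitions at OFFSET_BEGINNING; the broker address, group id, and topic below are placeholders.

# Hedged sketch using confluent-kafka, not the gist's kcat/Kafka CLI command.
from confluent_kafka import Consumer, OFFSET_BEGINNING

consumer = Consumer({
    "bootstrap.servers": "localhost:9092",   # placeholder broker
    "group.id": "my-consumer-group",         # placeholder group id
    "auto.offset.reset": "earliest",
})


def rewind(consumer, partitions):
    # Called on assignment: point every partition back at the beginning.
    for p in partitions:
        p.offset = OFFSET_BEGINNING
    consumer.assign(partitions)


consumer.subscribe(["my-topic"], on_assign=rewind)   # placeholder topic
while True:
    msg = consumer.poll(1.0)
    if msg is None or msg.error():
        continue
    print(msg.key(), msg.value())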

oneryalcin / highcharter.R
Created July 22, 2020 23:17
highcharter example: UK map
library(highcharter)
library(dplyr)

# Download the UK map and pull out its per-area metadata
mapdata <- get_data_from_map(download_map_data("countries/gb/gb-all"))

# Generate reproducible fake values keyed by the map's `hc-a2` area codes
set.seed(1234)
data_fake <- mapdata %>%
  select(code = `hc-a2`) %>%
  mutate(value = 1e5 * abs(rt(nrow(.), df = 10)))
oneryalcin / coranavirus_reading.R
Created July 14, 2020 20:08
Initial code for creating a custom coronavirus tracker for the UK
# Libraries assumed by the snippet (omitted in the preview): tidyverse for read_csv/dplyr/ggplot2, lubridate for ymd()
library(tidyverse)
library(lubridate)

df <- read_csv('https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv')
df <- df %>% rename(cases = `Daily lab-confirmed cases`, day = `Specimen date`, area_name = `Area name`)
reading <- df %>% filter(area_name == 'Reading') %>% filter(`Area type` == "Lower tier local authority")
g <- reading %>% filter(day > ymd("2020-06-01")) %>% ggplot(aes(x = day, y = cases)) + geom_col()
oneryalcin / covidtest.py
Created April 2, 2020 00:47
Covid testing
import random
from collections import Counter


class Person:
    def __init__(self, issick):
        self.issick = issick

    def __repr__(self):
        if self.issick:
            return 'Sick'       # assumed completion: the preview cuts off at this `if`
        return 'Healthy'
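A rough guess at how such a class might be exercised, using only the imports visible in the preview; the population size and prevalence are made up for illustration.

# Hypothetical usage, not from the gist: simulate a population and tally sick vs healthy.
population = [Person(issick=random.random() < 0.05) for _ in range(1000)]
print(Counter(p.issick for p in population))   # counts of True (sick) vs False (healthy)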
oneryalcin / tokenize.py
Created October 25, 2019 10:08
Tokenizing channel
import re
import nltk
nltk.download(['punkt', 'stopwords', 'wordnet'])
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def tokenize(text):
    """Normalise, tokenize, lemmatize and de-stopword a text string.
    (Assumed completion: the preview cuts off at the docstring.)
    """
    text = re.sub(r'[^a-zA-Z0-9]', ' ', text.lower())
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(tok) for tok in word_tokenize(text)
            if tok not in stopwords.words('english')]
oneryalcin / sparkify_13_cross_validation.py
Last active September 24, 2019 00:08
13 Sparkify Cross Validation
# The ParamGridBuilder lets us define the hyperparameter combinations to test for our model
params = ParamGridBuilder()\
    .addGrid(classifier.maxDepth, [2, 5, 10])\
    .addGrid(classifier.featureSubsetStrategy, ['all', 'onethird', 'sqrt', 'log2'])\
    .build()

# Define the evaluator; this will measure the success of the model(s)
evaluator = binary_evaluator = BinaryClassificationEvaluator(labelCol='churned')

# CrossValidator will build the pipeline and create models based on ParamGridBuilder,
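The preview cuts off at the CrossValidator comment. A minimal sketch of how this step is usually wired up in PySpark follows; the names pipeline and train and the fold count are assumptions, not taken from the gist.

from pyspark.ml.tuning import CrossValidator

# Assumed continuation: cross-validate the pipeline over the param grid above.
cv = CrossValidator(estimator=pipeline,            # assumed: the Pipeline built in part 12
                    estimatorParamMaps=params,
                    evaluator=binary_evaluator,
                    numFolds=3)                    # fold count is an assumption
cv_model = cv.fit(train)                           # `train` is an assumed training DataFrame
best_model = cv_model.bestModel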
oneryalcin / sparkify_12_pipeline_stages.py
Last active September 24, 2019 00:09
12 Pipeline stages
# Keep a list for Pipeline stages
stages = []

# Convert categorical variables to indexes
indexers = [StringIndexer(inputCol=column, outputCol=column + "_idx").fit(j) for column in ['level', 'gender']]

# Convert indexes to one-hot-encoded sparse vectors
onehotencoder = OneHotEncoderEstimator(inputCols=['gender_idx', 'level_idx'],
                                       outputCols=['gender_dummy', 'level_dummy'])
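The preview stops before the stages list is actually filled. A sketch of the likely next lines, assuming the standard PySpark Pipeline pattern (the gist presumably also appends the VectorAssembler from part 11 and a classifier, which are not shown here):

from pyspark.ml import Pipeline

# Assumed continuation: collect the transformers and build the Pipeline.
stages += indexers             # the fitted StringIndexer models
stages.append(onehotencoder)   # followed by the one-hot encoder
pipeline = Pipeline(stages=stages)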
oneryalcin / sparkify_11_vector_assembler.py
Created September 23, 2019 23:15
11 Sparkify Vector Assembler
# Assemble the model inputs into a single vector column and add an integer label
joined_vector = VectorAssembler(inputCols=['gender_dummy', 'level_dummy', 'logSessionCount',
                                           'sqrtMeanSongCount', 'sqrtSessionsFreqDay'],
                                outputCol='nonScaledFeatures')\
    .transform(joined)
joined_vector = joined_vector.withColumn('label', joined_vector.churned.cast('integer'))
joined_vector.drop('userId', 'level', 'gender', 'sessionCount', 'meanSongCount',
                   'sessionsFreqDay', 'gender_idx', 'level_idx', 'churned').show(4)
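The column name nonScaledFeatures suggests a scaling step comes next; a sketch of what that would typically look like with StandardScaler (an assumption, not shown in the gist):

from pyspark.ml.feature import StandardScaler

# Assumed next step: scale the assembled features into the final `features` column.
scaler = StandardScaler(inputCol='nonScaledFeatures', outputCol='features', withStd=True)
scaled = scaler.fit(joined_vector).transform(joined_vector)
scaled.select('features', 'label').show(4, truncate=False)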