Note: this assumes you add
--bootstrap-server <BROKER>
to all of the commands below.
> kcat -C -t <TOPIC> -e -o beginning | jq -r '. | {id: .id, name: .name, tombstone: .tombstone} | [.[]] | @csv' > prod_companies.csv
from langchain.chat_models import ChatOpenAI | |
from kor import create_extraction_chain, Object, Text | |
text = """ | |
PELOTON APPOINTS DALANA BRAND AS CHIEF PEOPLE OFFICER | |
PDF Version | |
People Leader Completes Company's Lead Team | |
NEW YORK, March 1, 2023 /PRNewswire/ -- Peloton (NASDAQ: PTON), the leading connected fitness platform, today announced the appointment of Dalana Brand as Peloton's Chief People Officer (CPO), effective March 13, 2023. As a seasoned executive with significant global leadership experience in multiple industries, Brand joins the team with a strong reputation for organizational transformation. She will report to CEO Barry McCarthy and serve as a member of the leadership team, leading the company's Global People Team. |
# I couldn't get return generators from chains so I had to do a bit of low level SSE, Hope this is useful | |
# Probably you'll use another Vector Store instead of OpenSearch, but if you want to mimic what I did here, | |
# please use the fork of `OpenSearchVectorSearch` in https://github.com/oneryalcin/langchain | |
import json | |
import os | |
import logging | |
from typing import List, Generator |
Note: this assumes you add
--bootstrap-server <BROKER>
to all of the commands below.
> kcat -C -t <TOPIC> -e -o beginning | jq -r '. | {id: .id, name: .name, tombstone: .tombstone} | [.[]] | @csv' > prod_companies.csv
# Load the mapping and data-wrangling libraries.
library(highcharter)
library(dplyr)

# Pull the per-area metadata for the Great Britain map bundled with Highmaps.
mapdata <- get_data_from_map(download_map_data("countries/gb/gb-all"))

# Generate reproducible fake values, one per map area, keyed by the
# two-letter area code (`hc-a2`).
set.seed(1234)
data_fake <- mapdata %>%
  select(code = `hc-a2`) %>%
  # abs(rt(...)) yields positive, heavy-tailed draws; scale to ~1e5.
  mutate(value = 1e5 * abs(rt(nrow(.), df = 10)))
import random | |
from collections import Counter | |
class Person: | |
def __init__(self, issick): | |
self.issick = issick | |
def __repr__(self): | |
if self.issick: |
import re | |
import nltk | |
nltk.download(['punkt', 'stopwords', 'wordnet']) | |
from nltk.stem import WordNetLemmatizer | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
def tokenize(text): | |
""" |
# Hyperparameter grid to sweep: CrossValidator will train one candidate
# model per combination of maxDepth and featureSubsetStrategy.
params = ParamGridBuilder()\
    .addGrid(classifier.maxDepth, [2, 5, 10])\
    .addGrid(classifier.featureSubsetStrategy, ['all', 'onethird', 'sqrt', 'log2'])\
    .build()

# Evaluator that scores each candidate model; this measures the success of
# the model(s) on the binary 'churned' label.
evaluator = binary_evaluator = BinaryClassificationEvaluator(labelCol='churned')

# CrossValidator will build the pipeline, fit a model for every
# ParamGridBuilder combination, and keep the best by the evaluator's metric.
# Pipeline stages are accumulated here and handed to Pipeline(stages=...) later.
stages = []

# Index the categorical string columns ('level', 'gender') into numeric indexes.
# NOTE(review): these fit against `j` — presumably the working DataFrame;
# confirm the variable name against the surrounding notebook.
indexers = [StringIndexer(inputCol=column, outputCol=column + "_idx").fit(j)
            for column in ['level', 'gender']]

# One-hot encode the indexed categories into sparse dummy vectors.
onehotencoder = OneHotEncoderEstimator(inputCols=['gender_idx', 'level_idx'],
                                       outputCols=['gender_dummy', 'level_dummy'])
# Assemble the dummy and numeric feature columns into a single vector column
# for the model.
joined_vector = VectorAssembler(inputCols=['gender_dummy', 'level_dummy', 'logSessionCount',
                                           'sqrtMeanSongCount', 'sqrtSessionsFreqDay'],
                                outputCol='nonScaledFeatures')\
    .transform(joined)

# MLlib expects an integer 'label' column; derive it from the 'churned' flag.
joined_vector = joined_vector.withColumn('label', joined_vector.churned.cast('integer'))

# Preview with the raw/intermediate columns removed. Note: drop() returns a
# new DataFrame — joined_vector itself is unchanged here.
joined_vector.drop('userId', 'level', 'gender', 'sessionCount', 'meanSongCount',
                   'sessionsFreqDay', 'gender_idx', 'level_idx', 'churned').show(4)