Skip to content

Instantly share code, notes, and snippets.

@nickjevershed
Created January 14, 2024 23:13
Show Gist options
  • Save nickjevershed/757cd8b8b615654137f1c5ecd379ecc3 to your computer and use it in GitHub Desktop.
Save nickjevershed/757cd8b8b615654137f1c5ecd379ecc3 to your computer and use it in GitHub Desktop.
#%%
import scraperwiki
import re
import requests
import json
import pandas as pd
import time
#%%
# Fetch every stored comment. scraperwiki.sqlite.select is given the query
# text without the leading SELECT keyword.
queryString = "* from comments"
queryResult = scraperwiki.sqlite.select(queryString)
results = []
# Local generate endpoint the prompts are posted to.
llama_url = 'http://localhost:11434/api/generate'
for row in queryResult:
    start = time.time()
    comment = row['comment']
    query = f'In the following text someone will specify their favourite word. Please return the favourite word from their text in the following format - "Favourite word: Word" - with no other words in your response. Here is the text: {comment}'
    print(query)
    data = {
        "model": "llama2:13b",
        "prompt": query,
        # Ask for one complete JSON reply rather than a token stream.
        "stream": False,
    }
    r = requests.post(llama_url, data=json.dumps(data))
    # Surface HTTP failures as an HTTPError here instead of an opaque
    # KeyError on 'response' on the next line.
    r.raise_for_status()
    word = r.json()['response']
    print(word)
    # Build the output record without rebinding the loop variable `row`.
    results.append({"word": word, "count": 1, "comment": comment})
    end = time.time()
    # Seconds spent on this comment (prompt build + model round trip).
    print(end - start)
#%%
# One row per processed comment, built from the collected result dicts.
df = pd.DataFrame(results)
#%%
# Snapshot the untouched model replies before any clean-up.
df.to_csv("llama-words.csv")
#%%
df['word'] = df['word'].str.strip()
#%%
# Derive the normalised word in one pass: keep whatever follows the last
# "Favourite word: " marker, drop '#' characters, then lower-case.
df['word2'] = (
    df['word']
    .str.split("Favourite word: ").str[-1]
    .str.replace("#", "")
    .str.lower()
)
#%%
def checkWord(row):
    """Return True when the row's extracted word appears in its comment.

    ``row`` is a single DataFrame row providing ``word2`` (the normalised
    extraction) and ``comment`` (the source text). The check is a plain
    substring test against the lower-cased comment.
    """
    return row['word2'] in row['comment'].lower()


# Computed first: the summary filter below reads this column, so it has to
# exist before that cell runs.
df['word_check'] = df.apply(checkWord, axis=1)
#%%
# Split rows by whether the extracted word was found in the comment text.
is_match = df['word_check']
sum_df = df[is_match]
blah = df[~is_match]
# Per-word tallies over the matching rows only.
summary = sum_df.groupby('word2').count()
#%%
summary.to_csv('summary.csv')
#%%
# Write out the frame as it stands at this point in the script.
df.to_csv("llama-words2.csv")
#%%
#%%
# Tally rows per distinct raw reply; count() reports the non-null entries
# in each remaining column for every group.
word_groups = df.groupby(['word'])
dg = word_groups.count()
#%%
dg.to_csv('words.csv')
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment