Skip to content

Instantly share code, notes, and snippets.

@shawngraham
Last active December 8, 2023 01:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shawngraham/613e70878d5e0c6603824fea45bf02d9 to your computer and use it in GitHub Desktop.
Save shawngraham/613e70878d5e0c6603824fea45bf02d9 to your computer and use it in GitHub Desktop.
A little tkinter app that gets headlines from NewsAPI and, if you want, will go to each URL, read the article, and format the output as knowledge graph triples. But don't forget to export the results.
import os
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk, filedialog

import llm
import pandas as pd
import requests
from newsapi import NewsApiClient
from strip_tags import strip_tags
# Local model served through the llm-gpt4all plugin.
# To use a hosted GPT model instead, swap in e.g. llm.get_model("4t") and set
# model.key to that provider's API key.
model = llm.get_model("orca-mini-3b-gguf2-q4_0")

# NewsAPI client. The key is read from the NEWSAPI_KEY environment variable so
# a real credential never has to live in the source file; the original
# placeholder is kept as the fallback so the script still starts without it.
newsapi = NewsApiClient(api_key=os.environ.get('NEWSAPI_KEY', 'api here'))

# DataFrame of the articles from the most recent search; stays None until
# get_news() has fetched something.
articles_data = None
# Function to process text with the LLM
def llm_processing(content):
    """Ask the module-level ``model`` to extract knowledge-graph triples.

    Args:
        content: Plain text of an article.

    Returns:
        The model's reply as a string, or None when ``content`` is empty or
        the model call fails.
    """
    if not content:
        # Nothing to extract from; skip the (slow) model call entirely.
        return None
    prompt_text = (
        "You are a knowledge graph assistant. Extract entities and predicates "
        f"from the provided text:\n\n{content}\n\n"
        "Return ONLY the triples formatted for csv: entity1,predicate,entity2."
    )
    try:
        # Per the llm Python API docs a response is evaluated lazily; calling
        # .text() here forces generation inside the try block so that model
        # failures are actually caught, and hands callers a plain string.
        return model.prompt(prompt_text).text()
    except Exception as e:
        # Best-effort: report the failure and let the caller carry on.
        print(f"An error occurred: {e}")
        return None
# Helper that turns one article dict into a table row
def _article_to_row(article):
    """Flatten one NewsAPI article dict into a row for the results table."""
    # A key can be present with a null value, in which case dict.get's default
    # is never used; `or` covers both a missing key and a None/empty value.
    return {
        'title': article.get('title') or 'No Title Available',
        'content': article.get('content') or 'No Content Available',
        'url': article.get('url') or 'No Url Available',
        'llm': '',  # Placeholder until the LLM step fills it in
    }


# Function to get the news data
def get_news():
    """Fetch articles matching the query box and show them in the table.

    Reads the search term from ``query_entry``, asks NewsAPI for English
    articles, stores the result in the global ``articles_data`` DataFrame
    (columns: title, content, url, llm), displays it, and enables the
    process button. An empty query or a failed request is reported on stdout
    and leaves the current state untouched.
    """
    global articles_data  # Rebound below, so it must be declared global

    query = query_entry.get().strip()
    if not query:
        print("Please enter a search term.")
        return

    try:
        all_stories = newsapi.get_everything(q=query, language='en')
    except Exception as e:
        # This runs as a Tk button callback, so report the problem instead of
        # letting the exception escape into the event loop.
        print(f"An error occurred while fetching the news: {e}")
        return

    articles = all_stories.get('articles') or []
    rows = [_article_to_row(article) for article in articles]

    # Explicit columns keep the table shape stable even with zero results.
    articles_data = pd.DataFrame(rows, columns=['title', 'content', 'url', 'llm'])

    # Show the DataFrame (llm column still empty) and allow LLM processing
    show_dataframe(articles_data)
    process_button.config(state='normal')
# Function to retrieve HTML and process it
def process_article_url(url, timeout=10):
    """Download an article, strip its HTML, and run the text through the LLM.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``llm_processing`` returns for the cleaned text, or None when
        the URL is unusable or the download fails.
    """
    if not isinstance(url, str) or not url.strip().lower().startswith(('http://', 'https://')):
        # Placeholder or missing URLs can never be fetched; skip them up front.
        print(f"Skipping article without a usable URL: {url!r}")
        return None

    try:
        # Without a timeout a stalled server would block the caller (and with
        # it the Tk button callback) indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad status
    except requests.RequestException as e:
        print(f"An error occurred while fetching the article: {e}")
        return None

    # Clean up the HTML. The second argument is a list of CSS selectors:
    # per the strip-tags docs, only content inside matching elements (<div>)
    # is kept. minify collapses extra whitespace; <h1> tags are left in place.
    html_content = strip_tags(
        response.text,
        ["div"],
        minify=True,
        keep_tags=["h1"],
    )
    return llm_processing(html_content)
# Run the LLM step over every fetched article. The article is downloaded from
# its URL because the 'content' field that comes with the headlines isn't
# very much to work with.
def process_news_with_llm():
    """Fill the 'llm' column of ``articles_data`` and refresh the table.

    Does nothing when no articles have been fetched yet. Afterwards the
    export button is enabled and wired to save the current ``articles_data``.
    """
    global articles_data

    if articles_data is None:
        return

    # One download plus one model call per row
    articles_data['llm'] = articles_data['url'].apply(process_article_url)

    # Redraw the table with the new results
    show_dataframe(articles_data)

    # Results exist now, so exporting makes sense
    export_button.config(state='normal', command=lambda: export_to_file(articles_data))
# Save a DataFrame to a location chosen by the user
def export_to_file(df):
    """Ask for a destination file and write ``df`` to it, pipe-delimited.

    Nothing is written when the save dialog returns no filename.
    """
    target_path = filedialog.asksaveasfilename(
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")],
    )
    if not target_path:
        return
    # '|' is used as the field separator rather than a comma
    df.to_csv(target_path, sep='|', index=False)
# Main application window
root = tk.Tk()
root.title('News API Interface')

# Search box for the news query
query_entry = tk.Entry(master=root, width=50)
query_entry.pack()

# Button that fetches headlines for the query
get_news_button = tk.Button(master=root, text='Get Headlines', command=get_news)
get_news_button.pack()

# Button that runs the LLM step; get_news() enables it
process_button = tk.Button(
    master=root,
    text='Process Articles with LLM',
    command=process_news_with_llm,
    state='disabled',
)
process_button.pack()

# Export button; process_news_with_llm() enables it and attaches its command
export_button = tk.Button(master=root, text='Export', state='disabled')
export_button.pack()

# Hint for the user, anchored to the top-left corner
top_left_label = tk.Label(
    master=root,
    text="News will load fast, if there is any. LLM will process slowly.",
    anchor="nw",
)
top_left_label.pack(padx=(5, 0), pady=(5, 0), anchor="nw")
def _display_value(value):
    """Return ``value`` unchanged, or '' when it is None or a float NaN."""
    if value is None:
        return ''
    # NaN is the only float that compares unequal to itself
    if isinstance(value, float) and value != value:
        return ''
    return value


def update_treeview(df, treeview):
    """Replace every row of ``treeview`` with the rows of ``df``.

    Args:
        df: DataFrame whose rows are shown; cell order follows column order.
        treeview: Widget offering ``get_children``, ``delete`` and ``insert``
            (a ttk.Treeview in this script).

    Missing cells (None / NaN) are shown as blanks instead of the literal
    text "None" / "nan".
    """
    # Clear current items in the treeview
    treeview.delete(*treeview.get_children())
    # Add new items to the treeview
    for _, row in df.iterrows():
        treeview.insert('', 'end', values=[_display_value(cell) for cell in row])
# Frame that holds the results table together with its scrollbar
tree_frame = tk.Frame(root)
tree_frame.pack()

# Vertical scrollbar on the right edge of the frame
tree_scroll = tk.Scrollbar(tree_frame)
tree_scroll.pack(side=tk.RIGHT, fill=tk.Y)

# Results table. show='headings' hides ttk's implicit tree column (#0), which
# would otherwise appear as an empty first column because rows are inserted
# with `values` only.
tree = ttk.Treeview(
    tree_frame,
    columns=('Title', 'Content', 'URL', 'LLM Result'),
    show='headings',
    yscrollcommand=tree_scroll.set,
    selectmode='browse',
)
# Pack once, letting the table fill the x direction of its container
tree.pack(fill='x', expand=True)

# Let the scrollbar drive the table's vertical view
tree_scroll.config(command=tree.yview)

# Left-aligned, equally wide columns whose headings match their names
for col in tree['columns']:
    tree.column(col, anchor='w', width=240)
    tree.heading(col, text=col, anchor='w')
def show_dataframe(df):
    """Display ``df`` in the main results table (the module-level ``tree``)."""
    update_treeview(df, treeview=tree)
# Run the application: enter the Tk event loop so the widgets created above
# respond to input and the button callbacks get invoked
root.mainloop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment