Last active
January 26, 2025 16:19
curbalertbot - filters free stuff by location and tweets new items. https://twitter.com/CurbAlertBot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# curbalert | |
# jeffehobbs@gmail.com // last revision Jan 2025 | |
import asyncio | |
import tweepy, requests, configparser, json, os, shutil, hashlib, time | |
from termcolor import colored, cprint | |
from pyppeteer import launch | |
from mastodon import Mastodon | |
from atproto import Client, client_utils | |
# globals | |
# directory containing this script; apikeys.txt and the images/ folder are resolved against it
SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
# base index URL to scrape
URL = 'https://westernmass.craigslist.org/search/zip?hasPic=1#search=1~list~0~0'
# lower-case location names; a listing is only processed when its parsed location is in this list
LOCATIONS = [
    'amherst', 'ashfield', 'easthampton', 'conway', 'deerfield', 'florence',
    'goshen', 'greenfield', 'hatfield', 'holyoke', 'leeds', 'montague',
    'northampton', 'shelburne', 'south deerfield', 'southampton', 'sunderland',
    'west hatfield', 'whately', 'williamsburg',
]
# keywords searched for (case-insensitively) in a post body to flag it as a high alert
HIGH_ALERTS = [
    'bookshelf', 'bookshelves', 'bookcase', 'bookcases',
    'bricks', 'laptop', 'monitor', 'computer',
]
# when True, matching posts are scraped and printed but nothing is saved or sent
DEBUG = False

# set up API keys from external config apikeys.txt file
_APIKEYS_PATH = os.path.join(SCRIPT_PATH, 'apikeys.txt')
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files it
# did parse, so check it here; otherwise a missing file only shows up below as a
# confusing NoSectionError.
if not config.read(_APIKEYS_PATH):
    raise FileNotFoundError('API key file not found or unreadable: ' + _APIKEYS_PATH)

TWITTER_CONSUMER_KEY = config.get('twitter', 'consumer_key')
TWITTER_CONSUMER_SECRET = config.get('twitter', 'consumer_secret')
TWITTER_ACCESS_TOKEN = config.get('twitter', 'access_token')
TWITTER_ACCESS_TOKEN_SECRET = config.get('twitter', 'access_token_secret')
PUSHOVER_USER_KEY = config.get('pushover', 'user_key')
PUSHOVER_APP_TOKEN = config.get('pushover', 'app_token')
MASTODON_ACCESS_TOKEN = config.get('mastodon', 'access_token')
BLUESKY_USERNAME = config.get('bluesky', 'username')
BLUESKY_PASSWORD = config.get('bluesky', 'password')
# get content of index | |
async def get_CL_index(url):
    """Scrape the search-results page at `url` and return its listings.

    Returns a list of dicts, one per usable result, with the keys:
      'title'    - the result element's title attribute, stripped
      'url'      - href of the result's '.cl-app-anchor' link
      'id'       - MD5 hex digest of that href
      'location' - lower-cased first '·'-separated part of the '.supertitle'
                   text with parentheses and ', ma' removed

    Results without a '.supertitle' or a '.cl-app-anchor' element are skipped.
    """
    print("getting posts...")
    data = []
    browser = await launch({
        'executablePath': '/usr/bin/chromium'
    })
    try:
        page = await browser.newPage()
        await page.goto(url, {'waitUntil': 'networkidle0'})
        posts = await page.querySelectorAll('.cl-search-result')
        for element in posts:
            supertitle_el = await element.querySelector('.supertitle')
            href_el = await element.querySelector('.cl-app-anchor')
            # querySelector yields None when nothing matches; skip such results
            # instead of depending on the evaluate call blowing up
            if supertitle_el is None or href_el is None:
                continue
            try:
                supertitle = await page.evaluate('(supertitle_el) => supertitle_el.textContent', supertitle_el)
            except Exception:
                # best effort: a result whose location label cannot be read is ignored
                continue
            title = await page.evaluate('(element) => element.title', element)
            href = await page.evaluate('(href_el) => href_el.href', href_el)
            post_id = hashlib.md5(str(href).encode('utf-8')).hexdigest()
            location = supertitle.split('·')[0].lower().replace('(', '').replace(')', '').replace(', ma', '')
            # surrounding whitespace would stop the value from matching a plain location name
            location = location.strip()
            data.append({'title': title.strip(), 'url': href, 'id': post_id, 'location': location})
    finally:
        # always shut the browser down so a Chromium process is not left behind
        await browser.close()
    return data
# get content of post | |
async def get_CL_article(url):
    """Scrape one posting page and return (full_content, og_image_content, status_icons).

    full_content     - text of the element whose id contains "postingbody", with
                       whitespace-only lines and lines containing "QR Code" dropped
                       and the remaining lines concatenated; '' when no such
                       element exists
    og_image_content - content attribute of the page's og:image meta tag
    status_icons     - string of flag emoji, each followed by a space:
                       '🚨' when the text contains any HIGH_ALERTS keyword,
                       '🌐' when a 'div.mapaddress' element exists,
                       '📍' when a 'p.mapaddress a' link exists
    """
    print("getting data for " + url + "...")
    browser = await launch({
        'executablePath': '/usr/bin/chromium'
    })
    try:
        page = await browser.newPage()
        await page.goto(url, {'waitUntil': 'networkidle0'})

        # start from empty text so a page without a posting body cannot leave
        # full_content undefined further down
        full_content = ''
        content = await page.querySelectorAll('[id*="postingbody"]')
        for chunk in content:
            text = await chunk.getProperty("textContent")
            post_text = await text.jsonValue()
            # if several elements match, the last one wins
            # NOTE(review): source indentation was lost; confirm the text was not
            # meant to accumulate across matches
            full_content = ''.join(
                str(text_chunk)
                for text_chunk in post_text.splitlines()
                if (not text_chunk.isspace()) and ("QR Code" not in text_chunk)
            )

        og_image = await page.querySelector('meta[property="og:image"]')
        og_image_content = await page.evaluate('(element) => element.content', og_image)

        status_icons = ''
        # high alert parsing
        lowered_content = full_content.lower()
        if any(item.lower() in lowered_content for item in HIGH_ALERTS):
            status_icons = '🚨 '
        # map address parsing: icons are appended so an earlier '🚨' is kept
        # (assigning here used to wipe out the high-alert flag)
        if await page.querySelector('div.mapaddress'):
            status_icons = status_icons + '🌐 '
        if await page.querySelector('p.mapaddress a'):
            status_icons = status_icons + '📍 '
        # end map address parsing
    finally:
        # always shut the browser down so a Chromium process is not left behind
        await browser.close()
    return (full_content, og_image_content, status_icons)
# save image locally | |
def save_image(url, id):
    """Download `url` and write the response body to images/<id>.jpg under SCRIPT_PATH.

    The body is written as received; the HTTP status code is not checked.
    """
    file_path = SCRIPT_PATH + '/images/' + id + '.jpg'
    # a timeout keeps a stalled download from hanging the whole run
    r = requests.get(url, timeout=30)
    # the context manager closes the file even if the write fails
    with open(file_path, 'wb') as image_file:
        image_file.write(r.content)
# tweet that stuff | |
def send_tweet(status, image_file_path, url):
    """Tweet `status` followed by `url`, with the image at `image_file_path` attached.

    A status longer than 256 characters is cut to its first 240 characters
    plus "..." before the URL is added.
    """
    shortened = status[:240] + "..." if len(status) > 256 else status
    tweet = shortened + ' ' + url

    # tweepy.API performs the media upload; tweepy.Client creates the tweet itself
    oauth = tweepy.OAuth1UserHandler(
        TWITTER_CONSUMER_KEY,
        TWITTER_CONSUMER_SECRET,
        TWITTER_ACCESS_TOKEN,
        TWITTER_ACCESS_TOKEN_SECRET,
    )
    uploader = tweepy.API(oauth)
    poster = tweepy.Client(
        consumer_key=TWITTER_CONSUMER_KEY,
        consumer_secret=TWITTER_CONSUMER_SECRET,
        access_token=TWITTER_ACCESS_TOKEN,
        access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
    )

    uploaded = uploader.media_upload(image_file_path)
    poster.create_tweet(text=tweet, user_auth=True, media_ids=[uploaded.media_id])
# send a pushover push | |
def send_pushover(status, image_file_path, url):
    """Send a priority-1 Pushover message with `status` as text and the image attached.

    `url` is included as the message's supplementary link, titled "More info".
    The API response body is printed.
    """
    payload = {
        "token": PUSHOVER_APP_TOKEN,
        "user": PUSHOVER_USER_KEY,
        "message": status,
        "url": url,
        "url_title": "More info",
        "priority": "1"
    }
    # open the attachment in a context manager so the handle is closed once the
    # request has been sent (it was previously left open)
    with open(image_file_path, "rb") as image_file:
        r = requests.post(
            "https://api.pushover.net/1/messages.json",
            data=payload,
            files={"attachment": ("image.jpg", image_file, "image/jpeg")},
            # a timeout keeps a stalled request from hanging the whole run
            timeout=30,
        )
    print(r.text)
    return
# send it to mastodon, why not | |
def send_mastodon(status, image_file_path, url):
    """Post `status` followed by `url` to mastodon.social with the image attached.

    `status` is also used as the image description.
    """
    api = Mastodon(
        access_token=MASTODON_ACCESS_TOKEN,
        api_base_url='https://mastodon.social/',
    )
    # upload the picture first, then attach it to the status
    attachment = api.media_post(image_file_path, description=status)
    api.status_post(status + ' ' + url, media_ids=attachment)
def send_bluesky(status, image_file_path, url):
    """Post `status` to Bluesky with the image attached and a "More info" link to `url`.

    The visible post text is shortened (ending in "...") when needed so that
    status + space + link label stays within 300 characters; the image alt
    text still carries the full status.
    """
    link_label = 'More info'
    # Bluesky caps post text at 300 graphemes. Counting code points with len()
    # can only over-estimate the grapheme count, so this budget is safe.
    max_post_length = 300
    budget = max_post_length - len(link_label) - 1  # 1 for the separating space

    client = Client()
    client.login(BLUESKY_USERNAME, BLUESKY_PASSWORD)
    with open(image_file_path, 'rb') as f:
        img_data = f.read()

    alt_text = status + ' '
    if len(status) > budget:
        body = status[:budget - 3] + '...'
    else:
        body = status
    text = client_utils.TextBuilder().text(body + ' ').link(link_label, url)
    client.send_image(text=text, image=img_data, image_alt=alt_text)
    return
# main logic | |
def main():
    """Scrape the index, then announce every new in-range listing on each service.

    For each listing returned by get_CL_index(URL):
      * a location not in LOCATIONS is reported as out of range;
      * in DEBUG mode the article is scraped and printed, nothing is saved or sent;
      * if images/<id>.jpg already exists the listing counts as already posted;
      * otherwise the article is scraped, its image saved, and the message sent
        to Twitter, Mastodon and Bluesky. High-alert listings ('🚨' in the
        status icons) are pushed to Pushover first, followed by a 10-minute
        pause before the public posts.

    A failure in one service is reported and does not stop the others.
    Ends by raising SystemExit, as the original exit() call did.
    """
    if DEBUG:
        print('starting in debug mode...')
    else:
        print('starting in production mode...')

    # Create the event loop explicitly and reuse it for every scrape:
    # asyncio.get_event_loop() no longer creates one implicitly on newer Pythons.
    # The loop is left open, as before.
    # NOTE(review): pyppeteer may still need it at interpreter exit; confirm before closing.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    posts = loop.run_until_complete(get_CL_index(URL))
    for post in posts:
        label = post['location'].upper() + ': ' + post['title']

        if post['location'] not in LOCATIONS:
            cprint(label + ' // out of range', 'green', 'on_red')
            continue

        image_file_path = SCRIPT_PATH + '/images/' + post['id'] + '.jpg'

        if DEBUG:
            post['content'], post['image_url'], post['status_icons'] = loop.run_until_complete(get_CL_article(post['url']))
            cprint(post['status_icons'] + label + ' // debug mode, no posting', 'blue', 'on_white')
            continue

        # the saved image doubles as the "already posted" marker
        if os.path.isfile(image_file_path):
            cprint(label + ' // already posted', 'red', 'on_green')
            continue

        post['content'], post['image_url'], post['status_icons'] = loop.run_until_complete(get_CL_article(post['url']))
        save_image(post['image_url'], post['id'])
        print('posting:')
        print(json.dumps(post, indent=4))

        # the same message goes to every service, so build it once
        message = post['status_icons'] + label + '. ' + post['content']

        if '🚨' in post['status_icons']:
            # catch Exception rather than everything, so Ctrl-C during the
            # 10-minute pause is not misreported as a Pushover failure
            try:
                send_pushover(message, image_file_path, post['url'])
                print('SENT PUSHOVER...')
                # give myself a 10-minute advantage :)
                time.sleep(600)
            except Exception as err:
                print('PUSHOVER FAILED...', err)

        services = (
            ('TWEET', send_tweet),
            ('MASTODON', send_mastodon),
            ('BLUESKY', send_bluesky),
        )
        for service_name, send in services:
            try:
                send(message, image_file_path, post['url'])
                print('SENT ' + service_name + '...')
            except Exception as err:
                # report the reason instead of hiding it, then try the next service
                print(service_name + ' FAILED...', err)
        print('...DONE.')

    # same effect as exit(), without relying on the builtin that the site module injects
    raise SystemExit
# run the bot only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()
#fin
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment