Skip to content

Instantly share code, notes, and snippets.

@kika
Created August 16, 2023 15:48
Show Gist options
  • Save kika/64141272c2b03728e08c7a64a43af204 to your computer and use it in GitHub Desktop.
Save kika/64141272c2b03728e08c7a64a43af204 to your computer and use it in GitHub Desktop.
Classificados Online Email Notifier
use select::document::Document;
use select::predicate::Class;
use serde_json::json;
use worker::*;
mod utils;
/// Error type wrapping a plain, human-readable description of a parse failure.
///
/// `Debug` renders as `ParseError("<message>")`; `Display` renders the bare
/// message.
#[derive(Debug)]
struct ParseError(String);

impl std::fmt::Display for ParseError {
    /// Writes the wrapped message verbatim (formatter padding is not applied).
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str(self.0.as_str())
    }
}

impl std::error::Error for ParseError {}

impl From<String> for ParseError {
    /// Takes ownership of `message` as the error text.
    fn from(message: String) -> Self {
        ParseError(message)
    }
}

impl From<&str> for ParseError {
    /// Copies `message` into a new error.
    fn from(message: &str) -> Self {
        ParseError(message.to_owned())
    }
}
/// Extracts the numeric listing IDs from a results page.
///
/// Every node matching the `Tahoma17Blacknounder` class is inspected: its
/// `href` attribute is split on `=` and the second piece is parsed as a `u64`.
/// Each link is also echoed to stdout, prefixed with the site origin.
///
/// # Errors
///
/// Returns an error if the document cannot be read, if a matching node has no
/// `href`, if the `href` contains no `=`, or if the text after the first `=`
/// is not a valid `u64`. The first failure aborts the whole parse; no partial
/// list is returned.
fn parse_html(
    html: &[u8],
) -> std::result::Result<Vec<u64>, Box<dyn std::error::Error>> {
    let doc = Document::from_read(html)?;
    let mut ids = Vec::new();
    for node in doc.find(Class("Tahoma17Blacknounder")) {
        let href = node.attr("href").ok_or("No href attribute")?;
        let raw_id = href
            .split('=')
            .nth(1)
            // Build the error message lazily: the happy path should not pay
            // for a `format!` allocation on every link.
            .ok_or_else(|| format!("No = in URL {}", href))?;
        println!("https://www.clasificadosonline.com{}", href);
        ids.push(raw_id.parse::<u64>()?);
    }
    Ok(ids)
}
/// Compares two ascending-sorted ID lists as sets.
///
/// Returns `(missing, new)` where `missing` holds the values present in `a`
/// but not in `b`, and `new` holds the values present in `b` but not in `a`.
/// Both outputs are ascending and free of duplicates.
///
/// Both inputs must already be sorted ascending; the merge walk below relies
/// on it. Repeated values are tolerated: a value that occurs a different
/// number of times in `a` and `b` is still considered present in both, so it
/// is never reported as missing or new.
fn diff_sorted_vecs(a: &[u64], b: &[u64]) -> (Vec<u64>, Vec<u64>) {
    /// Returns the index just past the run of values equal to `sorted[start]`.
    fn skip_run(sorted: &[u64], start: usize) -> usize {
        let value = sorted[start];
        start + sorted[start..].iter().take_while(|&&v| v == value).count()
    }

    let mut missing = Vec::new();
    let mut new = Vec::new();
    let (mut i, mut j) = (0, 0);

    // Classic two-pointer merge: advance whichever side holds the smaller
    // value, recording it as exclusive to that side.
    while i < a.len() && j < b.len() {
        let (left, right) = (a[i], b[j]);
        if left < right {
            missing.push(left);
            i = skip_run(a, i);
        } else if left > right {
            new.push(right);
            j = skip_run(b, j);
        } else {
            // Present on both sides: consume every copy from each list.
            i = skip_run(a, i);
            j = skip_run(b, j);
        }
    }

    // Whatever remains on one side has no counterpart on the other.
    while i < a.len() {
        missing.push(a[i]);
        i = skip_run(a, i);
    }
    while j < b.len() {
        new.push(b[j]);
        j = skip_run(b, j);
    }

    (missing, new)
}
/// Renders each listing ID as its detail-page URL, one URL per line.
///
/// Accepts any `u64` slice (a `&Vec<u64>` coerces automatically). Lines are
/// joined with `\n` and there is no trailing newline; an empty slice yields an
/// empty string.
fn print_ids(ids: &[u64]) -> String {
    ids.iter()
        .map(|id| {
            format!(
                "https://www.clasificadosonline.com/UDRealEstateDetail.asp?ID={}",
                id
            )
        })
        .collect::<Vec<_>>()
        .join("\n")
}
/// Emails a plain-text summary of removed and added listings.
///
/// The message lists the detail URLs of `missing` under "These listings are
/// gone: " and those of `new` under "These listings are new: ", and is posted
/// as JSON to the MailChannels `tx/v1/send` endpoint. Sender and recipients
/// are hard-coded in the request body.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if the response body cannot
/// be read, or if MailChannels answers with a non-success HTTP status (the
/// status and response body are included in the error text).
async fn send_email(
    missing: &Vec<u64>,
    new: &Vec<u64>,
) -> std::result::Result<(), Box<dyn std::error::Error>> {
    let text = [
        "These listings are gone: ".to_string(),
        print_ids(missing),
        "These listings are new: ".to_string(),
        print_ids(new),
    ]
    .join("\n");

    let body = json!({
        "personalizations": [
            {"to": [
                {"email": "kika@kikap.com", "name": "Kirill Pertsev"},
                {"email": "wife@example.com", "name": "Wife"},
            ]}
        ],
        "from": {"email": "kika@kikap.com", "name": "Kirill Pertsev"},
        "subject": "Classificados",
        "content": [{
            "type": "text/plain",
            "value": text
        }],
    });

    let res = reqwest::Client::new()
        .post("https://api.mailchannels.net/tx/v1/send")
        .json(&body)
        .send()
        .await?;

    // Capture the status first: reading the body consumes the response.
    let status = res.status();
    let response_body = res.text().await?;
    console_debug!("Sent email status: {} {}", status, response_body);

    // A completed HTTP exchange is not a delivered email; surface rejections
    // to the caller instead of reporting success.
    if !status.is_success() {
        return Err(format!(
            "MailChannels rejected the message: {} {}",
            status, response_body
        )
        .into());
    }
    Ok(())
}
/// Stores `ids`, serialized as JSON, under the `latest` key of `kv`.
///
/// # Errors
///
/// Returns an error if serialization fails, if the put request cannot be
/// built, or if executing it fails.
async fn write_kv(
    kv: &kv::KvStore,
    ids: &Vec<u64>,
) -> std::result::Result<(), Box<dyn std::error::Error>> {
    let serialized = serde_json::to_string(ids)?;
    kv.put("latest", serialized)?.execute().await?;
    Ok(())
}
#[event(scheduled)]
pub async fn scheduled(_ev: ScheduledEvent, env: Env, _ctx: ScheduleContext) {
// Optionally, get more helpful error messages written to the console in the case of a panic.
utils::set_panic_hook();
let url = match env.var("URL") {
Ok(url) => url.to_string(),
Err(_) => {
console_log!("URL variable not found");
return;
}
};
let kv = match env.kv("ids") {
Ok(kv) => kv,
Err(e) => {
console_log!("KV store error: {}", e);
return;
}
};
let html = reqwest::get(url).await.unwrap().text().await.unwrap();
let mut ids = parse_html(html.as_bytes()).unwrap_or_else(|err| {
console_error!("Parse HTML error: {}", err);
vec![]
});
ids.sort();
console_debug!("IDs parsed from HTML: {:?}", ids);
let mut kv_ids: Vec<u64> = match kv.get("latest").json().await {
Ok(Some(ids)) => ids,
Ok(None) => vec![],
Err(e) => {
console_error!("KV error: {}", e);
vec![]
}
};
kv_ids.sort();
console_debug!("IDs read from KV: {:?}", kv_ids);
let (missing, new) = diff_sorted_vecs(&kv_ids, &ids);
console_debug!("Missing IDs: {:?}", missing);
console_debug!("New IDs: {:?}", new);
if missing.len() != 0 || new.len() != 0 {
if let Err(e) = send_email(&missing, &new).await {
console_error!("Error sending email: {}", e);
}
if let Err(e) = write_kv(&kv, &ids).await {
console_error!("Failed to write KV: {}", e);
}
console_debug!("Email sent and KV written");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment