Created
August 16, 2023 15:48
-
-
Save kika/64141272c2b03728e08c7a64a43af204 to your computer and use it in GitHub Desktop.
Classificados Online Email Notifier
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use select::document::Document; | |
use select::predicate::Class; | |
use serde_json::json; | |
use worker::*; | |
mod utils; | |
/// Minimal string-backed error type.
///
/// `Display` prints the wrapped message verbatim; `Debug` prints it in
/// tuple-struct form, e.g. `ParseError("message")`.
#[derive(Debug)]
struct ParseError(String);

impl std::fmt::Display for ParseError {
    /// Writes the wrapped message without any decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.0.as_str())
    }
}

impl std::error::Error for ParseError {}

impl From<String> for ParseError {
    /// Takes ownership of `value` as the error message.
    fn from(value: String) -> Self {
        ParseError(value)
    }
}

impl From<&str> for ParseError {
    /// Copies `value` into an owned error message.
    fn from(value: &str) -> Self {
        ParseError(value.to_owned())
    }
}
fn parse_html( | |
html: &[u8], | |
) -> std::result::Result<Vec<u64>, Box<dyn std::error::Error>> { | |
let doc = Document::from_read(html)?; | |
let links = doc.find(Class("Tahoma17Blacknounder")); | |
let mut ids = Vec::new(); | |
for link in links { | |
let link = link.attr("href").ok_or("No href attribute")?; | |
let id = link | |
.split("=") | |
.nth(1) | |
.ok_or(format!("No = in URL {}", link))?; | |
println!("https://www.clasificadosonline.com{}", link); | |
let id = id.parse::<u64>()?; | |
ids.push(id); | |
} | |
Ok(ids) | |
} | |
/// Computes the difference between two ascending-sorted ID lists.
///
/// Returns `(missing, new)`, where `missing` holds the values present in `a`
/// but not in `b`, and `new` holds the values present in `b` but not in `a`.
/// Both outputs preserve the input order.
///
/// Both inputs must already be sorted in ascending order; the function does
/// a single linear merge pass and does not verify this. Equal values are
/// matched pairwise, so a value repeated more often on one side is reported
/// for each unmatched occurrence.
fn diff_sorted_vecs(a: &[u64], b: &[u64]) -> (Vec<u64>, Vec<u64>) {
    let mut missing = Vec::new();
    let mut new = Vec::new();
    let (mut i, mut j) = (0, 0);

    // Walk both lists in lockstep while each still has elements left.
    while let (Some(&left), Some(&right)) = (a.get(i), b.get(j)) {
        match left.cmp(&right) {
            std::cmp::Ordering::Less => {
                missing.push(left);
                i += 1;
            }
            std::cmp::Ordering::Greater => {
                new.push(right);
                j += 1;
            }
            std::cmp::Ordering::Equal => {
                i += 1;
                j += 1;
            }
        }
    }

    // Whatever remains on either side has no counterpart on the other.
    // `i <= a.len()` and `j <= b.len()` always hold, so the slices are valid.
    missing.extend_from_slice(&a[i..]);
    new.extend_from_slice(&b[j..]);

    (missing, new)
}
/// Renders listing IDs as detail-page URLs, one per line.
///
/// Each ID is appended to
/// `https://www.clasificadosonline.com/UDRealEstateDetail.asp?ID=` and the
/// resulting URLs are joined with `\n` (no trailing newline). An empty
/// input yields an empty string.
fn print_ids(ids: &[u64]) -> String {
    ids.iter()
        .map(|id| {
            format!(
                "https://www.clasificadosonline.com/UDRealEstateDetail.asp?ID={}",
                id
            )
        })
        .collect::<Vec<String>>()
        .join("\n")
}
async fn send_email( | |
missing: &Vec<u64>, | |
new: &Vec<u64>, | |
) -> std::result::Result<(), Box<dyn std::error::Error>> { | |
let text = (vec![ | |
"These listings are gone: ".to_string(), | |
print_ids(missing), | |
"These listings are new: ".to_string(), | |
print_ids(new), | |
]) | |
.join("\n"); | |
let body = json!({ | |
"personalizations": [ | |
{"to": [ | |
{"email": "kika@kikap.com", "name": "Kirill Pertsev"}, | |
{"email": "wife@example.com", "name": "Wife"}, | |
]} | |
], | |
"from": {"email": "kika@kikap.com", "name": "Kirill Pertsev"}, | |
"subject": "Classificados", | |
"content": [{ | |
"type": "text/plain", | |
"value": text | |
}], | |
}); | |
let res = reqwest::Client::new() | |
.post("https://api.mailchannels.net/tx/v1/send") | |
.json(&body) | |
.send() | |
.await?; | |
console_debug!("Sent email status: {} {}", res.status(), res.text().await?); | |
Ok(()) | |
} | |
async fn write_kv( | |
kv: &kv::KvStore, | |
ids: &Vec<u64>, | |
) -> std::result::Result<(), Box<dyn std::error::Error>> { | |
let ids_json = serde_json::to_string(&ids)?; | |
let pob = kv.put("latest", ids_json)?; | |
pob.execute().await?; | |
Ok(()) | |
} | |
#[event(scheduled)] | |
pub async fn scheduled(_ev: ScheduledEvent, env: Env, _ctx: ScheduleContext) { | |
// Optionally, get more helpful error messages written to the console in the case of a panic. | |
utils::set_panic_hook(); | |
let url = match env.var("URL") { | |
Ok(url) => url.to_string(), | |
Err(_) => { | |
console_log!("URL variable not found"); | |
return; | |
} | |
}; | |
let kv = match env.kv("ids") { | |
Ok(kv) => kv, | |
Err(e) => { | |
console_log!("KV store error: {}", e); | |
return; | |
} | |
}; | |
let html = reqwest::get(url).await.unwrap().text().await.unwrap(); | |
let mut ids = parse_html(html.as_bytes()).unwrap_or_else(|err| { | |
console_error!("Parse HTML error: {}", err); | |
vec![] | |
}); | |
ids.sort(); | |
console_debug!("IDs parsed from HTML: {:?}", ids); | |
let mut kv_ids: Vec<u64> = match kv.get("latest").json().await { | |
Ok(Some(ids)) => ids, | |
Ok(None) => vec![], | |
Err(e) => { | |
console_error!("KV error: {}", e); | |
vec![] | |
} | |
}; | |
kv_ids.sort(); | |
console_debug!("IDs read from KV: {:?}", kv_ids); | |
let (missing, new) = diff_sorted_vecs(&kv_ids, &ids); | |
console_debug!("Missing IDs: {:?}", missing); | |
console_debug!("New IDs: {:?}", new); | |
if missing.len() != 0 || new.len() != 0 { | |
if let Err(e) = send_email(&missing, &new).await { | |
console_error!("Error sending email: {}", e); | |
} | |
if let Err(e) = write_kv(&kv, &ids).await { | |
console_error!("Failed to write KV: {}", e); | |
} | |
console_debug!("Email sent and KV written"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment