From d971927b4db8a41bc469599f4a531456c88067b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20B=C3=B6hme?= Date: Thu, 4 Sep 2025 08:03:03 +0200 Subject: [PATCH] feat: add delay for same website requests --- Cargo.lock | 16 ++++++++++++++ Cargo.toml | 1 + src/main.rs | 60 +++++++++++++++++++++++++++++++++++------------------ 3 files changed, 57 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2176b0e..56353e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -164,6 +164,7 @@ name = "complete-rss" version = "0.1.0" dependencies = [ "anyhow", + "itertools", "llm_readability", "reqwest", "rss", @@ -287,6 +288,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -743,6 +750,15 @@ dependencies = [ "serde", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" diff --git a/Cargo.toml b/Cargo.toml index 77d6fd5..234c696 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] anyhow = "1.0.99" +itertools = "0.14.0" llm_readability = "0.0.11" reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false } rss = "2.0.12" diff --git a/src/main.rs b/src/main.rs index a3b9691..31ce77e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,7 @@ +use std::time::Duration; + use anyhow::Result; +use itertools::Itertools; use llm_readability::extractor; use reqwest::{Client, Url}; use rss::Channel; @@ -12,38 +15,55 @@ async fn get_feed(url: String, client: &Client) -> Result { Ok(channel) } +fn get_domain(item: &rss::Item) -> Option { + item.link().map(|link| { + Url::parse(link) + .ok() + .map(|parsed| parsed.domain().map(|domain| domain.to_string())) + }).flatten()? +} + async fn complete(channel: Channel, client: &Client) -> Result> { - let items: Vec = channel.items().into_iter().cloned().collect(); + let grouped: Vec> = channel + .items() + .into_iter() + .chunk_by(|item| get_domain(*item)) + .into_iter() + .map(|(_k, v)| v.cloned().collect()) + .collect(); let mut set = JoinSet::new(); - for mut item in items { - set.spawn({ - let client = client.clone(); - async move { - if let Some(link) = item.link.clone() { - if let Ok(content) = get_content(link, &client.clone()).await { - item.set_description(content); + for items in grouped.into_iter() { + let client = client.clone(); + set.spawn(async move { + let mut new_items = vec![]; + let mut wait_time = Duration::from_secs(0); + for item in items { + tokio::time::sleep(wait_time).await; + let mut new_item: rss::Item = item.clone().to_owned(); + if let Some(link) = item.link() { + if let Ok(content) = get_content(link, &client.clone()).await { + new_item.set_description(content); + }; }; + new_items.push(new_item); + wait_time = Duration::from_secs(1); } - item - } + new_items }); } - let updated_items = set.join_all().await; + let items: Vec = set.join_all().await.concat(); let mut new_channel = channel.clone(); - new_channel.set_items(updated_items); + new_channel.set_items(items); Ok(Box::new(new_channel)) } -async fn get_content(link: String, client: &Client) -> Result { - let response = client.get(&link).send().await?; - let content = extractor::extract( - &mut response.bytes().await?.as_ref(), - &Url::parse(link.as_str())?, - )? - .content; +async fn get_content(link: &str, client: &Client) -> Result { + let response = client.get(link).send().await?; + let content = + extractor::extract(&mut response.bytes().await?.as_ref(), &Url::parse(link)?)?.content; Ok(content) } @@ -69,6 +89,6 @@ async fn main() { Ok::(format!("{}", updated)) } }) - .map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml")); + .map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml")); warp::serve(path).run(([127, 0, 0, 1], 3030)).await; }