feat: add delay for same website requests

This commit is contained in:
Moritz Böhme 2025-09-04 08:03:03 +02:00
parent 87a001f0cc
commit d971927b4d
No known key found for this signature in database
GPG key ID: 970C6E89EB0547A9
3 changed files with 57 additions and 20 deletions

16
Cargo.lock generated
View file

@ -164,6 +164,7 @@ name = "complete-rss"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"itertools",
"llm_readability", "llm_readability",
"reqwest", "reqwest",
"rss", "rss",
@ -287,6 +288,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]] [[package]]
name = "encoding_rs" name = "encoding_rs"
version = "0.8.35" version = "0.8.35"
@ -743,6 +750,15 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.15" version = "1.0.15"

View file

@ -5,6 +5,7 @@ edition = "2024"
[dependencies] [dependencies]
anyhow = "1.0.99" anyhow = "1.0.99"
itertools = "0.14.0"
llm_readability = "0.0.11" llm_readability = "0.0.11"
reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
rss = "2.0.12" rss = "2.0.12"

View file

@ -1,4 +1,7 @@
use std::time::Duration;
use anyhow::Result; use anyhow::Result;
use itertools::Itertools;
use llm_readability::extractor; use llm_readability::extractor;
use reqwest::{Client, Url}; use reqwest::{Client, Url};
use rss::Channel; use rss::Channel;
@ -12,38 +15,55 @@ async fn get_feed(url: String, client: &Client) -> Result<Channel> {
Ok(channel) Ok(channel)
} }
/// Returns the domain name of the URL an RSS item links to.
///
/// Yields `None` when the item has no link, when the link cannot be parsed
/// as a URL, or when `Url::domain` reports no domain for the parsed URL.
fn get_domain(item: &rss::Item) -> Option<String> {
    let link = item.link()?;
    // A malformed link is treated the same as a missing one.
    let parsed = Url::parse(link).ok()?;
    parsed.domain().map(str::to_owned)
}
/// Replaces the description of every item in `channel` with the content
/// extracted from the page the item links to.
///
/// Items are grouped by the domain of their link. Each group is processed
/// by its own task, sequentially, with a pause between two consecutive
/// requests, so that a single website is never hit by several requests at
/// once. Different domains are fetched concurrently.
///
/// An item whose page cannot be fetched or extracted keeps its original
/// description. The returned channel lists the items in the same order as
/// the input channel.
async fn complete(channel: Channel, client: &Client) -> Result<Box<Channel>> {
    // Pause between two consecutive requests issued by the same task.
    const SAME_DOMAIN_DELAY: Duration = Duration::from_secs(1);

    // Group over the whole feed rather than over runs of adjacent items:
    // `chunk_by` would put non-adjacent items of one domain into separate
    // tasks, which would then request that domain concurrently.
    // The original position travels with each item so the feed order can be
    // restored after the tasks finish.
    let grouped = channel
        .items()
        .iter()
        .cloned()
        .enumerate()
        .into_group_map_by(|(_, item)| get_domain(item));

    let mut set = JoinSet::new();
    for (_domain, items) in grouped {
        let client = client.clone();
        set.spawn(async move {
            let mut completed = Vec::with_capacity(items.len());
            let mut already_requested = false;
            for (position, mut item) in items {
                let fetched = match item.link() {
                    Some(link) => {
                        // Only throttle between real requests; the first
                        // request of a group goes out immediately.
                        if already_requested {
                            tokio::time::sleep(SAME_DOMAIN_DELAY).await;
                        }
                        already_requested = true;
                        // Best effort: a failed fetch leaves the item as is.
                        get_content(link, &client).await.ok()
                    }
                    // Nothing to fetch, hence nothing to wait for.
                    None => None,
                };
                if let Some(content) = fetched {
                    item.set_description(content);
                }
                completed.push((position, item));
            }
            completed
        });
    }

    // `join_all` hands results back in completion order, so sort by the
    // remembered position to reproduce the original feed order.
    let mut indexed: Vec<(usize, rss::Item)> =
        set.join_all().await.into_iter().flatten().collect();
    indexed.sort_unstable_by_key(|entry| entry.0);
    let items: Vec<rss::Item> = indexed.into_iter().map(|(_, item)| item).collect();

    let mut new_channel = channel;
    new_channel.set_items(items);
    Ok(Box::new(new_channel))
}
/// Downloads the page at `link` and returns its extracted readable content.
///
/// # Errors
///
/// Fails when `link` is not a valid URL, when the request cannot be sent,
/// when the server answers with a 4xx/5xx status, when the body cannot be
/// read, or when the extractor rejects the document.
async fn get_content(link: &str, client: &Client) -> Result<String> {
    // Parse once up front: an invalid link fails before any network traffic
    // and the parsed URL is reused by the extractor below.
    let url = Url::parse(link)?;
    let response = client
        .get(url.clone())
        .send()
        .await?
        // Without this, an error page (404, 429, ...) would be extracted and
        // returned as if it were the article content.
        .error_for_status()?;
    let body = response.bytes().await?;
    let mut reader: &[u8] = body.as_ref();
    let content = extractor::extract(&mut reader, &url)?.content;
    Ok(content)
}
@ -69,6 +89,6 @@ async fn main() {
Ok::<String, warp::Rejection>(format!("{}", updated)) Ok::<String, warp::Rejection>(format!("{}", updated))
} }
}) })
.map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml")); .map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
warp::serve(path).run(([127, 0, 0, 1], 3030)).await; warp::serve(path).run(([127, 0, 0, 1], 3030)).await;
} }