feat: add delay for same website requests

parent 87a001f0cc
commit d971927b4d

3 changed files with 57 additions and 20 deletions
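In short, the commit groups feed items by the domain of their link, handles each group in its own task, and sleeps one second before every request after the first within a group, so hits on the same site are spaced out while different sites are still fetched concurrently. Below is a minimal sketch of that pacing loop, not the commit's code: it assumes tokio with the time and macros features, and fetch_page, fetch_same_domain, and the example URLs are made-up names standing in for get_content and the real per-item loop in src/main.rs.

use std::time::Duration;

// Hypothetical stand-in for the real per-item fetch (get_content in the diff below).
async fn fetch_page(url: &str) -> String {
    format!("<contents of {url}>")
}

// Fetch every URL belonging to one domain, pausing one second between requests.
async fn fetch_same_domain(urls: &[&str]) -> Vec<String> {
    let mut wait_time = Duration::from_secs(0); // the first request goes out immediately
    let mut pages = Vec::new();
    for &url in urls {
        tokio::time::sleep(wait_time).await;
        pages.push(fetch_page(url).await);
        wait_time = Duration::from_secs(1); // every later request to the same site waits a second
    }
    pages
}

#[tokio::main]
async fn main() {
    let pages = fetch_same_domain(&["https://a.com/1", "https://a.com/2"]).await;
    println!("fetched {} pages", pages.len()); // takes roughly one second overall
}

The real implementation below additionally rebuilds each rss::Item, storing the extracted page content as its description.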
16 Cargo.lock (generated)

@@ -164,6 +164,7 @@ name = "complete-rss"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "itertools",
  "llm_readability",
  "reqwest",
  "rss",
@@ -287,6 +288,12 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.35"
@@ -743,6 +750,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.15"
1 Cargo.toml

@@ -5,6 +5,7 @@ edition = "2024"
 
 [dependencies]
 anyhow = "1.0.99"
+itertools = "0.14.0"
 llm_readability = "0.0.11"
 reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
 rss = "2.0.12"
60 src/main.rs

@@ -1,4 +1,7 @@
+use std::time::Duration;
+
 use anyhow::Result;
+use itertools::Itertools;
 use llm_readability::extractor;
 use reqwest::{Client, Url};
 use rss::Channel;
@@ -12,38 +15,55 @@ async fn get_feed(url: String, client: &Client) -> Result<Channel> {
     Ok(channel)
 }
 
+fn get_domain(item: &rss::Item) -> Option<String> {
+    item.link().map(|link| {
+        Url::parse(link)
+            .ok()
+            .map(|parsed| parsed.domain().map(|domain| domain.to_string()))
+    }).flatten()?
+}
+
 async fn complete(channel: Channel, client: &Client) -> Result<Box<Channel>> {
-    let items: Vec<rss::Item> = channel.items().into_iter().cloned().collect();
+    let grouped: Vec<Vec<rss::Item>> = channel
+        .items()
+        .into_iter()
+        .chunk_by(|item| get_domain(*item))
+        .into_iter()
+        .map(|(_k, v)| v.cloned().collect())
+        .collect();
 
     let mut set = JoinSet::new();
-    for mut item in items {
-        set.spawn({
-            let client = client.clone();
-            async move {
-                if let Some(link) = item.link.clone() {
-                    if let Ok(content) = get_content(link, &client.clone()).await {
-                        item.set_description(content);
-                    };
-                };
-                item
-            }
-        });
-    }
+    for items in grouped.into_iter() {
+        let client = client.clone();
+        set.spawn(async move {
+            let mut new_items = vec![];
+            let mut wait_time = Duration::from_secs(0);
+            for item in items {
+                tokio::time::sleep(wait_time).await;
+                let mut new_item: rss::Item = item.clone().to_owned();
+                if let Some(link) = item.link() {
+                    if let Ok(content) = get_content(link, &client.clone()).await {
+                        new_item.set_description(content);
+                    };
+                };
+                new_items.push(new_item);
+                wait_time = Duration::from_secs(1);
+            }
+            new_items
+        });
+    }
 
-    let updated_items = set.join_all().await;
+    let items: Vec<rss::Item> = set.join_all().await.concat();
 
     let mut new_channel = channel.clone();
-    new_channel.set_items(updated_items);
+    new_channel.set_items(items);
     Ok(Box::new(new_channel))
 }
 
-async fn get_content(link: String, client: &Client) -> Result<String> {
-    let response = client.get(&link).send().await?;
-    let content = extractor::extract(
-        &mut response.bytes().await?.as_ref(),
-        &Url::parse(link.as_str())?,
-    )?
-    .content;
+async fn get_content(link: &str, client: &Client) -> Result<String> {
+    let response = client.get(link).send().await?;
+    let content =
+        extractor::extract(&mut response.bytes().await?.as_ref(), &Url::parse(link)?)?.content;
     Ok(content)
 }
 
@@ -69,6 +89,6 @@ async fn main() {
             Ok::<String, warp::Rejection>(format!("{}", updated))
         }
     })
     .map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
     warp::serve(path).run(([127, 0, 0, 1], 3030)).await;
 }
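A usage caveat on the grouping step: Itertools::chunk_by only merges consecutive items whose keys compare equal, so items from the same domain that are interleaved with entries from other sites fall into separate groups (and separate spawned tasks), where the one-second pacing no longer applies between them. A small illustration with made-up domain strings; this example is not part of the commit.

use itertools::Itertools;

fn main() {
    // chunk_by only merges *consecutive* items whose keys are equal, so the
    // interleaved "a.com" entries below land in two separate groups.
    let domains = ["a.com", "a.com", "b.com", "a.com"];
    let groups: Vec<Vec<&str>> = domains
        .iter()
        .chunk_by(|d| d.to_string())
        .into_iter()
        .map(|(_domain, group)| group.copied().collect())
        .collect();
    assert_eq!(
        groups,
        vec![vec!["a.com", "a.com"], vec!["b.com"], vec!["a.com"]]
    );
}

Sorting the items by domain before grouping would make the pacing strict per domain, should that ever matter for this feed.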