refactor: change readability lib

This commit is contained in:
Moritz Böhme 2025-10-03 12:36:43 +02:00
parent c242ca8cc6
commit 0424544abf
No known key found for this signature in database
GPG key ID: 970C6E89EB0547A9
3 changed files with 607 additions and 141 deletions

739
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -6,7 +6,7 @@ edition = "2024"
[dependencies]
anyhow = "1.0.99"
itertools = "0.14.0"
llm_readability = "0.0.11"
readability-rust = "0.1.0"
reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
rss = "2.0.12"
tokio = { version = "1.47.1", features = ["full"] }

View file

@ -1,8 +1,7 @@
use std::time::Duration;
use anyhow::Result;
use anyhow::{Context, Result};
use itertools::Itertools;
use llm_readability::extractor;
use reqwest::{Client, Url};
use rss::Channel;
use tokio::{task::JoinSet, time::sleep};
@ -59,8 +58,8 @@ async fn complete(mut channel: Channel, client: &Client) -> Result<Box<Channel>>
/// Requests `link` through `client` and returns the `content` value produced
/// by the readability extraction step as an owned `String`.
///
/// Failures from sending the request, reading the response body and running
/// the extraction are propagated to the caller with `?`.
async fn get_content(link: &str, client: &Client) -> Result<String> {
// NOTE(review): this hunk is rendered without +/- diff markers. The two
// `let content` statements below look like the removed (`extractor::extract`)
// and added (`readability_rust`) sides of the same change rather than code
// meant to run back to back; confirm against the raw diff.
let response = client.get(link).send().await?;
// Variant using `llm_readability`: hands the raw response bytes plus the parsed
// URL of `link` to `extractor::extract` and keeps its `content` field.
let content =
extractor::extract(&mut response.bytes().await?.as_ref(), &Url::parse(link)?)?.content;
// Variant using `readability_rust`: builds a `Readability` from the response
// text, passing `None` as the second argument.
let mut readablity = readability_rust::Readability::new(response.text().await?.as_ref(), None)?;
// `parse()` and the resulting `content` field are each turned into an error
// carrying a context message when they yield no value.
// NOTE(review): `readablity` (local name and both messages) is a misspelling
// of `readability`; the messages are runtime text and are left untouched here.
let content = readablity.parse().context("readablity parse error")?.content.context("readablity no content")?;
Ok(content)
}