Skip to main content

max / balanced_breakfast

563 B · 17 lines History Blame Raw
1 // Reader-view extractor. Not a feed plugin — provides the extract(url)
2 // function for article content. Uses the extract_article host function
3 // (readability algorithm). Rewrites arXiv abstract URLs to ar5iv HTML.
4
// Fetch the page behind `url` and return whatever the extract_article host
// function produces for it.
//
// URLs containing "arxiv.org/abs/" are not fetched as-is: that substring is
// swapped for "ar5iv.labs.arxiv.org/html/" so the rendered HTML version of
// the paper is downloaded instead of the abstract page. Every other URL is
// fetched unchanged.
fn extract(url) {
    // Decide which address to download; only arXiv abstract links are rewritten.
    let target = if str_contains(url, "arxiv.org/abs/") {
        str_replace(url, "arxiv.org/abs/", "ar5iv.labs.arxiv.org/html/")
    } else {
        url
    };

    // Download the page, then hand the body to the host-side extractor.
    // The extractor's result is the value of the function.
    let page = http_get(target);
    extract_article(page)
}
17