Skip to main content

max / balanced_breakfast

4.7 KB · 184 lines History Blame Raw
1 // arXiv preprint feed. Queries the arXiv Atom API by category (e.g. cs.AI).
2 // Parses results with parse_feed(); provides PDF and ar5iv HTML actions.
3
4 const ARXIV_API = "http://export.arxiv.org/api/query";
5
// Machine-readable identifier of this feed source.
fn id() {
    return "arxiv";
}
9
// Human-readable display name of this feed source.
fn name() {
    return "arXiv";
}
13
// Feature flags advertised by this source: both pagination and date
// filtering are reported as supported.
fn capabilities() {
    let flags = #{
        supports_date_filter: true,
        supports_pagination: true
    };
    return flags;
}
20
// Describes the user-configurable settings of this source: a required
// "category" text field and an optional "max_results" text field
// (default "30").
fn config_schema() {
    let category_field = #{
        key: "category",
        label: "Category",
        field_type: "text",
        required: true,
        description: "arXiv category (e.g., cs.AI, cs.LG, cs.CL, stat.ML)",
        placeholder: "cs.AI"
    };

    let max_results_field = #{
        key: "max_results",
        label: "Max Results",
        field_type: "text",
        description: "Number of papers to fetch (max 100)",
        default_value: "30",
        placeholder: "30"
    };

    let schema = #{
        description: "Fetch papers from arXiv by category. Common categories: cs.AI, cs.LG, cs.CL, cs.CV, stat.ML",
        fields: [category_field, max_results_field]
    };
    return schema;
}
44
// Fetches recent papers for every category named in the configuration and
// returns them as a single batch.
//
// config.category    - comma-separated list of arXiv categories; surrounding
//                      whitespace is trimmed and empty parts are ignored.
// config.max_results - optional result count requested PER CATEGORY. Falls
//                      back to 30 when missing, unparsable or not positive,
//                      and is capped at 100.
// cursor             - accepted but not used; everything is returned in one
//                      page.
//
// Returns #{ items: [...], has_more: false }. When no usable category is
// configured the item list is empty.
//
// A paper that shows up under more than one configured category (arXiv's
// `cat:` search also matches cross-listed papers) is emitted only once; the
// first category in configuration order wins. Items with an empty item_id
// are passed through untouched because they cannot be compared reliably.
fn fetch(config, cursor) {
    // Get categories from config
    let categories = [];
    if config.category != () {
        for part in str_split(config.category, ",") {
            let cat = str_trim(part);
            if cat != "" {
                categories.push(cat);
            }
        }
    }

    if categories.len() == 0 {
        return #{ items: [], has_more: false };
    }

    // Get max results (default 30, upper bound 100)
    let max_results = 30;
    if config.max_results != () {
        let parsed = parse_int(config.max_results);
        if parsed != () && parsed > 0 {
            max_results = if parsed > 100 { 100 } else { parsed };
        }
    }

    // Fetch papers for each category, skipping papers already collected
    // from an earlier category.
    let items = [];
    let seen_ids = #{};
    for category in categories {
        let cat_items = fetch_category(category, max_results, ARXIV_API);
        for item in cat_items {
            let key = item.id.item_id;
            if key != "" {
                if key in seen_ids {
                    continue;
                }
                seen_ids[key] = true;
            }
            items.push(item);
        }
    }

    #{
        items: items,
        has_more: false
    }
}
89
// Recovers the arXiv identifier from an entry id URL.
//
// Everything after the first "abs" path segment is the identifier, so both
// new-style ids (".../abs/2401.12345v1" -> "2401.12345v1") and old-style ids
// that contain a slash (".../abs/hep-th/9901001v1" -> "hep-th/9901001v1")
// are preserved in full. When no "abs" segment is present the last "/"
// segment is returned.
fn arxiv_id_from_url(full_id) {
    let segments = str_split(full_id, "/");
    let count = segments.len();
    if count == 0 {
        return full_id;
    }

    // Find the first "abs" path segment.
    let abs_at = -1;
    let i = 0;
    while i < count {
        if segments[i] == "abs" {
            abs_at = i;
            break;
        }
        i += 1;
    }

    // No "abs" marker, or nothing after it: fall back to the last segment.
    if abs_at < 0 || abs_at + 1 >= count {
        return segments[count - 1];
    }

    // Re-join everything after "abs" so an archive prefix is kept.
    let paper_id = segments[abs_at + 1];
    let j = abs_at + 2;
    while j < count {
        paper_id += "/" + segments[j];
        j += 1;
    }
    paper_id
}

// Queries the arXiv API for the newest submissions in one category and
// converts every feed entry into an item map.
//
// category    - arXiv category name, inserted verbatim into the query URL.
// max_results - value sent as the max_results query parameter.
// api_base    - base URL of the query endpoint.
//
// Returns an array of item maps (id / bite / content / meta); the array is
// empty when the parsed feed has no entries.
//
// NOTE(review): http_get, parse_feed, truncate and timestamp_now are not
// defined in this file and are presumably supplied by the host; their
// failure behaviour is not visible here.
fn fetch_category(category, max_results, api_base) {
    let items = [];

    // Build the arXiv API URL (newest submissions first)
    let url = api_base + "?search_query=cat:" + category + "&sortBy=submittedDate&sortOrder=descending&max_results=" + max_results;

    // Fetch the feed
    let xml = http_get(url);
    let feed = parse_feed(xml);

    if feed.entries == () {
        return items;
    }

    for entry in feed.entries {
        // Extract paper ID from the entry's id URL
        let full_id = "";
        if entry.id != () {
            full_id = entry.id;
        }
        let paper_id = arxiv_id_from_url(full_id);

        // Clean up title: newlines become spaces, then trim
        let title = "";
        if entry.title != () {
            title = entry.title;
            title = str_replace(title, "\n", " ");
            title = str_trim(title);
        }

        // Get link
        let link = "";
        if entry.link != () {
            link = entry.link;
        }

        // Get abstract, flattened the same way as the title
        let abstract_text = "";
        if entry.summary != () {
            abstract_text = entry.summary;
            abstract_text = str_replace(abstract_text, "\n", " ");
            abstract_text = str_trim(abstract_text);
        }

        // Get publication date; falls back to the current timestamp
        let published = timestamp_now();
        if entry.published != () {
            published = entry.published;
        }

        // Get authors
        let author_display = "Unknown";
        if entry.author != () {
            author_display = entry.author;
        }

        // Create bite text
        let bite_text = truncate(title, 100);

        // Build tags
        let tags = ["arxiv", category];

        items.push(#{
            id: #{ source: "arxiv", item_id: paper_id },
            bite: #{
                author: author_display,
                text: bite_text,
                secondary: "[" + category + "]",
                indicator: "📄"
            },
            content: #{
                title: title,
                body: abstract_text,
                url: link,
                actions: [
                    #{ label: "View PDF", action_type: "download", url: "https://arxiv.org/pdf/" + paper_id },
                    #{ label: "ar5iv HTML", action_type: "open", url: "https://ar5iv.labs.arxiv.org/html/" + paper_id }
                ]
            },
            meta: #{
                source_name: "arXiv " + category,
                published_at: published,
                tags: tags
            }
        });
    }

    items
}
184