max / balanced_breakfast

4.7 KB · 184 lines History Blame Raw

1	// arXiv preprint feed. Queries the arXiv Atom API by category (e.g. cs.AI).
2	// Parses results with parse_feed(); provides PDF and ar5iv HTML actions.
3
4	const ARXIV_API = "http://export.arxiv.org/api/query";
5
// Identifier of this feed. The same string is used as the `source`
// of every item id built in fetch_category().
fn id() {
    return "arxiv";
}
9
// Human-readable display name of this feed.
fn name() {
    return "arXiv";
}
13
// Capability flags advertised for this feed.
// NOTE(review): both flags are reported as true, but fetch() in this file
// ignores its cursor, always returns has_more: false and applies no date
// filter -- confirm how the host interprets these flags.
fn capabilities() {
    let flags = #{};
    flags.supports_pagination = true;
    flags.supports_date_filter = true;
    flags
}
20
// Describes the configuration fields of this feed: a required `category`
// text field and an optional `max_results` text field (default "30").
fn config_schema() {
    // Required: which arXiv category to query.
    let category_field = #{
        key: "category",
        label: "Category",
        field_type: "text",
        required: true,
        description: "arXiv category (e.g., cs.AI, cs.LG, cs.CL, stat.ML)",
        placeholder: "cs.AI"
    };

    // Optional: how many papers to request; fetch() clamps this to 100.
    let max_results_field = #{
        key: "max_results",
        label: "Max Results",
        field_type: "text",
        description: "Number of papers to fetch (max 100)",
        default_value: "30",
        placeholder: "30"
    };

    let schema = #{
        description: "Fetch papers from arXiv by category. Common categories: cs.AI, cs.LG, cs.CL, cs.CV, stat.ML",
        fields: [category_field, max_results_field]
    };
    schema
}
44
// Fetch the newest papers for every category listed in `config.category`.
//
// config.category    - comma-separated arXiv categories (e.g. "cs.AI, cs.LG");
//                      blank entries are ignored and repeated entries are
//                      queried only once.
// config.max_results - optional per-category result count; values that are
//                      missing, unparsable or <= 0 fall back to 30, and
//                      values above 100 are clamped to 100.
// cursor             - currently unused; every call returns the first page.
//
// Returns #{ items: [...], has_more: false }. When no usable category is
// configured the item list is empty.
fn fetch(config, cursor) {
    // Collect the distinct, non-empty category names in their original order.
    let categories = [];
    let seen_categories = #{};
    if config.category != () {
        for part in str_split(config.category, ",") {
            let cat = str_trim(part);
            if cat != "" && seen_categories[cat] == () {
                seen_categories[cat] = true;
                categories.push(cat);
            }
        }
    }

    if categories.len() == 0 {
        return #{ items: [], has_more: false };
    }

    // Resolve the per-category result count (default 30, capped at 100).
    let max_results = 30;
    if config.max_results != () {
        let parsed = parse_int(config.max_results);
        if parsed != () && parsed > 0 {
            max_results = if parsed > 100 { 100 } else { parsed };
        }
    }

    // Query each category in turn. A paper cross-listed in several of the
    // requested categories is returned by each query with the same item id,
    // so only its first occurrence is kept.
    let items = [];
    let seen_ids = #{};
    for category in categories {
        for item in fetch_category(category, max_results, ARXIV_API) {
            let item_id = item.id.item_id;
            if item_id == "" {
                // No usable id to compare against: keep the item as-is.
                items.push(item);
            } else if seen_ids[item_id] == () {
                seen_ids[item_id] = true;
                items.push(item);
            }
        }
    }

    #{
        items: items,
        has_more: false
    }
}
89
// Extract the arXiv paper id from an Atom entry id URL.
//
// Everything after the "abs" path segment is kept, so an old-style id such
// as ".../abs/hep-th/9901001v1" yields "hep-th/9901001v1" rather than only
// its last segment. When there is no "abs" segment (or nothing follows it)
// the last "/"-separated segment is returned.
private fn arxiv_paper_id(entry_id) {
    let segments = str_split(entry_id, "/");
    let paper_id = "";
    let past_abs = false;

    for segment in segments {
        if past_abs {
            if paper_id != "" {
                paper_id += "/";
            }
            paper_id += segment;
        } else if segment == "abs" {
            past_abs = true;
        }
    }

    if paper_id != "" {
        return paper_id;
    }
    if segments.len() > 0 {
        return segments[segments.len() - 1];
    }
    entry_id
}

// Turn newlines into spaces and collapse every run of spaces into a single
// one, dropping leading and trailing spaces. Wrapped titles would otherwise
// keep the indentation that follows each line break.
private fn arxiv_collapse_whitespace(text) {
    let flattened = str_replace(text, "\n", " ");
    let collapsed = "";

    for word in str_split(flattened, " ") {
        if word != "" {
            if collapsed != "" {
                collapsed += " ";
            }
            collapsed += word;
        }
    }

    collapsed
}

// Fetch the most recently submitted papers of one arXiv category.
//
// category    - arXiv category name, inserted verbatim into the query URL.
// max_results - number of entries to request.
// api_base    - base URL of the arXiv query endpoint.
//
// Returns an array of item maps (id, bite, content, meta); the array is empty
// when the parsed feed has no entries.
fn fetch_category(category, max_results, api_base) {
    let items = [];

    // NOTE(review): `category` is not URL-encoded here; confirm whether
    // http_get() or the host sanitises the URL before sending it.
    let url = api_base + "?search_query=cat:" + category + "&sortBy=submittedDate&sortOrder=descending&max_results=" + max_results;

    let xml = http_get(url);
    let feed = parse_feed(xml);

    if feed.entries == () {
        return items;
    }

    for entry in feed.entries {
        // Paper id, derived from the entry id URL (empty when the id is missing).
        let full_id = if entry.id != () { entry.id } else { "" };
        let paper_id = arxiv_paper_id(full_id);

        // Title and abstract as single-line text.
        let title = "";
        if entry.title != () {
            title = arxiv_collapse_whitespace(entry.title);
        }

        let abstract_text = "";
        if entry.summary != () {
            abstract_text = arxiv_collapse_whitespace(entry.summary);
        }

        let link = if entry.link != () { entry.link } else { "" };

        // Entries without a publication date are stamped with the current time.
        let published = if entry.published != () { entry.published } else { timestamp_now() };

        let author_display = if entry.author != () { entry.author } else { "Unknown" };

        // Short text for the list view: the title cut to 100 via truncate().
        let bite_text = truncate(title, 100);

        let tags = ["arxiv", category];

        items.push(#{
            id: #{ source: "arxiv", item_id: paper_id },
            bite: #{
                author: author_display,
                text: bite_text,
                secondary: "[" + category + "]",
                indicator: "📄"
            },
            content: #{
                title: title,
                body: abstract_text,
                url: link,
                actions: [
                    #{ label: "View PDF", action_type: "download", url: "https://arxiv.org/pdf/" + paper_id },
                    #{ label: "ar5iv HTML", action_type: "open", url: "https://ar5iv.labs.arxiv.org/html/" + paper_id }
                ]
            },
            meta: #{
                source_name: "arXiv " + category,
                published_at: published,
                tags: tags
            }
        });
    }

    items
}
184