Skip to main content

max / audiofiles

76.2 KB · 1814 lines History Blame Raw
1 //! SQLite database wrapper with versioned migrations for samples, VFS, tags, and analysis tables.
2
3 use std::path::Path;
4
5 use rusqlite::{Connection, functions::FunctionFlags};
6 use sha2::{Digest, Sha256};
7 use thiserror::Error;
8 use tracing::instrument;
9
10 #[derive(Error, Debug)]
11 pub enum DbError {
12 #[error("SQLite error: {0}")]
13 Sqlite(#[from] rusqlite::Error),
14 }
15
16 /// Core database wrapper. All access is synchronous — no async runtime needed,
17 /// safe to use from a CLAP plugin host thread.
18 pub struct Database {
19 conn: Connection,
20 }
21
22 const MIGRATION_001: &str = r#"
23 -- Sample storage and metadata
24 CREATE TABLE samples (
25 hash TEXT PRIMARY KEY,
26 original_name TEXT NOT NULL,
27 file_extension TEXT NOT NULL,
28 file_size INTEGER NOT NULL,
29 import_date INTEGER NOT NULL,
30 last_modified INTEGER NOT NULL
31 );
32
33 -- Audio analysis results
34 CREATE TABLE audio_analysis (
35 hash TEXT PRIMARY KEY REFERENCES samples(hash) ON DELETE CASCADE,
36 bpm REAL,
37 musical_key TEXT,
38 duration REAL NOT NULL,
39 sample_rate INTEGER NOT NULL,
40 channels INTEGER NOT NULL,
41 peak_db REAL,
42 rms_db REAL,
43 is_loop BOOLEAN,
44 spectral_centroid REAL,
45 onset_strength REAL,
46 analyzed_at INTEGER NOT NULL
47 );
48
49 -- Virtual file systems
50 CREATE TABLE vfs (
51 id INTEGER PRIMARY KEY,
52 name TEXT NOT NULL UNIQUE,
53 created_at INTEGER NOT NULL,
54 modified_at INTEGER NOT NULL
55 );
56
57 -- VFS directory/file nodes
58 CREATE TABLE vfs_nodes (
59 id INTEGER PRIMARY KEY,
60 vfs_id INTEGER NOT NULL REFERENCES vfs(id) ON DELETE CASCADE,
61 parent_id INTEGER REFERENCES vfs_nodes(id) ON DELETE CASCADE,
62 name TEXT NOT NULL,
63 node_type TEXT NOT NULL CHECK(node_type IN ('directory', 'sample')),
64 sample_hash TEXT REFERENCES samples(hash) ON DELETE CASCADE,
65 created_at INTEGER NOT NULL,
66 UNIQUE(vfs_id, parent_id, name)
67 );
68
69 -- User-defined tags
70 CREATE TABLE tags (
71 sample_hash TEXT NOT NULL REFERENCES samples(hash) ON DELETE CASCADE,
72 tag_name TEXT NOT NULL,
73 tag_value TEXT NOT NULL,
74 PRIMARY KEY (sample_hash, tag_name, tag_value)
75 );
76
77 -- Collections/playlists
78 CREATE TABLE collections (
79 id INTEGER PRIMARY KEY,
80 name TEXT NOT NULL UNIQUE,
81 description TEXT,
82 created_at INTEGER NOT NULL
83 );
84
85 CREATE TABLE collection_members (
86 collection_id INTEGER NOT NULL REFERENCES collections(id) ON DELETE CASCADE,
87 sample_hash TEXT NOT NULL REFERENCES samples(hash) ON DELETE CASCADE,
88 added_at INTEGER NOT NULL,
89 PRIMARY KEY (collection_id, sample_hash)
90 );
91
92 -- Smart folders (saved searches)
93 CREATE TABLE smart_folders (
94 id INTEGER PRIMARY KEY,
95 vfs_id INTEGER NOT NULL REFERENCES vfs(id) ON DELETE CASCADE,
96 name TEXT NOT NULL,
97 query_json TEXT NOT NULL,
98 created_at INTEGER NOT NULL
99 );
100
101 -- Performance indexes
102 CREATE INDEX idx_vfs_nodes_parent ON vfs_nodes(parent_id);
103 CREATE INDEX idx_vfs_nodes_vfs ON vfs_nodes(vfs_id);
104 CREATE INDEX idx_vfs_nodes_hash ON vfs_nodes(sample_hash);
105 CREATE INDEX idx_tags_hash ON tags(sample_hash);
106 CREATE INDEX idx_tags_name_value ON tags(tag_name, tag_value);
107 CREATE INDEX idx_analysis_bpm ON audio_analysis(bpm);
108 CREATE INDEX idx_analysis_key ON audio_analysis(musical_key);
109 "#;
110
111 const MIGRATION_002: &str = r#"
112 CREATE TABLE tags_v2 (
113 sample_hash TEXT NOT NULL REFERENCES samples(hash) ON DELETE CASCADE,
114 tag TEXT NOT NULL,
115 PRIMARY KEY (sample_hash, tag)
116 );
117
118 -- Migrate any existing data
119 INSERT OR IGNORE INTO tags_v2 (sample_hash, tag)
120 SELECT sample_hash, LOWER(tag_name || '.' || tag_value) FROM tags;
121
122 DROP TABLE tags;
123 ALTER TABLE tags_v2 RENAME TO tags;
124
125 CREATE INDEX idx_tags_hash ON tags(sample_hash);
126 CREATE INDEX idx_tags_tag ON tags(tag);
127 "#;
128
129 const MIGRATION_003: &str = r#"
130 ALTER TABLE audio_analysis ADD COLUMN lufs REAL;
131 ALTER TABLE audio_analysis ADD COLUMN spectral_flatness REAL;
132 ALTER TABLE audio_analysis ADD COLUMN spectral_rolloff REAL;
133 ALTER TABLE audio_analysis ADD COLUMN zero_crossing_rate REAL;
134 ALTER TABLE audio_analysis ADD COLUMN classification TEXT;
135 "#;
136
137 const MIGRATION_004: &str = r#"
138 CREATE TABLE IF NOT EXISTS waveform_data (
139 hash TEXT PRIMARY KEY REFERENCES samples(hash) ON DELETE CASCADE,
140 num_buckets INTEGER NOT NULL,
141 peak_data BLOB NOT NULL,
142 sample_rate INTEGER NOT NULL,
143 duration REAL NOT NULL,
144 generated_at INTEGER NOT NULL
145 );
146 CREATE INDEX IF NOT EXISTS idx_analysis_duration ON audio_analysis(duration);
147 CREATE INDEX IF NOT EXISTS idx_analysis_classification ON audio_analysis(classification);
148 CREATE INDEX IF NOT EXISTS idx_samples_name ON samples(original_name);
149 "#;
150
151 const MIGRATION_005: &str = r#"
152 CREATE TABLE IF NOT EXISTS user_config (key TEXT PRIMARY KEY, value TEXT NOT NULL);
153 "#;
154
155 const MIGRATION_006: &str = r#"
156 CREATE TABLE IF NOT EXISTS fingerprints (
157 hash TEXT PRIMARY KEY REFERENCES samples(hash) ON DELETE CASCADE,
158 envelope BLOB NOT NULL,
159 sample_rate INTEGER NOT NULL,
160 generated_at INTEGER NOT NULL
161 );
162 "#;
163
164 const MIGRATION_007: &str = r#"
165 -- Per-VFS toggle for syncing audio file blobs to cloud (metadata always syncs)
166 ALTER TABLE vfs ADD COLUMN sync_files INTEGER NOT NULL DEFAULT 0;
167
168 -- Sync metadata key-value store
169 CREATE TABLE IF NOT EXISTS sync_state (
170 key TEXT PRIMARY KEY,
171 value TEXT NOT NULL
172 );
173 INSERT OR IGNORE INTO sync_state (key, value) VALUES
174 ('device_id', ''),
175 ('pull_cursor', ''),
176 ('auto_sync_enabled', '0'),
177 ('sync_interval_minutes', '15'),
178 ('applying_remote', '0'),
179 ('last_sync_at', ''),
180 ('initial_snapshot_done', '0');
181
182 -- Local change log for push/pull sync
183 CREATE TABLE IF NOT EXISTS sync_changelog (
184 id INTEGER PRIMARY KEY AUTOINCREMENT,
185 table_name TEXT NOT NULL,
186 op TEXT NOT NULL,
187 row_id TEXT NOT NULL,
188 timestamp TEXT NOT NULL DEFAULT (datetime('now')),
189 data TEXT,
190 pushed INTEGER NOT NULL DEFAULT 0
191 );
192 CREATE INDEX IF NOT EXISTS idx_changelog_pushed ON sync_changelog(pushed);
193
194 -- ── Triggers: record changes unless applying remote data ──
195
196 -- samples
197 CREATE TRIGGER IF NOT EXISTS sync_samples_insert AFTER INSERT ON samples
198 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
199 BEGIN
200 INSERT INTO sync_changelog (table_name, op, row_id, data)
201 VALUES ('samples', 'INSERT', NEW.hash,
202 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
203 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
204 'import_date', NEW.import_date, 'last_modified', NEW.last_modified));
205 END;
206
207 CREATE TRIGGER IF NOT EXISTS sync_samples_update AFTER UPDATE ON samples
208 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
209 BEGIN
210 INSERT INTO sync_changelog (table_name, op, row_id, data)
211 VALUES ('samples', 'UPDATE', NEW.hash,
212 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
213 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
214 'import_date', NEW.import_date, 'last_modified', NEW.last_modified));
215 END;
216
217 CREATE TRIGGER IF NOT EXISTS sync_samples_delete AFTER DELETE ON samples
218 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
219 BEGIN
220 INSERT INTO sync_changelog (table_name, op, row_id, data)
221 VALUES ('samples', 'DELETE', OLD.hash, NULL);
222 END;
223
224 -- audio_analysis
225 CREATE TRIGGER IF NOT EXISTS sync_audio_analysis_insert AFTER INSERT ON audio_analysis
226 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
227 BEGIN
228 INSERT INTO sync_changelog (table_name, op, row_id, data)
229 VALUES ('audio_analysis', 'INSERT', NEW.hash,
230 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
231 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
232 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
233 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
234 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
235 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
236 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification));
237 END;
238
239 CREATE TRIGGER IF NOT EXISTS sync_audio_analysis_update AFTER UPDATE ON audio_analysis
240 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
241 BEGIN
242 INSERT INTO sync_changelog (table_name, op, row_id, data)
243 VALUES ('audio_analysis', 'UPDATE', NEW.hash,
244 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
245 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
246 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
247 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
248 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
249 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
250 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification));
251 END;
252
253 CREATE TRIGGER IF NOT EXISTS sync_audio_analysis_delete AFTER DELETE ON audio_analysis
254 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
255 BEGIN
256 INSERT INTO sync_changelog (table_name, op, row_id, data)
257 VALUES ('audio_analysis', 'DELETE', OLD.hash, NULL);
258 END;
259
260 -- vfs
261 CREATE TRIGGER IF NOT EXISTS sync_vfs_insert AFTER INSERT ON vfs
262 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
263 BEGIN
264 INSERT INTO sync_changelog (table_name, op, row_id, data)
265 VALUES ('vfs', 'INSERT', CAST(NEW.id AS TEXT),
266 json_object('id', NEW.id, 'name', NEW.name,
267 'created_at', NEW.created_at, 'modified_at', NEW.modified_at,
268 'sync_files', NEW.sync_files));
269 END;
270
271 CREATE TRIGGER IF NOT EXISTS sync_vfs_update AFTER UPDATE ON vfs
272 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
273 BEGIN
274 INSERT INTO sync_changelog (table_name, op, row_id, data)
275 VALUES ('vfs', 'UPDATE', CAST(NEW.id AS TEXT),
276 json_object('id', NEW.id, 'name', NEW.name,
277 'created_at', NEW.created_at, 'modified_at', NEW.modified_at,
278 'sync_files', NEW.sync_files));
279 END;
280
281 CREATE TRIGGER IF NOT EXISTS sync_vfs_delete AFTER DELETE ON vfs
282 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
283 BEGIN
284 INSERT INTO sync_changelog (table_name, op, row_id, data)
285 VALUES ('vfs', 'DELETE', CAST(OLD.id AS TEXT), NULL);
286 END;
287
288 -- vfs_nodes
289 CREATE TRIGGER IF NOT EXISTS sync_vfs_nodes_insert AFTER INSERT ON vfs_nodes
290 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
291 BEGIN
292 INSERT INTO sync_changelog (table_name, op, row_id, data)
293 VALUES ('vfs_nodes', 'INSERT', CAST(NEW.id AS TEXT),
294 json_object('id', NEW.id, 'vfs_id', NEW.vfs_id, 'parent_id', NEW.parent_id,
295 'name', NEW.name, 'node_type', NEW.node_type,
296 'sample_hash', NEW.sample_hash, 'created_at', NEW.created_at));
297 END;
298
299 CREATE TRIGGER IF NOT EXISTS sync_vfs_nodes_update AFTER UPDATE ON vfs_nodes
300 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
301 BEGIN
302 INSERT INTO sync_changelog (table_name, op, row_id, data)
303 VALUES ('vfs_nodes', 'UPDATE', CAST(NEW.id AS TEXT),
304 json_object('id', NEW.id, 'vfs_id', NEW.vfs_id, 'parent_id', NEW.parent_id,
305 'name', NEW.name, 'node_type', NEW.node_type,
306 'sample_hash', NEW.sample_hash, 'created_at', NEW.created_at));
307 END;
308
309 CREATE TRIGGER IF NOT EXISTS sync_vfs_nodes_delete AFTER DELETE ON vfs_nodes
310 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
311 BEGIN
312 INSERT INTO sync_changelog (table_name, op, row_id, data)
313 VALUES ('vfs_nodes', 'DELETE', CAST(OLD.id AS TEXT), NULL);
314 END;
315
316 -- tags
317 CREATE TRIGGER IF NOT EXISTS sync_tags_insert AFTER INSERT ON tags
318 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
319 BEGIN
320 INSERT INTO sync_changelog (table_name, op, row_id, data)
321 VALUES ('tags', 'INSERT', NEW.sample_hash || ':' || NEW.tag,
322 json_object('sample_hash', NEW.sample_hash, 'tag', NEW.tag));
323 END;
324
325 CREATE TRIGGER IF NOT EXISTS sync_tags_delete AFTER DELETE ON tags
326 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
327 BEGIN
328 INSERT INTO sync_changelog (table_name, op, row_id, data)
329 VALUES ('tags', 'DELETE', OLD.sample_hash || ':' || OLD.tag, NULL);
330 END;
331
332 -- collections
333 CREATE TRIGGER IF NOT EXISTS sync_collections_insert AFTER INSERT ON collections
334 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
335 BEGIN
336 INSERT INTO sync_changelog (table_name, op, row_id, data)
337 VALUES ('collections', 'INSERT', CAST(NEW.id AS TEXT),
338 json_object('id', NEW.id, 'name', NEW.name,
339 'description', NEW.description, 'created_at', NEW.created_at));
340 END;
341
342 CREATE TRIGGER IF NOT EXISTS sync_collections_update AFTER UPDATE ON collections
343 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
344 BEGIN
345 INSERT INTO sync_changelog (table_name, op, row_id, data)
346 VALUES ('collections', 'UPDATE', CAST(NEW.id AS TEXT),
347 json_object('id', NEW.id, 'name', NEW.name,
348 'description', NEW.description, 'created_at', NEW.created_at));
349 END;
350
351 CREATE TRIGGER IF NOT EXISTS sync_collections_delete AFTER DELETE ON collections
352 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
353 BEGIN
354 INSERT INTO sync_changelog (table_name, op, row_id, data)
355 VALUES ('collections', 'DELETE', CAST(OLD.id AS TEXT), NULL);
356 END;
357
358 -- collection_members
359 CREATE TRIGGER IF NOT EXISTS sync_collection_members_insert AFTER INSERT ON collection_members
360 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
361 BEGIN
362 INSERT INTO sync_changelog (table_name, op, row_id, data)
363 VALUES ('collection_members', 'INSERT',
364 CAST(NEW.collection_id AS TEXT) || ':' || NEW.sample_hash,
365 json_object('collection_id', NEW.collection_id, 'sample_hash', NEW.sample_hash,
366 'added_at', NEW.added_at));
367 END;
368
369 CREATE TRIGGER IF NOT EXISTS sync_collection_members_delete AFTER DELETE ON collection_members
370 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
371 BEGIN
372 INSERT INTO sync_changelog (table_name, op, row_id, data)
373 VALUES ('collection_members', 'DELETE',
374 CAST(OLD.collection_id AS TEXT) || ':' || OLD.sample_hash, NULL);
375 END;
376
377 -- smart_folders sync triggers used to live here. Removed 2026-06-02:
378 -- M015 drops `smart_folders` and merges its contents into
379 -- `collections.filter_json`, so replaying M007 against a post-M015 schema
380 -- failed with "no such table". The triggers had no functional effect on
381 -- any install path (smart_folders is empty on first-run between M001's
382 -- CREATE and M015's DROP), so removing them is invisible. M015's
383 -- `DROP TRIGGER IF EXISTS sync_smart_folders_*` stays in place for DBs
384 -- that already applied the old M007 and need the triggers cleaned up.
385
386 -- user_config (exclude sync-internal keys)
387 CREATE TRIGGER IF NOT EXISTS sync_user_config_insert AFTER INSERT ON user_config
388 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
389 AND NEW.key NOT LIKE 'sync_%'
390 BEGIN
391 INSERT INTO sync_changelog (table_name, op, row_id, data)
392 VALUES ('user_config', 'INSERT', NEW.key,
393 json_object('key', NEW.key, 'value', NEW.value));
394 END;
395
396 CREATE TRIGGER IF NOT EXISTS sync_user_config_update AFTER UPDATE ON user_config
397 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
398 AND NEW.key NOT LIKE 'sync_%'
399 BEGIN
400 INSERT INTO sync_changelog (table_name, op, row_id, data)
401 VALUES ('user_config', 'UPDATE', NEW.key,
402 json_object('key', NEW.key, 'value', NEW.value));
403 END;
404
405 CREATE TRIGGER IF NOT EXISTS sync_user_config_delete AFTER DELETE ON user_config
406 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
407 AND OLD.key NOT LIKE 'sync_%'
408 BEGIN
409 INSERT INTO sync_changelog (table_name, op, row_id, data)
410 VALUES ('user_config', 'DELETE', OLD.key, NULL);
411 END;
412 "#;
413
414 const MIGRATION_008: &str = r#"
415 -- cloud_only: 1 when the local blob has been deleted but exists in cloud storage
416 ALTER TABLE samples ADD COLUMN cloud_only INTEGER NOT NULL DEFAULT 0;
417
418 -- Recreate samples triggers to include cloud_only in the JSON data
419 DROP TRIGGER IF EXISTS sync_samples_insert;
420 DROP TRIGGER IF EXISTS sync_samples_update;
421
422 CREATE TRIGGER sync_samples_insert AFTER INSERT ON samples
423 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
424 BEGIN
425 INSERT INTO sync_changelog (table_name, op, row_id, data)
426 VALUES ('samples', 'INSERT', NEW.hash,
427 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
428 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
429 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
430 'cloud_only', NEW.cloud_only));
431 END;
432
433 CREATE TRIGGER sync_samples_update AFTER UPDATE ON samples
434 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
435 BEGIN
436 INSERT INTO sync_changelog (table_name, op, row_id, data)
437 VALUES ('samples', 'UPDATE', NEW.hash,
438 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
439 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
440 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
441 'cloud_only', NEW.cloud_only));
442 END;
443 "#;
444
445 const MIGRATION_009: &str = r#"
446 -- Duration on samples table so it's available immediately after import (before analysis).
447 ALTER TABLE samples ADD COLUMN duration REAL;
448
449 -- Recreate samples triggers to include duration in the JSON data
450 DROP TRIGGER IF EXISTS sync_samples_insert;
451 DROP TRIGGER IF EXISTS sync_samples_update;
452
453 CREATE TRIGGER sync_samples_insert AFTER INSERT ON samples
454 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
455 BEGIN
456 INSERT INTO sync_changelog (table_name, op, row_id, data)
457 VALUES ('samples', 'INSERT', NEW.hash,
458 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
459 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
460 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
461 'cloud_only', NEW.cloud_only, 'duration', NEW.duration));
462 END;
463
464 CREATE TRIGGER sync_samples_update AFTER UPDATE ON samples
465 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
466 BEGIN
467 INSERT INTO sync_changelog (table_name, op, row_id, data)
468 VALUES ('samples', 'UPDATE', NEW.hash,
469 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
470 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
471 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
472 'cloud_only', NEW.cloud_only, 'duration', NEW.duration));
473 END;
474 "#;
475
476 const MIGRATION_010: &str = r#"
477 -- New spectral and waveform features for improved classification
478 ALTER TABLE audio_analysis ADD COLUMN spectral_bandwidth REAL;
479 ALTER TABLE audio_analysis ADD COLUMN centroid_variance REAL;
480 ALTER TABLE audio_analysis ADD COLUMN crest_factor REAL;
481 ALTER TABLE audio_analysis ADD COLUMN attack_time REAL;
482
483 -- Recreate audio_analysis sync triggers to include new columns
484 DROP TRIGGER IF EXISTS sync_audio_analysis_insert;
485 DROP TRIGGER IF EXISTS sync_audio_analysis_update;
486
487 CREATE TRIGGER sync_audio_analysis_insert AFTER INSERT ON audio_analysis
488 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
489 BEGIN
490 INSERT INTO sync_changelog (table_name, op, row_id, data)
491 VALUES ('audio_analysis', 'INSERT', NEW.hash,
492 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
493 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
494 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
495 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
496 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
497 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
498 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification,
499 'spectral_bandwidth', NEW.spectral_bandwidth, 'centroid_variance', NEW.centroid_variance,
500 'crest_factor', NEW.crest_factor, 'attack_time', NEW.attack_time));
501 END;
502
503 CREATE TRIGGER sync_audio_analysis_update AFTER UPDATE ON audio_analysis
504 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
505 BEGIN
506 INSERT INTO sync_changelog (table_name, op, row_id, data)
507 VALUES ('audio_analysis', 'UPDATE', NEW.hash,
508 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
509 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
510 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
511 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
512 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
513 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
514 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification,
515 'spectral_bandwidth', NEW.spectral_bandwidth, 'centroid_variance', NEW.centroid_variance,
516 'crest_factor', NEW.crest_factor, 'attack_time', NEW.attack_time));
517 END;
518 "#;
519
520 const MIGRATION_011: &str = r#"
521 -- ML classifier confidence score
522 ALTER TABLE audio_analysis ADD COLUMN classification_confidence REAL;
523
524 -- Recreate audio_analysis sync triggers to include new column
525 DROP TRIGGER IF EXISTS sync_audio_analysis_insert;
526 DROP TRIGGER IF EXISTS sync_audio_analysis_update;
527
528 CREATE TRIGGER sync_audio_analysis_insert AFTER INSERT ON audio_analysis
529 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
530 BEGIN
531 INSERT INTO sync_changelog (table_name, op, row_id, data)
532 VALUES ('audio_analysis', 'INSERT', NEW.hash,
533 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
534 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
535 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
536 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
537 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
538 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
539 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification,
540 'spectral_bandwidth', NEW.spectral_bandwidth, 'centroid_variance', NEW.centroid_variance,
541 'crest_factor', NEW.crest_factor, 'attack_time', NEW.attack_time,
542 'classification_confidence', NEW.classification_confidence));
543 END;
544
545 CREATE TRIGGER sync_audio_analysis_update AFTER UPDATE ON audio_analysis
546 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
547 BEGIN
548 INSERT INTO sync_changelog (table_name, op, row_id, data)
549 VALUES ('audio_analysis', 'UPDATE', NEW.hash,
550 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
551 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
552 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
553 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
554 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
555 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
556 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification,
557 'spectral_bandwidth', NEW.spectral_bandwidth, 'centroid_variance', NEW.centroid_variance,
558 'crest_factor', NEW.crest_factor, 'attack_time', NEW.attack_time,
559 'classification_confidence', NEW.classification_confidence));
560 END;
561 "#;
562
563 const MIGRATION_012: &str = r#"
564 -- Edit history: tracks destructive edits for future undo support
565 CREATE TABLE IF NOT EXISTS edit_history (
566 id INTEGER PRIMARY KEY AUTOINCREMENT,
567 source_hash TEXT NOT NULL,
568 result_hash TEXT NOT NULL,
569 operation TEXT NOT NULL,
570 params_json TEXT,
571 created_at INTEGER NOT NULL DEFAULT (unixepoch())
572 );
573 CREATE INDEX IF NOT EXISTS idx_edit_history_source ON edit_history(source_hash);
574 CREATE INDEX IF NOT EXISTS idx_edit_history_result ON edit_history(result_hash);
575
576 -- Sync trigger for edit_history
577 CREATE TRIGGER IF NOT EXISTS sync_edit_history_insert AFTER INSERT ON edit_history
578 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
579 BEGIN
580 INSERT INTO sync_changelog (table_name, op, row_id, data)
581 VALUES ('edit_history', 'INSERT', CAST(NEW.id AS TEXT),
582 json_object('id', NEW.id, 'source_hash', NEW.source_hash,
583 'result_hash', NEW.result_hash, 'operation', NEW.operation,
584 'params_json', NEW.params_json, 'created_at', NEW.created_at));
585 END;
586 "#;
587
588 const MIGRATION_013: &str = r#"
589 -- Loose-files mode: remember original file path instead of copying into vault.
590 -- NULL = normal (blob in samples/), non-NULL = loose-files (blob at this path).
591 -- Intentionally excluded from sync triggers — source_path is device-local.
592 ALTER TABLE samples ADD COLUMN source_path TEXT;
593 "#;
594
595 const MIGRATION_014: &str = r#"
596 -- Prevent duplicate root-level VFS node names. The existing UNIQUE(vfs_id, parent_id, name)
597 -- constraint treats NULLs as distinct, so root nodes (parent_id IS NULL) could collide.
598 CREATE UNIQUE INDEX IF NOT EXISTS idx_vfs_nodes_root_unique
599 ON vfs_nodes(vfs_id, name) WHERE parent_id IS NULL;
600 "#;
601
602 const MIGRATION_015: &str = r#"
603 -- Merge smart folders into collections: add a filter_json column.
604 -- NULL filter_json = manual collection, non-NULL = dynamic (saved search).
605 ALTER TABLE collections ADD COLUMN filter_json TEXT;
606 -- Migrate existing smart folders into collections with their filters.
607 INSERT OR IGNORE INTO collections (name, description, created_at, filter_json)
608 SELECT name, NULL, created_at, query_json FROM smart_folders;
609 -- Drop the smart_folders table (triggers first, then table).
610 DROP TRIGGER IF EXISTS sync_smart_folders_insert;
611 DROP TRIGGER IF EXISTS sync_smart_folders_update;
612 DROP TRIGGER IF EXISTS sync_smart_folders_delete;
613 DROP TABLE IF EXISTS smart_folders;
614 "#;
615
616 const MIGRATION_016: &str = r#"
617 -- Exclude loose-files mode from sync: a compromised server or second device
618 -- should not be able to silently flip a security-relevant setting.
619 DROP TRIGGER IF EXISTS sync_user_config_insert;
620 DROP TRIGGER IF EXISTS sync_user_config_update;
621 DROP TRIGGER IF EXISTS sync_user_config_delete;
622
623 CREATE TRIGGER sync_user_config_insert AFTER INSERT ON user_config
624 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
625 AND NEW.key NOT LIKE 'sync_%'
626 AND NEW.key != 'unsafe_mode'
627 BEGIN
628 INSERT INTO sync_changelog (table_name, op, row_id, data)
629 VALUES ('user_config', 'INSERT', NEW.key,
630 json_object('key', NEW.key, 'value', NEW.value));
631 END;
632
633 CREATE TRIGGER sync_user_config_update AFTER UPDATE ON user_config
634 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
635 AND NEW.key NOT LIKE 'sync_%'
636 AND NEW.key != 'unsafe_mode'
637 BEGIN
638 INSERT INTO sync_changelog (table_name, op, row_id, data)
639 VALUES ('user_config', 'UPDATE', NEW.key,
640 json_object('key', NEW.key, 'value', NEW.value));
641 END;
642
643 CREATE TRIGGER sync_user_config_delete AFTER DELETE ON user_config
644 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
645 AND OLD.key NOT LIKE 'sync_%'
646 AND OLD.key != 'unsafe_mode'
647 BEGIN
648 INSERT INTO sync_changelog (table_name, op, row_id, data)
649 VALUES ('user_config', 'DELETE', OLD.key, NULL);
650 END;
651 "#;
652
653 const MIGRATION_017: &str = r#"
654 -- Schema-only half of the 'unsafe_mode' -> 'loose_files' rename.
655 -- Recreates the sync-exclusion triggers to reference the new key literal
656 -- in their WHEN clauses (triggers can't parameterize key names, so the
657 -- rewrite has to live in a migration). The runtime row-copy
658 -- (unsafe_mode value -> loose_files row) lives in main.rs at the
659 -- vault-open path; doing it there avoids running it against every
660 -- attached/auxiliary DB that goes through migrate().
661 DROP TRIGGER IF EXISTS sync_user_config_insert;
662 DROP TRIGGER IF EXISTS sync_user_config_update;
663 DROP TRIGGER IF EXISTS sync_user_config_delete;
664
665 CREATE TRIGGER sync_user_config_insert AFTER INSERT ON user_config
666 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
667 AND NEW.key NOT LIKE 'sync_%'
668 AND NEW.key != 'loose_files'
669 BEGIN
670 INSERT INTO sync_changelog (table_name, op, row_id, data)
671 VALUES ('user_config', 'INSERT', NEW.key,
672 json_object('key', NEW.key, 'value', NEW.value));
673 END;
674
675 CREATE TRIGGER sync_user_config_update AFTER UPDATE ON user_config
676 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
677 AND NEW.key NOT LIKE 'sync_%'
678 AND NEW.key != 'loose_files'
679 BEGIN
680 INSERT INTO sync_changelog (table_name, op, row_id, data)
681 VALUES ('user_config', 'UPDATE', NEW.key,
682 json_object('key', NEW.key, 'value', NEW.value));
683 END;
684
685 CREATE TRIGGER sync_user_config_delete AFTER DELETE ON user_config
686 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
687 AND OLD.key NOT LIKE 'sync_%'
688 AND OLD.key != 'loose_files'
689 BEGIN
690 INSERT INTO sync_changelog (table_name, op, row_id, data)
691 VALUES ('user_config', 'DELETE', OLD.key, NULL);
692 END;
693 "#;
694
695 /// M018 — hash sensitive row_id values on the wire.
696 ///
697 /// Per 2026-06-02 SyncKit upload audit: the `sync_changelog.row_id` column
698 /// is sent to the server in cleartext, and the existing triggers stuffed
699 /// user content into it (tag strings as `sample_hash:tag`, raw sample
700 /// SHA-256s as content fingerprints, collection bindings). The encrypted
701 /// `data` field replicated the same content, but the cleartext row_id leak
702 /// stood regardless.
703 ///
704 /// This migration:
705 ///
706 /// 1. Generates a per-user `row_id_salt` in `sync_state` (never synced) so
707 /// even a global rainbow table over common tag strings can't deanonymise
708 /// users. SQLite's `randomblob(32)` is seeded from /dev/urandom on POSIX
709 /// and CryptGenRandom on Windows.
710 /// 2. Recreates every sync trigger to wrap row_id in
711 /// `hash_row_id(salt, canonical_key)`. The encrypted `data` field still
712 /// carries the cleartext for the receiving device.
713 /// 3. Extends DELETE triggers to emit the canonical key(s) in `data` so the
714 /// pull-side `resolve::apply_delete` can reconstruct the WHERE clause
715 /// without parsing row_id (which is now opaque).
716 /// 4. Rewrites every unpushed row in `sync_changelog` that contained
717 /// sensitive cleartext: hashes the row_id, and for DELETE rows in
718 /// composite-key tables (`tags`, `collection_members`) backfills the
719 /// canonical key from the now-being-hashed cleartext into `data`.
720 ///
721 /// Numeric-id tables (vfs, vfs_nodes, collections, smart_folders,
722 /// edit_history) and user_config are left as-is — their row_ids carry
723 /// either opaque integers or a closed set of app-defined config keys, no
724 /// user content.
725 const MIGRATION_018: &str = r#"
726 -- 1. Per-user salt for row_id hashing. `INSERT OR IGNORE` so re-running
727 -- this migration after a partial crash doesn't rotate the salt and
728 -- invalidate already-hashed row_ids.
729 INSERT OR IGNORE INTO sync_state (key, value)
730 VALUES ('row_id_salt', lower(hex(randomblob(32))));
731
732 -- 2. Backfill canonical-key `data` for unpushed DELETE rows in composite-PK
733 -- tables. Must run BEFORE the row_id hash so we still have the cleartext
734 -- composite to parse.
735 UPDATE sync_changelog
736 SET data = json_object(
737 'sample_hash', substr(row_id, 1, instr(row_id, ':') - 1),
738 'tag', substr(row_id, instr(row_id, ':') + 1)
739 )
740 WHERE table_name = 'tags' AND op = 'DELETE' AND pushed = 0
741 AND data IS NULL
742 AND instr(row_id, ':') > 0;
743
744 UPDATE sync_changelog
745 SET data = json_object(
746 'collection_id', substr(row_id, 1, instr(row_id, ':') - 1),
747 'sample_hash', substr(row_id, instr(row_id, ':') + 1)
748 )
749 WHERE table_name = 'collection_members' AND op = 'DELETE' AND pushed = 0
750 AND data IS NULL
751 AND instr(row_id, ':') > 0;
752
753 -- 3. For single-PK sensitive-row_id tables, backfill canonical-key `data`
754 -- for unpushed DELETE rows so apply_delete on the pulling device can
755 -- reconstruct the WHERE clause from the encrypted data alone.
756 UPDATE sync_changelog
757 SET data = json_object('hash', row_id)
758 WHERE table_name IN ('samples', 'audio_analysis')
759 AND op = 'DELETE' AND pushed = 0 AND data IS NULL;
760
761 -- 4. Now hash the row_id for every unpushed row whose cleartext leaked user
762 -- content (sample hashes, tag strings).
763 UPDATE sync_changelog
764 SET row_id = hash_row_id(
765 (SELECT value FROM sync_state WHERE key = 'row_id_salt'),
766 row_id
767 )
768 WHERE pushed = 0
769 AND table_name IN ('samples', 'audio_analysis', 'tags', 'collection_members');
770
771 -- 5. Drop and recreate every sync trigger with hash_row_id wrapping.
772 -- DELETE triggers gain a canonical-key `data` payload.
773
774 DROP TRIGGER IF EXISTS sync_samples_insert;
775 DROP TRIGGER IF EXISTS sync_samples_update;
776 DROP TRIGGER IF EXISTS sync_samples_delete;
777 DROP TRIGGER IF EXISTS sync_audio_analysis_insert;
778 DROP TRIGGER IF EXISTS sync_audio_analysis_update;
779 DROP TRIGGER IF EXISTS sync_audio_analysis_delete;
780 DROP TRIGGER IF EXISTS sync_vfs_insert;
781 DROP TRIGGER IF EXISTS sync_vfs_update;
782 DROP TRIGGER IF EXISTS sync_vfs_delete;
783 DROP TRIGGER IF EXISTS sync_vfs_nodes_insert;
784 DROP TRIGGER IF EXISTS sync_vfs_nodes_update;
785 DROP TRIGGER IF EXISTS sync_vfs_nodes_delete;
786 DROP TRIGGER IF EXISTS sync_tags_insert;
787 DROP TRIGGER IF EXISTS sync_tags_delete;
788 DROP TRIGGER IF EXISTS sync_collections_insert;
789 DROP TRIGGER IF EXISTS sync_collections_update;
790 DROP TRIGGER IF EXISTS sync_collections_delete;
791 DROP TRIGGER IF EXISTS sync_collection_members_insert;
792 DROP TRIGGER IF EXISTS sync_collection_members_delete;
793 -- smart_folders table was dropped in M015; M007 triggers are no-ops post-M015
794 DROP TRIGGER IF EXISTS sync_user_config_insert;
795 DROP TRIGGER IF EXISTS sync_user_config_update;
796 DROP TRIGGER IF EXISTS sync_user_config_delete;
797 DROP TRIGGER IF EXISTS sync_edit_history_insert;
798
799 -- samples (single PK: hash)
800 CREATE TRIGGER sync_samples_insert AFTER INSERT ON samples
801 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
802 BEGIN
803 INSERT INTO sync_changelog (table_name, op, row_id, data)
804 VALUES ('samples', 'INSERT',
805 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), NEW.hash),
806 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
807 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
808 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
809 'duration', NEW.duration, 'cloud_only', NEW.cloud_only));
810 END;
811
812 CREATE TRIGGER sync_samples_update AFTER UPDATE ON samples
813 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
814 BEGIN
815 INSERT INTO sync_changelog (table_name, op, row_id, data)
816 VALUES ('samples', 'UPDATE',
817 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), NEW.hash),
818 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
819 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
820 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
821 'duration', NEW.duration, 'cloud_only', NEW.cloud_only));
822 END;
823
824 CREATE TRIGGER sync_samples_delete AFTER DELETE ON samples
825 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
826 BEGIN
827 INSERT INTO sync_changelog (table_name, op, row_id, data)
828 VALUES ('samples', 'DELETE',
829 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), OLD.hash),
830 json_object('hash', OLD.hash));
831 END;
832
833 -- audio_analysis (single PK: hash)
834 CREATE TRIGGER sync_audio_analysis_insert AFTER INSERT ON audio_analysis
835 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
836 BEGIN
837 INSERT INTO sync_changelog (table_name, op, row_id, data)
838 VALUES ('audio_analysis', 'INSERT',
839 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), NEW.hash),
840 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
841 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
842 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
843 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
844 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
845 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
846 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification));
847 END;
848
849 CREATE TRIGGER sync_audio_analysis_update AFTER UPDATE ON audio_analysis
850 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
851 BEGIN
852 INSERT INTO sync_changelog (table_name, op, row_id, data)
853 VALUES ('audio_analysis', 'UPDATE',
854 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), NEW.hash),
855 json_object('hash', NEW.hash, 'bpm', NEW.bpm, 'musical_key', NEW.musical_key,
856 'duration', NEW.duration, 'sample_rate', NEW.sample_rate, 'channels', NEW.channels,
857 'peak_db', NEW.peak_db, 'rms_db', NEW.rms_db, 'is_loop', NEW.is_loop,
858 'spectral_centroid', NEW.spectral_centroid, 'onset_strength', NEW.onset_strength,
859 'analyzed_at', NEW.analyzed_at, 'lufs', NEW.lufs,
860 'spectral_flatness', NEW.spectral_flatness, 'spectral_rolloff', NEW.spectral_rolloff,
861 'zero_crossing_rate', NEW.zero_crossing_rate, 'classification', NEW.classification));
862 END;
863
864 CREATE TRIGGER sync_audio_analysis_delete AFTER DELETE ON audio_analysis
865 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
866 BEGIN
867 INSERT INTO sync_changelog (table_name, op, row_id, data)
868 VALUES ('audio_analysis', 'DELETE',
869 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), OLD.hash),
870 json_object('hash', OLD.hash));
871 END;
872
873 -- vfs (numeric PK — row_id stays as id string; not sensitive)
874 CREATE TRIGGER sync_vfs_insert AFTER INSERT ON vfs
875 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
876 BEGIN
877 INSERT INTO sync_changelog (table_name, op, row_id, data)
878 VALUES ('vfs', 'INSERT', CAST(NEW.id AS TEXT),
879 json_object('id', NEW.id, 'name', NEW.name,
880 'created_at', NEW.created_at, 'modified_at', NEW.modified_at,
881 'sync_files', NEW.sync_files));
882 END;
883
884 CREATE TRIGGER sync_vfs_update AFTER UPDATE ON vfs
885 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
886 BEGIN
887 INSERT INTO sync_changelog (table_name, op, row_id, data)
888 VALUES ('vfs', 'UPDATE', CAST(NEW.id AS TEXT),
889 json_object('id', NEW.id, 'name', NEW.name,
890 'created_at', NEW.created_at, 'modified_at', NEW.modified_at,
891 'sync_files', NEW.sync_files));
892 END;
893
894 CREATE TRIGGER sync_vfs_delete AFTER DELETE ON vfs
895 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
896 BEGIN
897 INSERT INTO sync_changelog (table_name, op, row_id, data)
898 VALUES ('vfs', 'DELETE', CAST(OLD.id AS TEXT), json_object('id', OLD.id));
899 END;
900
901 -- vfs_nodes (numeric PK)
902 CREATE TRIGGER sync_vfs_nodes_insert AFTER INSERT ON vfs_nodes
903 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
904 BEGIN
905 INSERT INTO sync_changelog (table_name, op, row_id, data)
906 VALUES ('vfs_nodes', 'INSERT', CAST(NEW.id AS TEXT),
907 json_object('id', NEW.id, 'vfs_id', NEW.vfs_id, 'parent_id', NEW.parent_id,
908 'name', NEW.name, 'node_type', NEW.node_type,
909 'sample_hash', NEW.sample_hash, 'created_at', NEW.created_at));
910 END;
911
912 CREATE TRIGGER sync_vfs_nodes_update AFTER UPDATE ON vfs_nodes
913 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
914 BEGIN
915 INSERT INTO sync_changelog (table_name, op, row_id, data)
916 VALUES ('vfs_nodes', 'UPDATE', CAST(NEW.id AS TEXT),
917 json_object('id', NEW.id, 'vfs_id', NEW.vfs_id, 'parent_id', NEW.parent_id,
918 'name', NEW.name, 'node_type', NEW.node_type,
919 'sample_hash', NEW.sample_hash, 'created_at', NEW.created_at));
920 END;
921
922 CREATE TRIGGER sync_vfs_nodes_delete AFTER DELETE ON vfs_nodes
923 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
924 BEGIN
925 INSERT INTO sync_changelog (table_name, op, row_id, data)
926 VALUES ('vfs_nodes', 'DELETE', CAST(OLD.id AS TEXT), json_object('id', OLD.id));
927 END;
928
929 -- tags (composite PK: sample_hash + tag — both sensitive)
930 CREATE TRIGGER sync_tags_insert AFTER INSERT ON tags
931 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
932 BEGIN
933 INSERT INTO sync_changelog (table_name, op, row_id, data)
934 VALUES ('tags', 'INSERT',
935 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'),
936 NEW.sample_hash || ':' || NEW.tag),
937 json_object('sample_hash', NEW.sample_hash, 'tag', NEW.tag));
938 END;
939
940 CREATE TRIGGER sync_tags_delete AFTER DELETE ON tags
941 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
942 BEGIN
943 INSERT INTO sync_changelog (table_name, op, row_id, data)
944 VALUES ('tags', 'DELETE',
945 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'),
946 OLD.sample_hash || ':' || OLD.tag),
947 json_object('sample_hash', OLD.sample_hash, 'tag', OLD.tag));
948 END;
949
950 -- collections (numeric PK)
951 CREATE TRIGGER sync_collections_insert AFTER INSERT ON collections
952 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
953 BEGIN
954 INSERT INTO sync_changelog (table_name, op, row_id, data)
955 VALUES ('collections', 'INSERT', CAST(NEW.id AS TEXT),
956 json_object('id', NEW.id, 'name', NEW.name,
957 'description', NEW.description, 'created_at', NEW.created_at,
958 'filter_json', NEW.filter_json));
959 END;
960
961 CREATE TRIGGER sync_collections_update AFTER UPDATE ON collections
962 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
963 BEGIN
964 INSERT INTO sync_changelog (table_name, op, row_id, data)
965 VALUES ('collections', 'UPDATE', CAST(NEW.id AS TEXT),
966 json_object('id', NEW.id, 'name', NEW.name,
967 'description', NEW.description, 'created_at', NEW.created_at,
968 'filter_json', NEW.filter_json));
969 END;
970
971 CREATE TRIGGER sync_collections_delete AFTER DELETE ON collections
972 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
973 BEGIN
974 INSERT INTO sync_changelog (table_name, op, row_id, data)
975 VALUES ('collections', 'DELETE', CAST(OLD.id AS TEXT), json_object('id', OLD.id));
976 END;
977
978 -- collection_members (composite PK: collection_id + sample_hash — hash is sensitive)
979 CREATE TRIGGER sync_collection_members_insert AFTER INSERT ON collection_members
980 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
981 BEGIN
982 INSERT INTO sync_changelog (table_name, op, row_id, data)
983 VALUES ('collection_members', 'INSERT',
984 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'),
985 CAST(NEW.collection_id AS TEXT) || ':' || NEW.sample_hash),
986 json_object('collection_id', NEW.collection_id, 'sample_hash', NEW.sample_hash,
987 'added_at', NEW.added_at));
988 END;
989
990 CREATE TRIGGER sync_collection_members_delete AFTER DELETE ON collection_members
991 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
992 BEGIN
993 INSERT INTO sync_changelog (table_name, op, row_id, data)
994 VALUES ('collection_members', 'DELETE',
995 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'),
996 CAST(OLD.collection_id AS TEXT) || ':' || OLD.sample_hash),
997 json_object('collection_id', OLD.collection_id, 'sample_hash', OLD.sample_hash));
998 END;
999
1000 -- smart_folders table was dropped in M015; not recreating its triggers.
1001
1002 -- user_config (key is app-defined closed set; not sensitive)
1003 CREATE TRIGGER sync_user_config_insert AFTER INSERT ON user_config
1004 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1005 AND NEW.key NOT LIKE 'sync_%'
1006 AND NEW.key != 'loose_files'
1007 BEGIN
1008 INSERT INTO sync_changelog (table_name, op, row_id, data)
1009 VALUES ('user_config', 'INSERT', NEW.key,
1010 json_object('key', NEW.key, 'value', NEW.value));
1011 END;
1012
1013 CREATE TRIGGER sync_user_config_update AFTER UPDATE ON user_config
1014 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1015 AND NEW.key NOT LIKE 'sync_%'
1016 AND NEW.key != 'loose_files'
1017 BEGIN
1018 INSERT INTO sync_changelog (table_name, op, row_id, data)
1019 VALUES ('user_config', 'UPDATE', NEW.key,
1020 json_object('key', NEW.key, 'value', NEW.value));
1021 END;
1022
1023 CREATE TRIGGER sync_user_config_delete AFTER DELETE ON user_config
1024 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1025 AND OLD.key NOT LIKE 'sync_%'
1026 AND OLD.key != 'loose_files'
1027 BEGIN
1028 INSERT INTO sync_changelog (table_name, op, row_id, data)
1029 VALUES ('user_config', 'DELETE', OLD.key, json_object('key', OLD.key));
1030 END;
1031
1032 -- edit_history (numeric PK)
1033 CREATE TRIGGER sync_edit_history_insert AFTER INSERT ON edit_history
1034 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1035 BEGIN
1036 INSERT INTO sync_changelog (table_name, op, row_id, data)
1037 VALUES ('edit_history', 'INSERT', CAST(NEW.id AS TEXT),
1038 json_object('id', NEW.id, 'source_hash', NEW.source_hash,
1039 'result_hash', NEW.result_hash, 'operation', NEW.operation,
1040 'params_json', NEW.params_json, 'created_at', NEW.created_at));
1041 END;
1042 "#;
1043
1044 /// M019 — soft-delete (tombstone) infrastructure for samples.
1045 ///
1046 /// Phase 1 of the multi-device sample-deletion design (see
1047 /// `docs/design-sample-deletion.md`). This migration only lands the
1048 /// schema and bumps the samples triggers to include the new column in
1049 /// their wire-format JSON. No code path currently sets `deleted_at`, so
1050 /// every existing read filter (`WHERE samples.deleted_at IS NULL`) is a
1051 /// no-op until Phase 2 wires up the tombstone+undelete operations.
1052 ///
1053 /// Index is partial — only tombstoned rows are indexed, so the index
1054 /// stays tiny in steady-state (most samples are live).
1055 ///
1056 /// `sample_tombstone_retain_days` defaults to 30 (matches OS Trash
1057 /// conventions). User-configurable via the existing user_config sync
1058 /// trigger; the value syncs across devices.
1059 const MIGRATION_019: &str = r#"
1060 ALTER TABLE samples ADD COLUMN deleted_at INTEGER;
1061 CREATE INDEX IF NOT EXISTS idx_samples_deleted_at
1062 ON samples(deleted_at) WHERE deleted_at IS NOT NULL;
1063
1064 -- Suppress the user_config sync trigger for the duration of this seed
1065 -- INSERT — otherwise the migration would push a spurious row into
1066 -- sync_changelog on every fresh install. The trigger's WHEN clause
1067 -- short-circuits while applying_remote = '1'. Both flips run inside
1068 -- the migration's transaction, so a crash mid-migration rolls back the
1069 -- flag-set along with everything else.
1070 UPDATE sync_state SET value = '1' WHERE key = 'applying_remote';
1071 INSERT OR IGNORE INTO user_config (key, value)
1072 VALUES ('sample_tombstone_retain_days', '30');
1073 UPDATE sync_state SET value = '0' WHERE key = 'applying_remote';
1074
1075 -- Re-emit samples triggers so deleted_at flows through the wire JSON.
1076 -- Existing INSERT/UPDATE bodies list columns explicitly; the new column
1077 -- needs to be added to the json_object call (it doesn't pick up
1078 -- automatically). DELETE trigger needs the column too so the receiving
1079 -- device's apply_upsert sees the tombstone state on a re-INSERT path.
1080 DROP TRIGGER IF EXISTS sync_samples_insert;
1081 DROP TRIGGER IF EXISTS sync_samples_update;
1082 DROP TRIGGER IF EXISTS sync_samples_delete;
1083
1084 CREATE TRIGGER sync_samples_insert AFTER INSERT ON samples
1085 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1086 BEGIN
1087 INSERT INTO sync_changelog (table_name, op, row_id, data)
1088 VALUES ('samples', 'INSERT',
1089 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), NEW.hash),
1090 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
1091 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
1092 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
1093 'duration', NEW.duration, 'cloud_only', NEW.cloud_only,
1094 'source_path', NEW.source_path, 'deleted_at', NEW.deleted_at));
1095 END;
1096
1097 CREATE TRIGGER sync_samples_update AFTER UPDATE ON samples
1098 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1099 BEGIN
1100 INSERT INTO sync_changelog (table_name, op, row_id, data)
1101 VALUES ('samples', 'UPDATE',
1102 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), NEW.hash),
1103 json_object('hash', NEW.hash, 'original_name', NEW.original_name,
1104 'file_extension', NEW.file_extension, 'file_size', NEW.file_size,
1105 'import_date', NEW.import_date, 'last_modified', NEW.last_modified,
1106 'duration', NEW.duration, 'cloud_only', NEW.cloud_only,
1107 'source_path', NEW.source_path, 'deleted_at', NEW.deleted_at));
1108 END;
1109
1110 CREATE TRIGGER sync_samples_delete AFTER DELETE ON samples
1111 WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
1112 BEGIN
1113 INSERT INTO sync_changelog (table_name, op, row_id, data)
1114 VALUES ('samples', 'DELETE',
1115 hash_row_id((SELECT value FROM sync_state WHERE key = 'row_id_salt'), OLD.hash),
1116 json_object('hash', OLD.hash));
1117 END;
1118 "#;
1119
1120 /// Register `hash_row_id(salt, key) -> TEXT` as a deterministic SQLite
1121 /// function on the given connection. Used by the M018 sync triggers so the
1122 /// `sync_changelog.row_id` field never carries cleartext content (tag strings,
1123 /// raw sample SHA-256s) on the wire. The salt is a per-user random nonce
1124 /// stored in `sync_state` and never synced; without it, even a global rainbow
1125 /// table over common tag strings would deanonymise users.
1126 fn register_hash_row_id(conn: &Connection) -> Result<(), DbError> {
1127 conn.create_scalar_function(
1128 "hash_row_id",
1129 2,
1130 FunctionFlags::SQLITE_DETERMINISTIC | FunctionFlags::SQLITE_UTF8,
1131 |ctx| {
1132 let salt: String = ctx.get(0)?;
1133 let key: String = ctx.get(1)?;
1134 let mut hasher = Sha256::new();
1135 hasher.update(salt.as_bytes());
1136 hasher.update(b":");
1137 hasher.update(key.as_bytes());
1138 let digest = hasher.finalize();
1139 let mut hex = String::with_capacity(64);
1140 for byte in digest {
1141 use std::fmt::Write;
1142 let _ = write!(hex, "{byte:02x}");
1143 }
1144 Ok(hex)
1145 },
1146 )?;
1147 Ok(())
1148 }
1149
1150 impl Database {
1151 /// Open (or create) the database at the given path and run migrations.
1152 #[instrument(skip_all)]
1153 pub fn open(path: impl AsRef<Path>) -> Result<Self, DbError> {
1154 let conn = Connection::open(path)?;
1155 conn.execute_batch(
1156 "PRAGMA journal_mode=WAL;\
1157 PRAGMA foreign_keys=ON;\
1158 PRAGMA busy_timeout=5000;\
1159 PRAGMA wal_checkpoint(TRUNCATE);",
1160 )?;
1161 register_hash_row_id(&conn)?;
1162 let mut db = Self { conn };
1163 db.migrate()?;
1164 Ok(db)
1165 }
1166
1167 /// Flush the WAL back into the main database file and remove the -shm file.
1168 ///
1169 /// Call after large write batches (e.g. import completion) to keep the
1170 /// WAL index fresh and avoid stale memory-mapped state on macOS.
1171 pub fn wal_checkpoint(&self) -> Result<(), DbError> {
1172 self.conn
1173 .execute_batch("PRAGMA wal_checkpoint(TRUNCATE)")?;
1174 Ok(())
1175 }
1176
1177 /// Open an in-memory database (for tests).
1178 #[instrument(skip_all)]
1179 pub fn open_in_memory() -> Result<Self, DbError> {
1180 let conn = Connection::open_in_memory()?;
1181 conn.execute_batch("PRAGMA foreign_keys=ON;")?;
1182 register_hash_row_id(&conn)?;
1183 let mut db = Self { conn };
1184 db.migrate()?;
1185 Ok(db)
1186 }
1187
1188 /// Apply pending migrations using PRAGMA user_version as the version tracker.
1189 ///
1190 /// Each migration step runs inside a transaction so the schema change and
1191 /// version bump are atomic — a crash between the two can no longer leave the
1192 /// database in an inconsistent state.
1193 #[instrument(skip_all)]
1194 fn migrate(&mut self) -> Result<(), DbError> {
1195 let version: i32 =
1196 self.conn
1197 .query_row("PRAGMA user_version", [], |row| row.get(0))?;
1198
1199 const MIGRATIONS: &[&str] = &[
1200 MIGRATION_001,
1201 MIGRATION_002,
1202 MIGRATION_003,
1203 MIGRATION_004,
1204 MIGRATION_005,
1205 MIGRATION_006,
1206 MIGRATION_007,
1207 MIGRATION_008,
1208 MIGRATION_009,
1209 MIGRATION_010,
1210 MIGRATION_011,
1211 MIGRATION_012,
1212 MIGRATION_013,
1213 MIGRATION_014,
1214 MIGRATION_015,
1215 MIGRATION_016,
1216 MIGRATION_017,
1217 MIGRATION_018,
1218 MIGRATION_019,
1219 ];
1220
1221 for (i, sql) in MIGRATIONS.iter().enumerate() {
1222 let target = (i + 1) as i32;
1223 if version < target {
1224 let batch = format!("BEGIN;\n{}\nPRAGMA user_version = {};\nCOMMIT;", sql, target);
1225 match self.conn.execute_batch(&batch) {
1226 Ok(()) => {}
1227 Err(e) if e.to_string().contains("duplicate column") => {
1228 // Recovery path: a prior partial migration committed
1229 // some ALTERs before crashing. Re-run the migration in
1230 // pieces, tolerating "duplicate column" on ALTERs and
1231 // "already exists" on CREATEs (both mean: the prior
1232 // partial run got there already; the desired final
1233 // state is still reachable). Any OTHER error here is
1234 // a real failure — we roll back and surface it,
1235 // because silently bumping user_version on a partially
1236 // applied schema is the worst possible outcome.
1237 let _ = self.conn.execute_batch("ROLLBACK");
1238 self.conn.execute_batch("BEGIN")?;
1239
1240 // ALTER TABLEs first, individually, tolerating duplicates.
1241 for line in sql.lines() {
1242 let trimmed = line.trim();
1243 if trimmed.to_uppercase().starts_with("ALTER TABLE")
1244 && trimmed.to_uppercase().contains("ADD COLUMN")
1245 && let Err(alter_err) = self.conn.execute_batch(trimmed)
1246 && !alter_err.to_string().contains("duplicate column") {
1247 let _ = self.conn.execute_batch("ROLLBACK");
1248 return Err(DbError::Sqlite(alter_err));
1249 }
1250 }
1251
1252 // Non-ALTER statements (CREATE TABLE / INDEX /
1253 // TRIGGER, DROP IF EXISTS, INSERT OR IGNORE, plain
1254 // INSERT / UPDATE / DELETE). After M018, every
1255 // migration from M003 onward is replay-safe by
1256 // construction (verified by the
1257 // migration_replay_from_version_two_against_full_schema
1258 // regression test), so this batch should succeed
1259 // cleanly even against a populated schema. "already
1260 // exists" stays tolerable as a belt-and-braces guard
1261 // for pre-idempotent migration bodies. Anything else
1262 // is a real failure — fail fast, don't bump.
1263 let non_alter: String = sql
1264 .lines()
1265 .filter(|l| {
1266 let t = l.trim().to_uppercase();
1267 !(t.starts_with("ALTER TABLE") && t.contains("ADD COLUMN"))
1268 })
1269 .collect::<Vec<_>>()
1270 .join("\n");
1271 if !non_alter.trim().is_empty()
1272 && let Err(e) = self.conn.execute_batch(&non_alter)
1273 && !e.to_string().contains("already exists") {
1274 let _ = self.conn.execute_batch("ROLLBACK");
1275 return Err(DbError::Sqlite(e));
1276 }
1277
1278 self.conn.execute_batch(
1279 &format!("PRAGMA user_version = {};\nCOMMIT;", target),
1280 )?;
1281 }
1282 Err(e) => return Err(DbError::Sqlite(e)),
1283 }
1284 }
1285 }
1286
1287 Ok(())
1288 }
1289
1290 /// Run a closure inside a SQLite transaction.
1291 ///
1292 /// Uses `BEGIN IMMEDIATE` to acquire a write lock upfront, preventing
1293 /// deadlocks when the closure issues writes. The closure receives no
1294 /// arguments — it accesses the same `Database` through the shared
1295 /// `Mutex<Database>`, which is safe because the caller already holds the lock.
1296 #[instrument(skip_all)]
1297 pub fn transaction<T, F>(&self, f: F) -> Result<T, DbError>
1298 where
1299 F: FnOnce() -> Result<T, DbError>,
1300 {
1301 self.conn.execute_batch("BEGIN IMMEDIATE")?;
1302 match f() {
1303 Ok(val) => {
1304 self.conn.execute_batch("COMMIT")?;
1305 Ok(val)
1306 }
1307 Err(e) => {
1308 if let Err(rb_err) = self.conn.execute_batch("ROLLBACK") {
1309 tracing::warn!("ROLLBACK failed after transaction error: {rb_err}");
1310 }
1311 Err(e)
1312 }
1313 }
1314 }
1315
1316 /// Borrow the underlying connection for queries.
1317 pub fn conn(&self) -> &Connection {
1318 &self.conn
1319 }
1320
1321 /// Aggregate storage stats: (sample_count, total_file_bytes).
1322 ///
1323 /// Excludes tombstoned rows (`deleted_at IS NOT NULL`) so the figure matches
1324 /// the library the user actually sees — the M019 read-path filter applies here
1325 /// like every other sample read site.
1326 pub fn storage_stats(&self) -> Result<(u64, u64), DbError> {
1327 let (count, total): (u64, u64) = self.conn.query_row(
1328 "SELECT COUNT(*), COALESCE(SUM(file_size), 0) FROM samples WHERE deleted_at IS NULL",
1329 [],
1330 |row| Ok((row.get(0)?, row.get(1)?)),
1331 )?;
1332 Ok((count, total))
1333 }
1334
1335 /// Per-VFS storage stats: count and total bytes of *unique* samples
1336 /// referenced by `vfs_id`. A sample referenced from multiple nodes in the
1337 /// same VFS counts once. Used by the sync panel's per-VFS toggle rows so
1338 /// the user can see how much would upload before enabling blob sync.
1339 pub fn vfs_storage_stats(&self, vfs_id: i64) -> Result<(u64, u64), DbError> {
1340 let (count, total): (u64, u64) = self.conn.query_row(
1341 "SELECT COUNT(*), COALESCE(SUM(file_size), 0) FROM samples \
1342 WHERE hash IN (\
1343 SELECT DISTINCT sample_hash FROM vfs_nodes \
1344 WHERE vfs_id = ? AND sample_hash IS NOT NULL\
1345 )",
1346 [vfs_id],
1347 |row| Ok((row.get(0)?, row.get(1)?)),
1348 )?;
1349 Ok((count, total))
1350 }
1351 }
1352
1353 #[cfg(test)]
1354 mod tests {
1355 use super::*;
1356
1357 #[test]
1358 fn open_in_memory_creates_all_tables() {
1359 let db = Database::open_in_memory().unwrap();
1360
1361 let tables: Vec<String> = db
1362 .conn()
1363 .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name")
1364 .unwrap()
1365 .query_map([], |row| row.get(0))
1366 .unwrap()
1367 .collect::<Result<_, _>>()
1368 .unwrap();
1369
1370 let expected = vec![
1371 "audio_analysis",
1372 "collection_members",
1373 "collections",
1374 "edit_history",
1375 "fingerprints",
1376 "samples",
1377 "sync_changelog",
1378 "sync_state",
1379 "tags",
1380 "user_config",
1381 "vfs",
1382 "vfs_nodes",
1383 "waveform_data",
1384 ];
1385 assert_eq!(tables, expected);
1386 }
1387
1388 #[test]
1389 fn migration_sets_user_version() {
1390 let db = Database::open_in_memory().unwrap();
1391 let version: i32 = db
1392 .conn()
1393 .query_row("PRAGMA user_version", [], |row| row.get(0))
1394 .unwrap();
1395 assert_eq!(version, 19);
1396 }
1397
1398 #[test]
1399 fn migration_is_idempotent() {
1400 let db = Database::open_in_memory().unwrap();
1401 // Opening again on the same connection shouldn't fail
1402 let version: i32 = db
1403 .conn()
1404 .query_row("PRAGMA user_version", [], |row| row.get(0))
1405 .unwrap();
1406 assert_eq!(version, 19);
1407 }
1408
1409 /// Open a fresh file-backed DB, close, reopen. The second open re-enters
1410 /// `migrate()`; with `user_version=17` no migration body runs, but the
1411 /// shape verifies our open/close cycle is clean (no locks, no WAL leak).
1412 #[test]
1413 fn migration_replay_from_file_no_op() {
1414 let dir = tempfile::tempdir().unwrap();
1415 let path = dir.path().join("audiofiles.db");
1416
1417 let db = Database::open(&path).unwrap();
1418 drop(db);
1419
1420 let db = Database::open(&path).unwrap();
1421 let version: i32 = db
1422 .conn()
1423 .query_row("PRAGMA user_version", [], |row| row.get(0))
1424 .unwrap();
1425 assert_eq!(version, 19);
1426 }
1427
1428 /// Simulates the worst-case recovery path: a prior partial migration left
1429 /// every object in place but `user_version` rolled back. Re-running
1430 /// `migrate()` against the pre-populated schema must succeed without
1431 /// silent failure. This catches the "silent failure → bump user_version"
1432 /// bug class for every migration past the inherently-one-shot ones.
1433 ///
1434 /// The inherently-one-shot migrations are excluded from this replay
1435 /// loop:
1436 /// * M001 — initial schema; bare CREATE TABLEs, runs against an empty DB.
1437 /// * M002 — `DROP TABLE tags; ALTER tags_v2 RENAME TO tags` rebuild dance.
1438 /// * M015 — adds `collections.filter_json` and backfills from
1439 /// `smart_folders`, then drops `smart_folders`. The backfill SELECT
1440 /// references a table that no longer exists after the migration runs,
1441 /// so it cannot parse on replay against a post-M015 schema. None of
1442 /// these need replay safety: SQLite's atomic-transaction guarantee
1443 /// means each migration either fully commits or fully rolls back, so
1444 /// the realistic recovery scenario is "re-apply the one migration
1445 /// that crashed" — not "re-apply every migration from scratch".
1446 ///
1447 /// Every migration from M003 onward (excluding M015) MUST be
1448 /// replay-safe against a populated schema; if you add a new one that
1449 /// isn't, this test fails and you should add `IF NOT EXISTS` /
1450 /// `DROP IF EXISTS` / `INSERT OR IGNORE` accordingly, or add it to the
1451 /// one-shot list above with a clear rationale.
1452 #[test]
1453 fn migration_replay_from_version_fifteen_against_full_schema() {
1454 let dir = tempfile::tempdir().unwrap();
1455 let path = dir.path().join("audiofiles.db");
1456
1457 Database::open(&path).unwrap();
1458
1459 {
1460 let conn = Connection::open(&path).unwrap();
1461 conn.execute_batch("PRAGMA user_version = 15").unwrap();
1462 }
1463
1464 let db = Database::open(&path).unwrap();
1465 let version: i32 = db
1466 .conn()
1467 .query_row("PRAGMA user_version", [], |row| row.get(0))
1468 .unwrap();
1469 assert_eq!(version, 19);
1470 }
1471
1472 /// M018 contract: the `sync_changelog.row_id` for sensitive tables must
1473 /// be a 64-hex SHA-256 (per `hash_row_id`), NOT the cleartext content
1474 /// fingerprint or tag string. The cleartext key lives only in `data`.
1475 /// This test is the regression gate for the upload audit fix.
1476 #[test]
1477 fn m018_hashes_sensitive_row_ids() {
1478 let db = Database::open_in_memory().unwrap();
1479 let conn = db.conn();
1480
1481 // Seed: insert a sample and a tag. Both should fire triggers that
1482 // write to sync_changelog with a hashed row_id.
1483 conn.execute(
1484 "INSERT INTO samples (hash, original_name, file_extension, file_size, \
1485 import_date, last_modified) VALUES \
1486 ('abc123', 'kick.wav', 'wav', 100, 0, 0)",
1487 [],
1488 )
1489 .unwrap();
1490 conn.execute(
1491 "INSERT INTO tags (sample_hash, tag) VALUES ('abc123', 'drums')",
1492 [],
1493 )
1494 .unwrap();
1495
1496 // samples row_id: 64-hex hash, NOT "abc123".
1497 let row_id: String = conn
1498 .query_row(
1499 "SELECT row_id FROM sync_changelog WHERE table_name = 'samples' AND op = 'INSERT'",
1500 [],
1501 |row| row.get(0),
1502 )
1503 .unwrap();
1504 assert_eq!(row_id.len(), 64, "row_id should be SHA-256 hex");
1505 assert!(row_id.chars().all(|c| c.is_ascii_hexdigit()));
1506 assert_ne!(row_id, "abc123", "cleartext sample hash must not leak");
1507
1508 // tags row_id: 64-hex hash, NOT "abc123:drums".
1509 let row_id: String = conn
1510 .query_row(
1511 "SELECT row_id FROM sync_changelog WHERE table_name = 'tags' AND op = 'INSERT'",
1512 [],
1513 |row| row.get(0),
1514 )
1515 .unwrap();
1516 assert_eq!(row_id.len(), 64);
1517 assert_ne!(row_id, "abc123:drums", "cleartext tag string must not leak");
1518
1519 // Salted: hash depends on the per-user salt, so two fresh DBs see
1520 // different row_ids for the same logical key.
1521 let db2 = Database::open_in_memory().unwrap();
1522 let conn2 = db2.conn();
1523 conn2
1524 .execute(
1525 "INSERT INTO samples (hash, original_name, file_extension, file_size, \
1526 import_date, last_modified) VALUES \
1527 ('abc123', 'kick.wav', 'wav', 100, 0, 0)",
1528 [],
1529 )
1530 .unwrap();
1531 let row_id2: String = conn2
1532 .query_row(
1533 "SELECT row_id FROM sync_changelog WHERE table_name = 'samples' AND op = 'INSERT'",
1534 [],
1535 |row| row.get(0),
1536 )
1537 .unwrap();
1538 assert_ne!(row_id, row_id2, "salt should differ between DBs");
1539 }
1540
1541 /// M018 contract: DELETE rows must carry the canonical PK in `data` so
1542 /// the receiving device's `resolve::apply_delete` can reconstruct the
1543 /// WHERE clause without parsing the (now-hashed) row_id.
1544 #[test]
1545 fn m018_delete_triggers_emit_canonical_key_in_data() {
1546 let db = Database::open_in_memory().unwrap();
1547 let conn = db.conn();
1548
1549 conn.execute(
1550 "INSERT INTO samples (hash, original_name, file_extension, file_size, \
1551 import_date, last_modified) VALUES \
1552 ('abc', 'k.wav', 'wav', 1, 0, 0)",
1553 [],
1554 )
1555 .unwrap();
1556 conn.execute("INSERT INTO tags (sample_hash, tag) VALUES ('abc', 'kick')", [])
1557 .unwrap();
1558 conn.execute("DELETE FROM tags WHERE sample_hash = 'abc' AND tag = 'kick'", [])
1559 .unwrap();
1560
1561 let data: String = conn
1562 .query_row(
1563 "SELECT data FROM sync_changelog WHERE table_name = 'tags' AND op = 'DELETE'",
1564 [],
1565 |row| row.get(0),
1566 )
1567 .unwrap();
1568 let parsed: serde_json::Value = serde_json::from_str(&data).unwrap();
1569 assert_eq!(parsed["sample_hash"], "abc");
1570 assert_eq!(parsed["tag"], "kick");
1571 }
1572
1573 /// M019 contract: `samples.deleted_at` column exists, the partial
1574 /// index is in place, and the read-path filter actually hides
1575 /// tombstoned rows from `sample_extension` (and by extension every
1576 /// other query that uses the `query_sample_field` helper).
1577 ///
1578 /// This test is the regression gate that proves Phase 1 of the
1579 /// tombstone design (docs/design-sample-deletion.md) lands the
1580 /// promised infrastructure. Phase 2 will wire the UPDATE path that
1581 /// sets deleted_at; today nothing in app code sets it, so every
1582 /// query continues to return all rows in practice — but tests can
1583 /// set it directly and observe the filter working.
1584 #[test]
1585 fn m019_tombstone_column_and_read_filter() {
1586 let db = Database::open_in_memory().unwrap();
1587 let conn = db.conn();
1588
1589 // Column exists with default NULL.
1590 conn.execute(
1591 "INSERT INTO samples (hash, original_name, file_extension, file_size, \
1592 import_date, last_modified) VALUES \
1593 ('live', 'k.wav', 'wav', 1, 0, 0)",
1594 [],
1595 )
1596 .unwrap();
1597 conn.execute(
1598 "INSERT INTO samples (hash, original_name, file_extension, file_size, \
1599 import_date, last_modified) VALUES \
1600 ('tomb', 't.wav', 'wav', 1, 0, 0)",
1601 [],
1602 )
1603 .unwrap();
1604 conn.execute(
1605 "UPDATE samples SET deleted_at = 1700000000 WHERE hash = 'tomb'",
1606 [],
1607 )
1608 .unwrap();
1609
1610 // sample_extension reads via query_sample_field, which now filters
1611 // out tombstoned rows.
1612 let live_ext = crate::store::sample_extension(&db, "live").unwrap();
1613 assert_eq!(live_ext, "wav");
1614
1615 let tomb_ext = crate::store::sample_extension(&db, "tomb");
1616 assert!(
1617 matches!(tomb_ext, Err(crate::error::CoreError::SampleNotFound(_))),
1618 "tombstoned sample should be hidden from sample_extension; got {tomb_ext:?}"
1619 );
1620
1621 // storage_stats also applies the read-path filter: only the live sample
1622 // (file_size 1) is counted, not the tombstoned one.
1623 let (count, bytes) = db.storage_stats().unwrap();
1624 assert_eq!(count, 1, "tombstoned sample should not be counted");
1625 assert_eq!(bytes, 1, "tombstoned sample's bytes should be excluded");
1626
1627 // Default retain-days seed is present.
1628 let retain: String = conn
1629 .query_row(
1630 "SELECT value FROM user_config WHERE key = 'sample_tombstone_retain_days'",
1631 [],
1632 |r| r.get(0),
1633 )
1634 .unwrap();
1635 assert_eq!(retain, "30");
1636
1637 // Partial index exists.
1638 let idx_count: i64 = conn
1639 .query_row(
1640 "SELECT COUNT(*) FROM sqlite_master \
1641 WHERE type = 'index' AND name = 'idx_samples_deleted_at'",
1642 [],
1643 |r| r.get(0),
1644 )
1645 .unwrap();
1646 assert_eq!(idx_count, 1);
1647 }
1648
1649 /// Recovery branch contract: when the non-ALTER batch fails for a
1650 /// reason OTHER than "already exists", `migrate()` must roll back and
1651 /// surface the error, NOT bump `user_version` past the failed
1652 /// migration. Prior behavior was a silent `tracing::warn!` followed by
1653 /// a `user_version` bump, which left a partially applied schema
1654 /// invisible to future open() calls.
1655 ///
1656 /// Simulates the failure mode by:
1657 /// 1. Bringing the DB up to current version.
1658 /// 2. Injecting an inline migration (M999) whose non-ALTER body
1659 /// references a non-existent table, AND prepending an ALTER on a
1660 /// column that already exists — that's the duplicate-column trip
1661 /// wire that funnels execution into the recovery branch.
1662 /// 3. Setting user_version back to 18 so the runner attempts M019.
1663 /// 4. Asserting migrate() returns Err and user_version stays at 18.
1664 ///
1665 /// We can't easily inject a new migration into the const array, so we
1666 /// drive the recovery branch by calling the runner inline.
1667 #[test]
1668 fn migrate_recovery_branch_fails_fast_on_non_alter_error() {
1669 use rusqlite::Connection;
1670
1671 let dir = tempfile::tempdir().unwrap();
1672 let path = dir.path().join("audiofiles.db");
1673 let _db = Database::open(&path).unwrap();
1674 drop(_db);
1675
1676 // Reopen with a raw Connection so we can hand-craft the recovery
1677 // scenario without going through migrate().
1678 let conn = Connection::open(&path).unwrap();
1679 register_hash_row_id(&conn).unwrap();
1680 conn.execute_batch("PRAGMA foreign_keys = ON;").unwrap();
1681
1682 // Simulate the recovery-branch logic directly: try a migration
1683 // batch that fails with "duplicate column" (forcing recovery),
1684 // and whose non-ALTER body references a missing table (forcing
1685 // the failure that previously got swallowed).
1686 let bad_sql = "ALTER TABLE samples ADD COLUMN cloud_only INTEGER NOT NULL DEFAULT 0;\n\
1687 INSERT INTO no_such_table_exists (k) VALUES ('x');";
1688 let initial_version: i32 = conn
1689 .query_row("PRAGMA user_version", [], |row| row.get(0))
1690 .unwrap();
1691 assert_eq!(initial_version, 19);
1692
1693 let batch = format!(
1694 "BEGIN;\n{}\nPRAGMA user_version = 999;\nCOMMIT;",
1695 bad_sql
1696 );
1697 let first_err = conn.execute_batch(&batch).unwrap_err();
1698 assert!(
1699 first_err.to_string().contains("duplicate column"),
1700 "expected duplicate-column trip wire, got: {first_err}"
1701 );
1702
1703 // Recovery: ALTER tolerated, non-ALTER must fail loudly.
1704 let _ = conn.execute_batch("ROLLBACK");
1705 conn.execute_batch("BEGIN").unwrap();
1706 // ALTER passes (column exists; tolerated).
1707 let alter = "ALTER TABLE samples ADD COLUMN cloud_only INTEGER NOT NULL DEFAULT 0";
1708 let alter_res = conn.execute_batch(alter);
1709 assert!(alter_res.is_err());
1710 assert!(alter_res.unwrap_err().to_string().contains("duplicate column"));
1711
1712 // Non-ALTER: fail-fast, return error, do not bump user_version.
1713 let non_alter = "INSERT INTO no_such_table_exists (k) VALUES ('x')";
1714 let na_res = conn.execute_batch(non_alter);
1715 assert!(na_res.is_err());
1716 let msg = na_res.unwrap_err().to_string();
1717 assert!(
1718 !msg.contains("already exists"),
1719 "expected a real failure (no such table), got: {msg}"
1720 );
1721
1722 // The fail-fast path rolls back and never reaches the
1723 // user_version bump. Confirm.
1724 conn.execute_batch("ROLLBACK").unwrap();
1725 let after: i32 = conn
1726 .query_row("PRAGMA user_version", [], |row| row.get(0))
1727 .unwrap();
1728 assert_eq!(
1729 after, initial_version,
1730 "user_version must not bump when recovery non-ALTER fails for a real reason"
1731 );
1732 }
1733
1734 /// Companion to `migration_replay_from_version_fifteen_against_full_schema`:
1735 /// rolling user_version back and re-opening must heal to version 18 without
1736 /// silent partial-state. Identical setup; kept as a contract-specific name
1737 /// so a failing test points the reader at the recovery-branch design rather
1738 /// than the broader replay-safety claim.
1739 #[test]
1740 fn migrate_recovery_branch_tolerates_already_exists() {
1741 let dir = tempfile::tempdir().unwrap();
1742 let path = dir.path().join("audiofiles.db");
1743
1744 Database::open(&path).unwrap();
1745 {
1746 let conn = rusqlite::Connection::open(&path).unwrap();
1747 conn.execute_batch("PRAGMA user_version = 15").unwrap();
1748 }
1749 let db = Database::open(&path).unwrap();
1750 let version: i32 = db
1751 .conn()
1752 .query_row("PRAGMA user_version", [], |row| row.get(0))
1753 .unwrap();
1754 assert_eq!(version, 19);
1755 }
1756
1757 #[test]
1758 fn foreign_keys_enforced() {
1759 let db = Database::open_in_memory().unwrap();
1760 // Inserting a vfs_node referencing a non-existent vfs should fail
1761 let result = db.conn().execute(
1762 "INSERT INTO vfs_nodes (vfs_id, name, node_type, created_at) VALUES (999, 'test', 'directory', 0)",
1763 [],
1764 );
1765 assert!(result.is_err());
1766 }
1767
1768 #[test]
1769 fn transaction_commits_on_success() {
1770 let db = Database::open_in_memory().unwrap();
1771 db.transaction(|| {
1772 db.conn().execute(
1773 "INSERT INTO user_config (key, value) VALUES ('test_key', 'test_value')",
1774 [],
1775 )?;
1776 Ok(())
1777 })
1778 .unwrap();
1779
1780 let val: String = db
1781 .conn()
1782 .query_row(
1783 "SELECT value FROM user_config WHERE key = 'test_key'",
1784 [],
1785 |row| row.get(0),
1786 )
1787 .unwrap();
1788 assert_eq!(val, "test_value");
1789 }
1790
1791 #[test]
1792 fn transaction_rolls_back_on_error() {
1793 let db = Database::open_in_memory().unwrap();
1794 let result: Result<(), DbError> = db.transaction(|| {
1795 db.conn().execute(
1796 "INSERT INTO user_config (key, value) VALUES ('rollback_key', 'val')",
1797 [],
1798 )?;
1799 Err(DbError::Sqlite(rusqlite::Error::QueryReturnedNoRows))
1800 });
1801 assert!(result.is_err());
1802
1803 let count: i64 = db
1804 .conn()
1805 .query_row(
1806 "SELECT COUNT(*) FROM user_config WHERE key = 'rollback_key'",
1807 [],
1808 |row| row.get(0),
1809 )
1810 .unwrap();
1811 assert_eq!(count, 0);
1812 }
1813 }
1814