Skip to main content

max / audiofiles

Fix sync snapshot column omissions and unsafe_mode leak Initial-snapshot SELECTs were missing several columns added in later migrations: samples.duration, and five audio_analysis columns (spectral_bandwidth, centroid_variance, crest_factor, attack_time, classification_confidence). New devices joining sync would lose these fields until each row was re-analysed. Also excludes unsafe_mode from sync (migration 016 + snapshot WHERE clause). A compromised server or second device should not be able to silently flip a security-relevant setting. mark_cloud_only_samples now does a single transaction across all missing samples instead of one BEGIN/COMMIT per row. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-05-14 19:22 UTC
Commit: d55bbd2a9a26643a4b138770f99e45d67ef6b38c
Parent: ff72d1e
2 files changed, +165 insertions, -30 deletions
@@ -629,6 +629,43 @@ DROP TRIGGER IF EXISTS sync_smart_folders_delete;
629 629 DROP TABLE IF EXISTS smart_folders;
630 630 "#;
631 631
632 + const MIGRATION_016: &str = r#"
633 + -- Exclude unsafe_mode from sync: a compromised server or second device should
634 + -- not be able to silently flip a security-relevant setting.
635 + DROP TRIGGER IF EXISTS sync_user_config_insert;
636 + DROP TRIGGER IF EXISTS sync_user_config_update;
637 + DROP TRIGGER IF EXISTS sync_user_config_delete;
638 +
639 + CREATE TRIGGER sync_user_config_insert AFTER INSERT ON user_config
640 + WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
641 + AND NEW.key NOT LIKE 'sync_%'
642 + AND NEW.key != 'unsafe_mode'
643 + BEGIN
644 + INSERT INTO sync_changelog (table_name, op, row_id, data)
645 + VALUES ('user_config', 'INSERT', NEW.key,
646 + json_object('key', NEW.key, 'value', NEW.value));
647 + END;
648 +
649 + CREATE TRIGGER sync_user_config_update AFTER UPDATE ON user_config
650 + WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
651 + AND NEW.key NOT LIKE 'sync_%'
652 + AND NEW.key != 'unsafe_mode'
653 + BEGIN
654 + INSERT INTO sync_changelog (table_name, op, row_id, data)
655 + VALUES ('user_config', 'UPDATE', NEW.key,
656 + json_object('key', NEW.key, 'value', NEW.value));
657 + END;
658 +
659 + CREATE TRIGGER sync_user_config_delete AFTER DELETE ON user_config
660 + WHEN (SELECT value FROM sync_state WHERE key = 'applying_remote') != '1'
661 + AND OLD.key NOT LIKE 'sync_%'
662 + AND OLD.key != 'unsafe_mode'
663 + BEGIN
664 + INSERT INTO sync_changelog (table_name, op, row_id, data)
665 + VALUES ('user_config', 'DELETE', OLD.key, NULL);
666 + END;
667 + "#;
668 +
632 669 impl Database {
633 670 /// Open (or create) the database at the given path and run migrations.
634 671 #[instrument(skip_all)]
@@ -692,6 +729,7 @@ impl Database {
692 729 MIGRATION_013,
693 730 MIGRATION_014,
694 731 MIGRATION_015,
732 + MIGRATION_016,
695 733 ];
696 734
697 735 for (i, sql) in MIGRATIONS.iter().enumerate() {
@@ -840,7 +878,7 @@ mod tests {
840 878 .conn()
841 879 .query_row("PRAGMA user_version", [], |row| row.get(0))
842 880 .unwrap();
843 - assert_eq!(version, 15);
881 + assert_eq!(version, 16);
844 882 }
845 883
846 884 #[test]
@@ -851,7 +889,7 @@ mod tests {
851 889 .conn()
852 890 .query_row("PRAGMA user_version", [], |row| row.get(0))
853 891 .unwrap();
854 - assert_eq!(version, 15);
892 + assert_eq!(version, 16);
855 893 }
856 894
857 895 #[test]
@@ -23,14 +23,14 @@ pub fn create_initial_snapshot(conn: &Connection) -> Result<i64> {
23 23 let mut total: i64 = 0;
24 24
25 25 let table_queries: &[(&str, &str)] = &[
26 - ("samples", "SELECT hash, json_object('hash', hash, 'original_name', original_name, 'file_extension', file_extension, 'file_size', file_size, 'import_date', import_date, 'last_modified', last_modified, 'cloud_only', cloud_only) FROM samples"),
27 - ("audio_analysis", "SELECT hash, json_object('hash', hash, 'bpm', bpm, 'musical_key', musical_key, 'duration', duration, 'sample_rate', sample_rate, 'channels', channels, 'peak_db', peak_db, 'rms_db', rms_db, 'is_loop', is_loop, 'spectral_centroid', spectral_centroid, 'onset_strength', onset_strength, 'analyzed_at', analyzed_at, 'lufs', lufs, 'spectral_flatness', spectral_flatness, 'spectral_rolloff', spectral_rolloff, 'zero_crossing_rate', zero_crossing_rate, 'classification', classification) FROM audio_analysis"),
26 + ("samples", "SELECT hash, json_object('hash', hash, 'original_name', original_name, 'file_extension', file_extension, 'file_size', file_size, 'import_date', import_date, 'last_modified', last_modified, 'cloud_only', cloud_only, 'duration', duration) FROM samples"),
27 + ("audio_analysis", "SELECT hash, json_object('hash', hash, 'bpm', bpm, 'musical_key', musical_key, 'duration', duration, 'sample_rate', sample_rate, 'channels', channels, 'peak_db', peak_db, 'rms_db', rms_db, 'is_loop', is_loop, 'spectral_centroid', spectral_centroid, 'onset_strength', onset_strength, 'analyzed_at', analyzed_at, 'lufs', lufs, 'spectral_flatness', spectral_flatness, 'spectral_rolloff', spectral_rolloff, 'zero_crossing_rate', zero_crossing_rate, 'classification', classification, 'spectral_bandwidth', spectral_bandwidth, 'centroid_variance', centroid_variance, 'crest_factor', crest_factor, 'attack_time', attack_time, 'classification_confidence', classification_confidence) FROM audio_analysis"),
28 28 ("vfs", "SELECT CAST(id AS TEXT), json_object('id', id, 'name', name, 'created_at', created_at, 'modified_at', modified_at, 'sync_files', sync_files) FROM vfs"),
29 29 ("vfs_nodes", "SELECT CAST(id AS TEXT), json_object('id', id, 'vfs_id', vfs_id, 'parent_id', parent_id, 'name', name, 'node_type', node_type, 'sample_hash', sample_hash, 'created_at', created_at) FROM vfs_nodes"),
30 30 ("tags", "SELECT sample_hash || ':' || tag, json_object('sample_hash', sample_hash, 'tag', tag) FROM tags"),
31 31 ("collections", "SELECT CAST(id AS TEXT), json_object('id', id, 'name', name, 'description', description, 'created_at', created_at, 'filter_json', filter_json) FROM collections"),
32 32 ("collection_members", "SELECT CAST(collection_id AS TEXT) || ':' || sample_hash, json_object('collection_id', collection_id, 'sample_hash', sample_hash, 'added_at', added_at) FROM collection_members"),
33 - ("user_config", "SELECT key, json_object('key', key, 'value', value) FROM user_config WHERE key NOT LIKE 'sync_%'"),
33 + ("user_config", "SELECT key, json_object('key', key, 'value', value) FROM user_config WHERE key NOT LIKE 'sync_%' AND key != 'unsafe_mode'"),
34 34 ("edit_history", "SELECT CAST(id AS TEXT), json_object('id', id, 'source_hash', source_hash, 'result_hash', result_hash, 'operation', operation, 'params_json', params_json, 'created_at', created_at) FROM edit_history"),
35 35 ];
36 36
@@ -127,36 +127,47 @@ pub fn mark_cloud_only_samples(conn: &Connection, content_dir: &Path) -> Result<
127 127 .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
128 128 .collect::<std::result::Result<Vec<_>, _>>()?;
129 129
130 - let mut marked = 0i64;
131 - for (hash, ext) in &rows {
132 - let blob_path = content_dir.join(format!("{}.{}", hash, ext));
133 - if !blob_path.exists() {
134 - conn.execute_batch("BEGIN IMMEDIATE")?;
130 + // Collect hashes missing on disk before taking a transaction.
131 + let missing: Vec<&str> = rows
132 + .iter()
133 + .filter(|(hash, ext)| !content_dir.join(format!("{}.{}", hash, ext)).exists())
134 + .map(|(hash, _)| hash.as_str())
135 + .collect();
136 +
137 + if missing.is_empty() {
138 + return Ok(0);
139 + }
140 +
141 + conn.execute_batch("BEGIN IMMEDIATE")?;
142 + conn.execute(
143 + "UPDATE sync_state SET value = '1' WHERE key = 'applying_remote'",
144 + [],
145 + )?;
146 + let result = (|| -> Result<i64> {
147 + let mut marked = 0i64;
148 + for hash in &missing {
135 149 conn.execute(
136 - "UPDATE sync_state SET value = '1' WHERE key = 'applying_remote'",
137 - [],
138 - )?;
139 - match conn.execute(
140 150 "UPDATE samples SET cloud_only = 1 WHERE hash = ?1",
141 151 [hash],
142 - ) {
143 - Ok(_) => {
144 - conn.execute(
145 - "UPDATE sync_state SET value = '0' WHERE key = 'applying_remote'",
146 - [],
147 - )?;
148 - conn.execute_batch("COMMIT")?;
149 - marked += 1;
150 - }
151 - Err(e) => {
152 - // ROLLBACK to clear the applying_remote flag and release the transaction
153 - let _ = conn.execute_batch("ROLLBACK");
154 - return Err(e.into());
155 - }
156 - }
152 + )?;
153 + marked += 1;
154 + }
155 + Ok(marked)
156 + })();
157 + conn.execute(
158 + "UPDATE sync_state SET value = '0' WHERE key = 'applying_remote'",
159 + [],
160 + )?;
161 + match result {
162 + Ok(marked) => {
163 + conn.execute_batch("COMMIT")?;
164 + Ok(marked)
165 + }
166 + Err(e) => {
167 + let _ = conn.execute_batch("ROLLBACK");
168 + Err(e)
157 169 }
158 170 }
159 - Ok(marked)
160 171 }
161 172
162 173 #[cfg(test)]
@@ -868,6 +879,92 @@ mod tests {
868 879 assert_eq!(parsed["file_extension"], "wav");
869 880 assert!(parsed.get("file_size").is_some());
870 881 assert!(parsed.get("import_date").is_some());
882 + // Columns added in migrations 008-009 must be present in snapshot
883 + assert!(parsed.get("cloud_only").is_some(), "snapshot missing cloud_only");
884 + assert!(parsed.get("duration").is_some(), "snapshot missing duration");
885 + }
886 +
887 + #[test]
888 + fn snapshot_samples_columns_match_whitelist() {
889 + let db = setup_test_db();
890 + let conn = db.conn();
891 +
892 + set_sync_state(conn, "applying_remote", "1").unwrap();
893 + insert_sample(conn, "col_test", "check.wav", "wav");
894 + set_sync_state(conn, "applying_remote", "0").unwrap();
895 + clear_changelog(conn);
896 +
897 + create_initial_snapshot(conn).unwrap();
898 +
899 + let data: String = conn.query_row(
900 + "SELECT data FROM sync_changelog WHERE table_name = 'samples' AND row_id = 'col_test'",
901 + [],
902 + |row| row.get(0),
903 + ).unwrap();
904 + let parsed: serde_json::Value = serde_json::from_str(&data).unwrap();
905 + let obj = parsed.as_object().unwrap();
906 +
907 + // Every column in the whitelist must appear in the snapshot JSON
908 + for col in table_columns("samples").unwrap() {
909 + assert!(obj.contains_key(*col), "snapshot samples missing column: {}", col);
910 + }
911 + }
912 +
913 + #[test]
914 + fn snapshot_audio_analysis_columns_match_whitelist() {
915 + let db = setup_test_db();
916 + let conn = db.conn();
917 +
918 + set_sync_state(conn, "applying_remote", "1").unwrap();
919 + insert_sample(conn, "aa_test", "tone.wav", "wav");
920 + conn.execute(
921 + "INSERT INTO audio_analysis (hash, duration, sample_rate, channels, analyzed_at) VALUES ('aa_test', 1.5, 44100, 2, 1000000)",
922 + [],
923 + ).unwrap();
924 + set_sync_state(conn, "applying_remote", "0").unwrap();
925 + clear_changelog(conn);
926 +
927 + create_initial_snapshot(conn).unwrap();
928 +
929 + let data: String = conn.query_row(
930 + "SELECT data FROM sync_changelog WHERE table_name = 'audio_analysis' AND row_id = 'aa_test'",
931 + [],
932 + |row| row.get(0),
933 + ).unwrap();
934 + let parsed: serde_json::Value = serde_json::from_str(&data).unwrap();
935 + let obj = parsed.as_object().unwrap();
936 +
937 + for col in table_columns("audio_analysis").unwrap() {
938 + assert!(obj.contains_key(*col), "snapshot audio_analysis missing column: {}", col);
939 + }
940 + }
941 +
942 + #[test]
943 + fn unsafe_mode_excluded_from_sync() {
944 + let db = setup_test_db();
945 + let conn = db.conn();
946 + clear_changelog(conn);
947 +
948 + // Insert unsafe_mode — should NOT fire trigger
949 + conn.execute(
950 + "INSERT INTO user_config (key, value) VALUES ('unsafe_mode', '1')",
951 + [],
952 + ).unwrap();
953 + assert_eq!(changelog_count(conn, Some("user_config"), None), 0);
954 +
955 + // Update unsafe_mode — should NOT fire trigger
956 + conn.execute(
957 + "UPDATE user_config SET value = '0' WHERE key = 'unsafe_mode'",
958 + [],
959 + ).unwrap();
960 + assert_eq!(changelog_count(conn, Some("user_config"), None), 0);
961 +
962 + // Normal key — should fire trigger
963 + conn.execute(
964 + "INSERT INTO user_config (key, value) VALUES ('theme', 'dark')",
965 + [],
966 + ).unwrap();
967 + assert_eq!(changelog_count(conn, Some("user_config"), None), 1);
871 968 }
872 969
873 970 #[test]