//! Content file export: ZIP archive of audio, covers, videos, versions, and insertions. //! //! Writes the ZIP to a temporary file and uploads via S3 multipart upload, //! so peak memory is O(single_file) regardless of total export size. use std::io::Write; use axum::{ extract::{Query, State}, http::header::HeaderMap, response::{IntoResponse, Response}, }; use serde::Deserialize; use zip::write::SimpleFileOptions; use crate::{ auth::AuthUser, db, error::{AppError, Result, ResultExt}, helpers::is_htmx_request, templates::ExportContentReadyTemplate, AppState, }; use super::export_error_html; /// Max content exports running at once. Each can move up to 2 GB through a /// synchronous zip on the blocking pool; without a cap a burst could saturate /// the blocking pool and stall unrelated `spawn_blocking` work. Excess exports /// queue on the semaphore instead. const MAX_CONCURRENT_EXPORTS: usize = 3; static EXPORT_LIMITER: tokio::sync::Semaphore = tokio::sync::Semaphore::const_new(MAX_CONCURRENT_EXPORTS); /// Query parameters for the content export endpoint. #[derive(Deserialize)] pub(in crate::routes::api) struct ContentExportQuery { /// When set, only export files from this project (useful for large /// catalogs or to stay within the 2GB per-export memory limit). pub project_id: Option, } /// Export content files as a ZIP archive uploaded to S3. /// /// Collects audio, covers, version downloads, and insertion clips, /// bundles them with a README.txt manifest, uploads to S3 as a /// temporary export, and returns a presigned download link. /// /// Pass `?project_id=` to limit the export to a single project /// (insertions are user-scoped and always excluded from per-project exports). #[tracing::instrument(skip_all, name = "exports::export_content")] pub(in crate::routes::api) async fn export_content( State(state): State, headers: HeaderMap, Query(query): Query, AuthUser(user): AuthUser, ) -> Result { let is_htmx = is_htmx_request(&headers); // Hold a concurrency permit for the lifetime of the export so a burst can't // saturate the blocking pool. Acquired before any DB/S3 work, so a queued // request holds no connection while it waits. let _export_permit = EXPORT_LIMITER .acquire() .await .expect("export limiter semaphore is never closed"); let s3 = state.s3.as_ref().ok_or_else(|| { AppError::ServiceUnavailable("File storage is not configured".to_string()) })?; // Collect all S3 keys from items, versions, and insertions let item_keys = db::items::get_user_s3_keys(&state.db, user.id).await?; let version_keys = db::versions::get_user_version_s3_keys(&state.db, user.id).await?; // Build the list of (s3_key, zip_path, db_size) triples. The DB-known file // size lets us enforce the per-file/total caps without a per-file S3 HEAD // round-trip (these columns are written at upload-confirm time). `None` only // for legacy rows missing the size; those fall through to the post-download // total guard. let mut files: Vec<(String, String, Option)> = Vec::new(); for item in &item_keys { if let Some(pid) = query.project_id && item.project_id != pid { continue; } let slug = item.project_slug.as_str(); let title = sanitize_filename(&item.title); if let Some(ref key) = item.audio_s3_key { let ext = extension_from_key(key); files.push((key.clone(), format!("projects/{}/{}.{}", slug, title, ext), item.audio_file_size_bytes)); } if let Some(ref key) = item.cover_s3_key { let ext = extension_from_key(key); files.push((key.clone(), format!("projects/{}/{}-cover.{}", slug, title, ext), item.cover_file_size_bytes)); } if let Some(ref key) = item.video_s3_key { let ext = extension_from_key(key); files.push((key.clone(), format!("projects/{}/{}-video.{}", slug, title, ext), item.video_file_size_bytes)); } } for ver in &version_keys { if let Some(pid) = query.project_id && ver.project_id != pid { continue; } if let Some(ref key) = ver.s3_key { let slug = ver.project_slug.as_str(); let title = sanitize_filename(&ver.item_title); let fname = ver.file_name.as_deref().unwrap_or("file"); files.push((key.clone(), format!("projects/{}/{}/v{}-{}", slug, title, ver.version_number, fname), ver.file_size_bytes)); } } // Insertions are user-scoped (not project-scoped), so only include // them when exporting all content (no project_id filter). if query.project_id.is_none() { let insertions = db::content_insertions::list_insertions(&state.db, user.id).await?; for ins in &insertions { let ext = extension_from_key(&ins.storage_key); let title = sanitize_filename(&ins.title); files.push((ins.storage_key.clone(), format!("insertions/{}.{}", title, ext), Some(ins.file_size))); } } if files.is_empty() { if is_htmx { return Ok(export_error_html("No content files to export.")); } return Err(AppError::BadRequest("No content files to export.".to_string())); } // Write ZIP to a temporary file, downloading files one at a time. // Peak memory is O(largest_single_file) — the ZIP itself lives on disk. let s3_clone = s3.clone(); let username = user.username.to_string(); let tmp_dir = tempfile::tempdir() .context("create temp dir for export")?; let zip_path = tmp_dir.path().join("export.zip"); { // The `zip` crate's IO is synchronous; a single `write_all` of up to // 500 MB (compression is Stored, so this is raw disk IO) would stall a // tokio worker. Every blocking zip operation below runs on the blocking // pool via `spawn_blocking`; the writer is moved in and handed back out // each step. S3 downloads stay async, and peak memory is still // O(largest_single_file) — one file is in RAM at a time. let create_path = zip_path.clone(); let mut zip = tokio::task::spawn_blocking( move || -> std::result::Result<_, std::io::Error> { let zip_file = std::fs::File::create(&create_path)?; Ok(zip::ZipWriter::new(std::io::BufWriter::new(zip_file))) }, ) .await .context("join zip create task")? .context("create export zip file")?; let options = SimpleFileOptions::default() .compression_method(zip::CompressionMethod::Stored); let mut manifest: Vec<(String, i64)> = Vec::new(); let mut total_size: u64 = 0; const MAX_TOTAL_SIZE: u64 = 2 * 1024 * 1024 * 1024; // 2 GB const MAX_FILE_SIZE: u64 = 500 * 1024 * 1024; // 500 MB per file let mut skipped: Vec = Vec::new(); for (s3_key, zip_path_entry, db_size) in &files { // Per-file size pre-check BEFORE downloading so a single 20 GB video // can't blow the heap before the post-download total check fires. // The size comes from the DB column written at upload-confirm — no // S3 HEAD round-trip. The post-download total guard below is the // backstop for any row with a missing (None) size. if let Some(size) = db_size { let size = (*size).max(0) as u64; if size > MAX_FILE_SIZE { skipped.push(format!( "{} (exceeds 500 MB per-file export cap)", zip_path_entry )); continue; } if total_size + size > MAX_TOTAL_SIZE { let msg = "Content export exceeds 2 GB limit. Try exporting a single project instead."; if is_htmx { return Ok(export_error_html(msg)); } return Err(AppError::BadRequest(msg.to_string())); } } match s3_clone.download_object(s3_key).await { Ok(data) => { total_size += data.len() as u64; if total_size > MAX_TOTAL_SIZE { let msg = "Content export exceeds 2 GB limit. Try exporting a single project instead."; if is_htmx { return Ok(export_error_html(msg)); } return Err(AppError::BadRequest(msg.to_string())); } let file_size = data.len() as i64; // Move writer + file bytes onto the blocking pool, write, get // the writer back. `data` drops inside the task afterward, so // only one file is in RAM at a time. let entry = zip_path_entry.clone(); zip = tokio::task::spawn_blocking( move || -> std::result::Result<_, zip::result::ZipError> { zip.start_file(&entry, options)?; zip.write_all(&data)?; Ok(zip) }, ) .await .context("join zip write task")? .context("write file into export zip")?; manifest.push((zip_path_entry.clone(), file_size)); } Err(e) => { tracing::warn!("Failed to download S3 key {}: {}", s3_key, e); skipped.push(zip_path_entry.clone()); } } } if manifest.is_empty() { let msg = "Could not download any files from storage. Please try again later."; if is_htmx { return Ok(export_error_html(msg)); } return Err(AppError::Storage(msg.to_string())); } // Build README.txt as the last ZIP entry (cheap string work, async side) let now = chrono::Utc::now(); let mut readme = format!( "Makenot.work Content Export\n\ Creator: {}\n\ Exported: {}\n\ Files: {}\n\n\ Manifest:\n", username, now.format("%Y-%m-%d %H:%M:%S UTC"), manifest.len(), ); for (path, size) in &manifest { readme.push_str(&format!(" {} ({})\n", path, crate::helpers::format_file_size(*size))); } if !skipped.is_empty() { readme.push_str(&format!("\nSkipped ({} files could not be downloaded):\n", skipped.len())); for path in &skipped { readme.push_str(&format!(" {}\n", path)); } } readme.push_str("\nNote: Git repositories are not included in this export.\n"); readme.push_str("Clone them separately: git clone https://makenot.work/source//.git\n"); // Append README, finalize the central directory, and flush the buffer to // disk — all blocking — off the runtime before the upload reads the file. tokio::task::spawn_blocking(move || -> std::result::Result<(), zip::result::ZipError> { zip.start_file("README.txt", options)?; zip.write_all(readme.as_bytes())?; let buf = zip.finish()?; // Flush BufWriter so all bytes hit the OS file before we upload it. buf.into_inner().map_err(|e| e.into_error())?; Ok(()) }) .await .context("join zip finalize task")? .context("finalize export zip")?; } // Upload ZIP to S3 via multipart upload (streams from disk in 10 MB parts) let timestamp = chrono::Utc::now().format("%Y%m%d-%H%M%S"); let export_key = format!("{}/exports/content-{}.zip", user.id, timestamp); if let Err(e) = s3.upload_multipart(&export_key, "application/zip", &zip_path).await { tracing::error!(error = ?e, "Failed to upload content export ZIP to S3"); if is_htmx { return Ok(export_error_html("Failed to prepare download. Please try again.")); } return Err(e); } // Generate presigned download URL (1 hour) let download_url = match s3.presign_download(&export_key, Some(3600)).await { Ok(url) => url, Err(e) => { tracing::error!(error = ?e, "Failed to generate presigned URL for content export"); if is_htmx { return Ok(export_error_html("Export created but download link failed. Please try again.")); } return Err(e); } }; if is_htmx { return Ok(ExportContentReadyTemplate { download_url }.into_response()); } // Direct API call: redirect to presigned URL Response::builder() .status(303) .header("Location", &download_url) .body("".into()) .context("build export redirect response") } /// Extract file extension from an S3 key (e.g. "user/item/audio/track.mp3" -> "mp3"). fn extension_from_key(key: &str) -> &str { key.rsplit('.').next().unwrap_or("bin") } /// Sanitize a title for use as a filename in the ZIP archive. fn sanitize_filename(name: &str) -> String { name.chars() .map(|c| if c.is_alphanumeric() || c == '-' || c == '_' || c == ' ' { c } else { '_' }) .collect::() .trim() .to_string() } #[cfg(test)] mod tests { use super::*; #[test] fn extension_from_key_mp3() { assert_eq!(extension_from_key("user/item/audio/track.mp3"), "mp3"); } #[test] fn extension_from_key_nested_path() { assert_eq!(extension_from_key("a/b/c/file.tar.gz"), "gz"); } #[test] fn extension_from_key_no_dot_returns_whole_segment() { // rsplit('.').next() returns the whole string when no dot is present assert_eq!(extension_from_key("user/item/audio/noext"), "user/item/audio/noext"); } #[test] fn extension_from_key_empty_returns_bin() { // rsplit('.').next() on "" returns Some(""), which unwrap_or("bin") keeps as "" assert_eq!(extension_from_key(""), ""); } #[test] fn extension_from_key_dot_only() { assert_eq!(extension_from_key("file."), ""); } #[test] fn sanitize_filename_passthrough() { assert_eq!(sanitize_filename("My Track"), "My Track"); } #[test] fn sanitize_filename_special_chars() { assert_eq!(sanitize_filename("hello/world:2"), "hello_world_2"); } #[test] fn sanitize_filename_preserves_hyphens_underscores() { assert_eq!(sanitize_filename("my-file_name"), "my-file_name"); } #[test] fn sanitize_filename_trims_whitespace() { assert_eq!(sanitize_filename(" padded "), "padded"); } #[test] fn sanitize_filename_empty() { assert_eq!(sanitize_filename(""), ""); } #[test] fn sanitize_filename_all_special() { assert_eq!(sanitize_filename("@#$%"), "____"); } }