From 12a566a07a64c2c34ee8eb2e68c6632c12db9e44 Mon Sep 17 00:00:00 2001 From: eu Date: Sat, 28 Mar 2026 23:16:31 -0400 Subject: [PATCH 01/13] fixes for extractAudioToFile() to WAV --- android/gradlew | 0 .../src/features/audio/ExtractAudio.kt | 156 +++++- .../src/features/audio/WavFileWriter.kt | 502 ++++++++++++++++++ .../integration_test/audio_extract_test.dart | 39 +- .../audio/audio_extract_example_page.dart | 26 +- example/pubspec.yaml | 1 + .../src/features/audio/ExtractAudio.swift | 19 +- .../audio/models/AudioExtractConfig.swift | 3 +- .../src/features/audio/ExtractAudio.swift | 19 +- .../audio/models/AudioExtractConfig.swift | 3 +- 10 files changed, 734 insertions(+), 34 deletions(-) mode change 100644 => 100755 android/gradlew create mode 100644 android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt diff --git a/android/gradlew b/android/gradlew old mode 100644 new mode 100755 diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt index d37c5fc8..8ad6d922 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt @@ -22,12 +22,15 @@ class NoAudioTrackException(message: String) : Exception(message) /** * Service for extracting audio from video files. * - * This class handles the audio extraction pipeline using Android MediaExtractor and MediaMuxer: + * This class handles the audio extraction pipeline: * - Extracts audio track from video file * - Supports trimming (start/end time) * - Supports multiple output formats (MP3, AAC, WAV, M4A, OGG) * - Provides progress tracking during extraction * - Supports cancellation of active extraction jobs + * + * For WAV format, uses custom WAV file writer with PCM encoding. + * For other formats, uses Android MediaExtractor and MediaMuxer. 
*/ class ExtractAudio(private val context: Context) { @@ -54,6 +57,153 @@ class ExtractAudio(private val context: Context) { onProgress: (Double) -> Unit, onComplete: (ByteArray?) -> Unit, onError: (Throwable) -> Unit + ): AudioExtractJobHandle { + return if (config.format.lowercase() == "wav") { + // WAV format requires special handling + extractToWav(config, onProgress, onComplete, onError) + } else { + // Use MediaMuxer for other formats + extractWithMuxer(config, onProgress, onComplete, onError) + } + } + + /** + * Extracts audio to WAV format using custom WAV file writer. + * + * This method properly handles both compressed and PCM audio formats: + * - Compressed formats (AAC, MP3): Uses MediaCodec to decode to PCM + * - PCM formats: Writes directly to WAV file + */ + private fun extractToWav( + config: AudioExtractConfig, + onProgress: (Double) -> Unit, + onComplete: (ByteArray?) -> Unit, + onError: (Throwable) -> Unit + ): AudioExtractJobHandle { + val shouldStop = AtomicBoolean(false) + val mainHandler = Handler(Looper.getMainLooper()) + + // Determine output file location + val outputFile = if (config.outputPath != null) { + File(config.outputPath) + } else { + File( + context.cacheDir, + "audio_output_${System.currentTimeMillis()}.wav" + ) + } + + // Run extraction in background thread + Thread { + var extractor: MediaExtractor? = null + var wavWriter: WavFileWriter? 
= null + + try { + // Initialize extractor + extractor = MediaExtractor() + extractor.setDataSource(config.inputPath) + + // Find audio track + val audioTrackIndex = findAudioTrack(extractor) + if (audioTrackIndex < 0) { + throw NoAudioTrackException("No audio track found in video file") + } + + val audioFormat = extractor.getTrackFormat(audioTrackIndex) + + // Get duration for progress tracking + val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION) + val startUs = config.startUs ?: 0L + val endUs = config.endUs ?: durationUs + + // Validate end time + if (endUs <= startUs) { + throw IllegalArgumentException("endUs must be greater than startUs") + } + + // Create WAV writer (handles both compressed and PCM audio) + wavWriter = WavFileWriter(outputFile) + + mainHandler.post { onProgress(0.0) } + + wavWriter.extractAndWrite( + extractor = extractor, + audioTrackIndex = audioTrackIndex, + startUs = startUs, + endUs = endUs, + onProgress = { progress -> + if (!shouldStop.get()) { + mainHandler.post { onProgress(progress) } + } + }, + shouldStop = { shouldStop.get() } + ) + + // Check if cancelled + if (shouldStop.get()) { + outputFile.delete() + throw InterruptedException("Extraction cancelled by user") + } + + extractor.release() + extractor = null + + // Read output and invoke completion callback + mainHandler.post { + try { + if (config.outputPath != null) { + // Output saved to file, return null + onComplete(null) + } else { + // Read temporary file and return bytes + val resultBytes = outputFile.readBytes() + onComplete(resultBytes) + } + } catch (e: Exception) { + onError(e) + } finally { + if (config.outputPath == null) { + outputFile.delete() + } + } + } + + } catch (e: Exception) { + Log.e(TAG, "Error extracting WAV audio: ${e.message}", e) + mainHandler.post { + onError(e) + } + // Clean up output file on error + if (outputFile.exists()) { + outputFile.delete() + } + } finally { + try { + extractor?.release() + } catch (e: Exception) { + Log.w(TAG, 
"Error releasing extractor: ${e.message}") + } + } + }.start() + + // Return cancellation handle + return AudioExtractJobHandle { + shouldStop.set(true) + mainHandler.removeCallbacksAndMessages(null) + if (outputFile.exists()) { + outputFile.delete() + } + } + } + + /** + * Extracts audio using MediaMuxer for non-WAV formats. + */ + private fun extractWithMuxer( + config: AudioExtractConfig, + onProgress: (Double) -> Unit, + onComplete: (ByteArray?) -> Unit, + onError: (Throwable) -> Unit ): AudioExtractJobHandle { val shouldStop = AtomicBoolean(false) val mainHandler = Handler(Looper.getMainLooper()) @@ -245,7 +395,7 @@ class ExtractAudio(private val context: Context) { /** * Determines the MediaMuxer output format based on the requested audio format. * - * @param format Audio format string (mp3, aac, wav, m4a, ogg) + * @param format Audio format string (mp3, aac, m4a, ogg) * @return MediaMuxer output format constant */ private fun determineOutputFormat(format: String): Int { @@ -253,7 +403,7 @@ class ExtractAudio(private val context: Context) { "mp3" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 "aac" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 "m4a" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 - "wav" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 + "wav" -> throw IllegalArgumentException("WAV format should be handled by extractToWav()") "ogg" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_OGG "webm" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_WEBM else -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 // Default to MP4 container diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt new file mode 100644 index 00000000..d22fbd80 --- /dev/null +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt @@ -0,0 +1,502 @@ +package ch.waio.pro_video_editor.src.features.audio + +import android.media.AudioFormat 
+import android.media.MediaCodec +import android.media.MediaExtractor +import android.media.MediaFormat +import java.io.File +import java.io.FileOutputStream +import java.io.RandomAccessFile +import java.nio.ByteBuffer +import java.nio.ByteOrder + +/** + * Utility class for writing WAV audio files with proper RIFF/WAVE format. + * + * This class handles the creation of WAV files by: + * - Writing a proper RIFF/WAVE header + * - Decoding compressed audio to PCM samples + * - Writing PCM data in the correct format + * - Updating the file size fields after writing + */ +class WavFileWriter(private val outputFile: File) { + + companion object { + // Magic numbers for WAV format + // Note: These are stored in little-endian byte order (reversed) so that when + // written via a LITTLE_ENDIAN ByteBuffer, they appear correctly in the file. + private const val RIFF_HEADER = 0x46464952 // "RIFF" in little-endian + private const val WAVE_HEADER = 0x45564157 // "WAVE" in little-endian + private const val FMT_HEADER = 0x20746d66 // "fmt " in little-endian + private const val DATA_HEADER = 0x61746164 // "data" in little-endian + private const val PCM_FORMAT = 1.toShort() + private const val IEEE_FLOAT_FORMAT = 3.toShort() + private const val BUFFER_SIZE = 1024 * 1024 // 1MB buffer + } + + private var sampleRate: Int = 44100 + private var numChannels: Int = 2 + private var bitsPerSample: Int = 16 + private var isFloatPcm: Boolean = false + private var totalDataSize: Long = 0 + + /** + * Extracts audio from a video file and writes it as a WAV file. 
+ * + * @param extractor MediaExtractor configured with the source video + * @param audioTrackIndex Index of the audio track in the extractor + * @param startUs Optional start time in microseconds + * @param endUs Optional end time in microseconds + * @param onProgress Progress callback (0.0 to 1.0) + * @param shouldStop Atomic boolean to check for cancellation + * @throws Exception if extraction or writing fails + */ + fun extractAndWrite( + extractor: MediaExtractor, + audioTrackIndex: Int, + startUs: Long = 0L, + endUs: Long = Long.MAX_VALUE, + onProgress: (Double) -> Unit = {}, + shouldStop: () -> Boolean = { false } + ) { + val audioFormat = extractor.getTrackFormat(audioTrackIndex) + + sampleRate = audioFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + numChannels = audioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + bitsPerSample = 16 // PCM 16-bit is standard + + val mime = audioFormat.getString(MediaFormat.KEY_MIME) ?: throw IllegalArgumentException("No MIME type in audio format") + + val isPcm = mime.equals("audio/raw", ignoreCase = true) || + mime.equals("audio/pcm", ignoreCase = true) + if (isPcm) { + extractPcmToWav(extractor, audioTrackIndex, startUs, endUs, onProgress, shouldStop) + } else { + extractAndDecodeToWav(extractor, audioFormat, audioTrackIndex, startUs, endUs, onProgress, shouldStop) + } + } + + /** + * Extracts compressed audio and decodes it to PCM WAV format. + */ + private fun extractAndDecodeToWav( + extractor: MediaExtractor, + audioFormat: MediaFormat, + audioTrackIndex: Int, + startUs: Long, + endUs: Long, + onProgress: (Double) -> Unit, + shouldStop: () -> Boolean + ) { + var decoder: MediaCodec? = null + var outputStream: FileOutputStream? = null + + try { + totalDataSize = 0 + + val mime = audioFormat.getString(MediaFormat.KEY_MIME)!! 
+ decoder = MediaCodec.createDecoderByType(mime) + decoder.configure(audioFormat, null, null, 0) + decoder.start() + + // Get the decoder's ACTUAL output format immediately after starting + // don't rely on INFO_OUTPUT_FORMAT_CHANGED which may not be sent + val decoderOutputFormat = decoder.outputFormat + + // output parameters from decoder + sampleRate = if (decoderOutputFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) { + decoderOutputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + } else { + audioFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + } + + numChannels = if (decoderOutputFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) { + decoderOutputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + } else { + audioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + } + + // Determine bits per sample from PCM encoding + // Convert float PCM to 16-bit integer for compatibility + if (decoderOutputFormat.containsKey(MediaFormat.KEY_PCM_ENCODING)) { + val pcmEncoding = decoderOutputFormat.getInteger(MediaFormat.KEY_PCM_ENCODING) + when (pcmEncoding) { + AudioFormat.ENCODING_PCM_16BIT -> { + bitsPerSample = 16 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_8BIT -> { + bitsPerSample = 8 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_FLOAT -> { + // Convert float to 16-bit for better compatibility + bitsPerSample = 16 + isFloatPcm = true + } + else -> { + bitsPerSample = 16 + isFloatPcm = false + } + } + } else { + bitsPerSample = 16 + isFloatPcm = false + } + + extractor.selectTrack(audioTrackIndex) + + if (startUs > 0) { + extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + } + + outputStream = FileOutputStream(outputFile) + + // Write initial WAV header with decoder output format + // We'll update the sizes at the end + writeWavHeader(outputStream, 0) + + val validEndUs = if (endUs == Long.MAX_VALUE) Long.MAX_VALUE else endUs + val totalDurationUs = if (validEndUs == Long.MAX_VALUE) Long.MAX_VALUE else (validEndUs - startUs) + var currentTimeUs 
= startUs + var inputEos = false + var outputEos = false + + onProgress(0.0) + + while (!outputEos && !shouldStop()) { + if (!inputEos) { + val inputBufferId = decoder.dequeueInputBuffer(10000) + if (inputBufferId >= 0) { + val decoderInputBuffer = decoder.getInputBuffer(inputBufferId)!! + decoderInputBuffer.clear() + + val sampleSize = extractor.readSampleData(decoderInputBuffer, 0) + val presentationTimeUs = extractor.sampleTime + + if (sampleSize < 0 || presentationTimeUs > validEndUs) { + // End of stream or reached end time + decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM) + inputEos = true + } else { + decoder.queueInputBuffer(inputBufferId, 0, sampleSize, presentationTimeUs, 0) + extractor.advance() + currentTimeUs = presentationTimeUs + + if (totalDurationUs != Long.MAX_VALUE) { + val progress = ((currentTimeUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) + onProgress(progress) + } + } + } + } + + val bufferInfo = MediaCodec.BufferInfo() + val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10000) + + when { + outputBufferId == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> { + // Output format changed during decoding - update our parameters + val newOutputFormat = decoder.outputFormat + + var formatChanged = false + if (newOutputFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) { + val newSampleRate = newOutputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + if (newSampleRate != sampleRate) { + sampleRate = newSampleRate + formatChanged = true + } + } + if (newOutputFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) { + val newChannels = newOutputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + if (newChannels != numChannels) { + numChannels = newChannels + formatChanged = true + } + } + if (newOutputFormat.containsKey(MediaFormat.KEY_PCM_ENCODING)) { + val pcmEncoding = newOutputFormat.getInteger(MediaFormat.KEY_PCM_ENCODING) + when (pcmEncoding) { + AudioFormat.ENCODING_PCM_16BIT -> { + if 
(bitsPerSample != 16 || isFloatPcm) { + bitsPerSample = 16 + isFloatPcm = false + formatChanged = true + } + } + AudioFormat.ENCODING_PCM_8BIT -> { + if (bitsPerSample != 8 || isFloatPcm) { + bitsPerSample = 8 + isFloatPcm = false + formatChanged = true + } + } + AudioFormat.ENCODING_PCM_FLOAT -> { + // Convert float to 16-bit for compatibility + if (bitsPerSample != 16 || !isFloatPcm) { + bitsPerSample = 16 + isFloatPcm = true + formatChanged = true + } + } + else -> { + if (bitsPerSample != 16 || isFloatPcm) { + bitsPerSample = 16 + isFloatPcm = false + formatChanged = true + } + } + } + } + + // If format changed, rewrite the header + if (formatChanged) { + outputStream.flush() + val raf = RandomAccessFile(outputFile, "rw") + raf.seek(0L) + + val byteRate = sampleRate * numChannels * bitsPerSample / 8 + val blockAlign = (numChannels * bitsPerSample / 8).toShort() + + val headerBytes = ByteBuffer.allocate(44).apply { + // All values written in little-endian order + // Magic number constants are pre-encoded for little-endian + order(ByteOrder.LITTLE_ENDIAN) + putInt(RIFF_HEADER) + putInt(0) // Will update at end + putInt(WAVE_HEADER) + + putInt(FMT_HEADER) + putInt(16) + putShort(PCM_FORMAT) + putShort(numChannels.toShort()) + putInt(sampleRate) + putInt(byteRate) + putShort(blockAlign) + putShort(bitsPerSample.toShort()) + + putInt(DATA_HEADER) + putInt(0) // Will update at end + }.array() + + raf.write(headerBytes) + raf.close() + } + } + outputBufferId >= 0 -> { + val decoderOutputBuffer = decoder.getOutputBuffer(outputBufferId)!! 
+ + if (bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) { + outputEos = true + } + + if (bufferInfo.size > 0) { + // Get PCM data from decoder output buffer + decoderOutputBuffer.position(bufferInfo.offset) + decoderOutputBuffer.limit(bufferInfo.offset + bufferInfo.size) + + if (isFloatPcm) { + // Convert float PCM to 16-bit integer PCM + val floatSamples = bufferInfo.size / 4 // 4 bytes per float + val int16Buffer = ByteBuffer.allocate(floatSamples * 2) // 2 bytes per int16 + int16Buffer.order(ByteOrder.LITTLE_ENDIAN) + + for (i in 0 until floatSamples) { + val floatValue = decoderOutputBuffer.float + // Clamp and convert float [-1.0, 1.0] to int16 [-32768, 32767] + val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort() + int16Buffer.putShort(intValue) + } + + outputStream.write(int16Buffer.array()) + totalDataSize += int16Buffer.array().size + } else { + // Write PCM data directly (already in correct format) + val pcmData = ByteArray(bufferInfo.size) + decoderOutputBuffer.get(pcmData) + outputStream.write(pcmData) + totalDataSize += pcmData.size + } + } + + decoder.releaseOutputBuffer(outputBufferId, false) + } + } + } + + outputStream.flush() + outputStream.close() + outputStream = null + + // Update WAV header with actual sizes + updateWavHeader() + + onProgress(1.0) + + } finally { + outputStream?.close() + decoder?.stop() + decoder?.release() + } + } + + /** + * Extracts PCM audio directly without decoding. + */ + private fun extractPcmToWav( + extractor: MediaExtractor, + audioTrackIndex: Int, + startUs: Long, + endUs: Long, + onProgress: (Double) -> Unit, + shouldStop: () -> Boolean + ) { + var outputStream: FileOutputStream? 
= null + + try { + // Reset total data size for this extraction + totalDataSize = 0 + + // Select the audio track in the extractor + extractor.selectTrack(audioTrackIndex) + + if (startUs > 0) { + extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + } + + outputStream = FileOutputStream(outputFile) + + // Write placeholder WAV header + writeWavHeader(outputStream, 0) + + val buffer = ByteBuffer.allocate(BUFFER_SIZE) + val validEndUs = if (endUs == Long.MAX_VALUE) Long.MAX_VALUE else endUs + val totalDurationUs = if (validEndUs == Long.MAX_VALUE) Long.MAX_VALUE else (validEndUs - startUs) + var currentTimeUs = startUs + + onProgress(0.0) + + while (!shouldStop()) { + buffer.clear() + val sampleSize = extractor.readSampleData(buffer, 0) + + if (sampleSize < 0) { + // End of stream + break + } + + val presentationTimeUs = extractor.sampleTime + + if (presentationTimeUs > validEndUs) { + break + } + + buffer.position(0) + buffer.limit(sampleSize) + + val pcmData = ByteArray(sampleSize) + buffer.get(pcmData) + outputStream.write(pcmData) + totalDataSize += sampleSize + + currentTimeUs = presentationTimeUs + if (totalDurationUs != Long.MAX_VALUE) { + val progress = ((currentTimeUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) + onProgress(progress) + } + + extractor.advance() + } + + outputStream.flush() + outputStream.close() + outputStream = null + + // Update WAV header with actual sizes + updateWavHeader() + + onProgress(1.0) + + } finally { + outputStream?.close() + } + } + + /** + * Writes a WAV file header with RIFF/WAVE format. + * + * Note: RIFF files have a 4GB limit due to 32-bit size fields. This is a WAV format limitation. 
+ */ + private fun writeWavHeader(outputStream: FileOutputStream, dataSize: Long) { + val header = ByteBuffer.allocate(44) + header.order(ByteOrder.LITTLE_ENDIAN) + + val byteRate = sampleRate * numChannels * bitsPerSample / 8 + val blockAlign = (numChannels * bitsPerSample / 8).toShort() + + // Ensure dataSize doesn't exceed 32-bit signed integer limit for WAV format + // WAV files are limited to 4GB due to 32-bit size fields in RIFF format + val safeSizeForHeader = if (dataSize > 0x7FFFFFFFL) { + 0x7FFFFFFF + } else { + dataSize.toInt() + } + + // RIFF header - all values written in little-endian order + // Magic number constants are pre-encoded for little-endian + header.putInt(RIFF_HEADER) // "RIFF" + header.putInt(36 + safeSizeForHeader) // File size - 8 + header.putInt(WAVE_HEADER) // "WAVE" + + // fmt sub-chunk + header.putInt(FMT_HEADER) // "fmt " + header.putInt(16) // Sub-chunk size (16 for PCM) + header.putShort(PCM_FORMAT) // Audio format (1 = PCM) + header.putShort(numChannels.toShort()) // Number of channels + header.putInt(sampleRate) // Sample rate + header.putInt(byteRate) // Byte rate + header.putShort(blockAlign) // Block align + header.putShort(bitsPerSample.toShort()) // Bits per sample + + // data sub-chunk + header.putInt(DATA_HEADER) // "data" + header.putInt(safeSizeForHeader) // Data size + + outputStream.write(header.array()) + } + + /** + * Updates the WAV header with the actual file sizes after writing is complete. + * + * Note: WAV files are limited to 4GB due to 32-bit size fields. If the file exceeds this, + * the header size fields will be clamped to the maximum 32-bit signed integer value. 
+ */ + private fun updateWavHeader() { + // Ensure totalDataSize doesn't exceed 32-bit limit + val safeSizeForHeader = if (totalDataSize > 0x7FFFFFFFL) { + 0x7FFFFFFF + } else { + totalDataSize.toInt() + } + + RandomAccessFile(outputFile, "rw").use { raf -> + // Update file size at byte 4 (RIFF chunk size = 36 + data size) + // RIFF chunk size = file size - 8 bytes + raf.seek(4L) + val riffSize = 36 + safeSizeForHeader + raf.write(ByteBuffer.allocate(4).apply { + order(ByteOrder.LITTLE_ENDIAN) + putInt(riffSize) + flip() + }.array()) + + // Update data size at byte 40 (data chunk size) + raf.seek(40L) + raf.write(ByteBuffer.allocate(4).apply { + order(ByteOrder.LITTLE_ENDIAN) + putInt(safeSizeForHeader) + flip() + }.array()) + } + } +} diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index 841166d2..08c0ebe5 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -3,6 +3,7 @@ import 'dart:io'; import 'package:flutter/foundation.dart'; import 'package:flutter_test/flutter_test.dart'; import 'package:integration_test/integration_test.dart'; +import 'package:mime/mime.dart'; import 'package:path_provider/path_provider.dart'; import 'package:pro_video_editor/pro_video_editor.dart'; import 'package:pro_video_editor_example/core/constants/example_constants.dart'; @@ -17,6 +18,8 @@ void main() { // Audio extraction is not supported on Web, Windows, and Linux yet final skipPlatform = kIsWeb || isWindows || isLinux; + final pve = ProVideoEditor.instance; + /// Helper to check if a format is supported on current platform bool isFormatSupported(AudioFormat format) { if (kIsWeb) return false; @@ -24,9 +27,7 @@ void main() { switch (format) { case AudioFormat.mp3: return Platform.isAndroid; // MP3 only on Android - case AudioFormat.aac: - case AudioFormat.m4a: - case AudioFormat.wav: + case AudioFormat.aac || AudioFormat.m4a || AudioFormat.wav: return 
Platform.isAndroid || Platform.isIOS || Platform.isMacOS; case AudioFormat.caf: return Platform.isIOS || Platform.isMacOS; // CAF only on Apple @@ -41,15 +42,8 @@ void main() { final outputPath = '${directory.path}/test_audio_${DateTime.now().millisecondsSinceEpoch}.${format.extension}'; - final config = AudioExtractConfigs( - video: testVideo, - format: format, - ); - - final result = await ProVideoEditor.instance.extractAudioToFile( - outputPath, - config, - ); + final config = AudioExtractConfigs(video: testVideo, format: format); + final result = await pve.extractAudioToFile(outputPath, config); expect(result, equals(outputPath)); @@ -58,6 +52,10 @@ void main() { expect(await file.exists(), isTrue, reason: 'Audio file should exist at $outputPath'); + final header = file.openSync().readSync(defaultMagicNumbersMaxLength); + final mimeType = lookupMimeType(result, headerBytes: header); + expect(mimeType, format.mimeType); + // Verify file has content final fileSize = await file.length(); expect(fileSize, greaterThan(1000), @@ -84,10 +82,7 @@ void main() { endTime: const Duration(seconds: 10), ); - final result = await ProVideoEditor.instance.extractAudioToFile( - outputPath, - config, - ); + final result = await pve.extractAudioToFile(outputPath, config); expect(result, equals(outputPath)); @@ -127,12 +122,11 @@ void main() { ); final progressValues = []; - final subscription = - ProVideoEditor.instance.progressStreamById(config.id).listen((event) { + final subscription = pve.progressStreamById(config.id).listen((event) { progressValues.add(event.progress); }); - await ProVideoEditor.instance.extractAudioToFile(outputPath, config); + await pve.extractAudioToFile(outputPath, config); await subscription.cancel(); // Verify progress updates @@ -173,14 +167,13 @@ void main() { ); // Start extraction - final extractionFuture = - ProVideoEditor.instance.extractAudioToFile(outputPath, config); + final extractionFuture = pve.extractAudioToFile(outputPath, config); // Wait 
a bit to ensure extraction has started await Future.delayed(const Duration(milliseconds: 100)); // Cancel the task - await ProVideoEditor.instance.cancel(config.id); + await pve.cancel(config.id); // Extraction should throw or complete with error try { @@ -220,7 +213,7 @@ void main() { ); try { - await ProVideoEditor.instance.extractAudioToFile(outputPath, config); + await pve.extractAudioToFile(outputPath, config); // If it succeeds, the implementation might handle it gracefully // by swapping or clamping the values } catch (e) { diff --git a/example/lib/features/audio/audio_extract_example_page.dart b/example/lib/features/audio/audio_extract_example_page.dart index 0fa58595..1068465f 100644 --- a/example/lib/features/audio/audio_extract_example_page.dart +++ b/example/lib/features/audio/audio_extract_example_page.dart @@ -4,8 +4,10 @@ import 'dart:io'; import 'package:audioplayers/audioplayers.dart'; import 'package:flutter/foundation.dart'; import 'package:flutter/material.dart'; +import 'package:mime/mime.dart'; import 'package:path_provider/path_provider.dart'; import 'package:pro_video_editor/pro_video_editor.dart'; +import 'package:wav/wav.dart'; import '/core/constants/example_constants.dart'; @@ -58,8 +60,7 @@ class _AudioExtractExamplePageState extends State { if (!_isFormatSupported(_selectedFormat)) { // Find first supported format _selectedFormat = AudioFormat.values.firstWhere( - _isFormatSupported, - orElse: () => AudioFormat.m4a, // Fallback to M4A + _isFormatSupported, orElse: () => AudioFormat.m4a, // Fallback to M4A ); } } @@ -124,9 +125,26 @@ class _AudioExtractExamplePageState extends State { }); if (mounted) { + var raf = File(outputPath).openSync(); + var bytes = raf.readSync(defaultMagicNumbersMaxLength); + raf.closeSync(); + + var info = lookupMimeType(outputPath, headerBytes: bytes) ?? 
'unknown'; + + if (_selectedFormat == AudioFormat.wav) { + try { + var wav = Wav.read(File(outputPath).readAsBytesSync()); + info += ' channels:${wav.channels.length}' + ' sampleRate:${wav.samplesPerSecond}' + ' format:${wav.format.name}'; + } catch (_) { + info += ' (invalid wav)'; + } + } + ScaffoldMessenger.of(context).showSnackBar( - const SnackBar( - content: Text('Audio extracted successfully!'), + SnackBar( + content: Text('Audio extracted successfully!\n$info'), backgroundColor: Colors.green, ), ); diff --git a/example/pubspec.yaml b/example/pubspec.yaml index a0c01a5a..1721b8bd 100644 --- a/example/pubspec.yaml +++ b/example/pubspec.yaml @@ -22,6 +22,7 @@ dependencies: flutter_colorpicker: ^1.1.0 file_picker: ^10.2.1 mime: ^2.0.0 + wav: 1.5.0 path_provider: ^2.1.5 video_player: ^2.9.3 diff --git a/ios/Classes/src/features/audio/ExtractAudio.swift b/ios/Classes/src/features/audio/ExtractAudio.swift index 8fe5ff56..4b46339f 100644 --- a/ios/Classes/src/features/audio/ExtractAudio.swift +++ b/ios/Classes/src/features/audio/ExtractAudio.swift @@ -406,6 +406,22 @@ class ExtractAudio { let sampleRate = audioStreamBasicDescription?.mSampleRate ?? 44100 let channels = audioStreamBasicDescription?.mChannelsPerFrame ?? 
2 + // Create audio channel layout based on number of channels + var channelLayout = AudioChannelLayout() + channelLayout.mChannelBitmap = AudioChannelBitmap(rawValue: 0) + channelLayout.mNumberChannelDescriptions = 0 + channelLayout.mChannelLayoutTag = switch channels { + case 1: kAudioChannelLayoutTag_Mono + case 2: kAudioChannelLayoutTag_Stereo + case 3: kAudioChannelLayoutTag_MPEG_3_0_A + case 4: kAudioChannelLayoutTag_Quadraphonic + case 5: kAudioChannelLayoutTag_MPEG_5_0_A + case 6: kAudioChannelLayoutTag_MPEG_5_1_A + case 7: kAudioChannelLayoutTag_MPEG_6_1_A + case 8: kAudioChannelLayoutTag_MPEG_7_1_A + default: kAudioChannelLayoutTag_DiscreteInOrder | UInt32(channels) + } + // Configure writer input for PCM WAV let writerInputSettings: [String: Any] = [ AVFormatIDKey: kAudioFormatLinearPCM, @@ -414,7 +430,8 @@ class ExtractAudio { AVLinearPCMBitDepthKey: 16, AVLinearPCMIsFloatKey: false, AVLinearPCMIsBigEndianKey: false, - AVLinearPCMIsNonInterleaved: false + AVLinearPCMIsNonInterleaved: false, + AVChannelLayoutKey: Data(bytes: &channelLayout, count: MemoryLayout.size) ] let writerInput = AVAssetWriterInput(mediaType: .audio, outputSettings: writerInputSettings) diff --git a/ios/Classes/src/features/audio/models/AudioExtractConfig.swift b/ios/Classes/src/features/audio/models/AudioExtractConfig.swift index e20c5c4a..d6ddc476 100644 --- a/ios/Classes/src/features/audio/models/AudioExtractConfig.swift +++ b/ios/Classes/src/features/audio/models/AudioExtractConfig.swift @@ -56,13 +56,14 @@ struct AudioExtractConfig { /// Returns the file extension for the output audio file based on the format. 
/// - /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a") + /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a", "wav") func getOutputExtension() -> String { switch format.lowercased() { case "mp3": return "mp3" case "aac": return "m4a" case "m4a": return "m4a" case "caf": return "caf" + case "wav": return "wav" default: return "m4a" } } diff --git a/macos/Classes/src/features/audio/ExtractAudio.swift b/macos/Classes/src/features/audio/ExtractAudio.swift index 3af37529..0f971813 100644 --- a/macos/Classes/src/features/audio/ExtractAudio.swift +++ b/macos/Classes/src/features/audio/ExtractAudio.swift @@ -406,6 +406,22 @@ class ExtractAudio { let sampleRate = audioStreamBasicDescription?.mSampleRate ?? 44100 let channels = audioStreamBasicDescription?.mChannelsPerFrame ?? 2 + // Create audio channel layout based on number of channels + var channelLayout = AudioChannelLayout() + channelLayout.mChannelBitmap = AudioChannelBitmap(rawValue: 0) + channelLayout.mNumberChannelDescriptions = 0 + channelLayout.mChannelLayoutTag = switch channels { + case 1: kAudioChannelLayoutTag_Mono + case 2: kAudioChannelLayoutTag_Stereo + case 3: kAudioChannelLayoutTag_MPEG_3_0_A + case 4: kAudioChannelLayoutTag_Quadraphonic + case 5: kAudioChannelLayoutTag_MPEG_5_0_A + case 6: kAudioChannelLayoutTag_MPEG_5_1_A + case 7: kAudioChannelLayoutTag_MPEG_6_1_A + case 8: kAudioChannelLayoutTag_MPEG_7_1_A + default: kAudioChannelLayoutTag_DiscreteInOrder | UInt32(channels) + } + // Configure writer input for PCM WAV let writerInputSettings: [String: Any] = [ AVFormatIDKey: kAudioFormatLinearPCM, @@ -414,7 +430,8 @@ class ExtractAudio { AVLinearPCMBitDepthKey: 16, AVLinearPCMIsFloatKey: false, AVLinearPCMIsBigEndianKey: false, - AVLinearPCMIsNonInterleaved: false + AVLinearPCMIsNonInterleaved: false, + AVChannelLayoutKey: Data(bytes: &channelLayout, count: MemoryLayout<AudioChannelLayout>.size) ] let writerInput = AVAssetWriterInput(mediaType: .audio, outputSettings: 
writerInputSettings) diff --git a/macos/Classes/src/features/audio/models/AudioExtractConfig.swift b/macos/Classes/src/features/audio/models/AudioExtractConfig.swift index e20c5c4a..d6ddc476 100644 --- a/macos/Classes/src/features/audio/models/AudioExtractConfig.swift +++ b/macos/Classes/src/features/audio/models/AudioExtractConfig.swift @@ -56,13 +56,14 @@ struct AudioExtractConfig { /// Returns the file extension for the output audio file based on the format. /// - /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a") + /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a", "wav") func getOutputExtension() -> String { switch format.lowercased() { case "mp3": return "mp3" case "aac": return "m4a" case "m4a": return "m4a" case "caf": return "caf" + case "wav": return "wav" default: return "m4a" } } From 8e87d8d9968b5ab02d274f372f11af91ca60f760 Mon Sep 17 00:00:00 2001 From: eu Date: Mon, 30 Mar 2026 16:19:41 -0400 Subject: [PATCH 02/13] an attempt to fix audio timeline --- .../src/features/audio/ExtractAudio.swift | 74 +++++++++++++------ .../src/features/audio/ExtractAudio.swift | 74 +++++++++++++------ 2 files changed, 106 insertions(+), 42 deletions(-) diff --git a/ios/Classes/src/features/audio/ExtractAudio.swift b/ios/Classes/src/features/audio/ExtractAudio.swift index 4b46339f..f411acff 100644 --- a/ios/Classes/src/features/audio/ExtractAudio.swift +++ b/ios/Classes/src/features/audio/ExtractAudio.swift @@ -136,43 +136,75 @@ class ExtractAudio { outputFileType = .m4a } - // Create export session with audio-only preset - guard let session = AVAssetExportSession( - asset: asset, - presetName: AVAssetExportPresetPassthrough - ) else { - throw NSError( - domain: "ExtractAudio", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] - ) - } - - exportSession = session - session.outputURL = outputURL - session.outputFileType = outputFileType - // Configure to export only audio tracks let audioTracks = 
asset.tracks(withMediaType: .audio) guard !audioTracks.isEmpty else { throw NoAudioTrackException() } - // Apply time range if trimming is requested + // Get the actual audio track to extract + let audioTrack = audioTracks[0] + + // Determine the time range to extract + // IMPORTANT: Use the audio track's actual timeRange, not asset.duration + // Audio tracks may not start at zero due to encoding delays or sync adjustments + let sourceTimeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) let duration = CMTimeSubtract(endTime, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let startUs = config.startUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let duration = CMTimeSubtract(asset.duration, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) - session.timeRange = CMTimeRange(start: .zero, duration: endTime) + sourceTimeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + sourceTimeRange = audioTrack.timeRange + } + + // Create composition to remap timestamps to start at zero + // This ensures the extracted audio timeline starts at 0, not at the original offset + let composition = AVMutableComposition() + guard let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError( + domain: "ExtractAudio", + code: -13, + userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"] + ) } + // Insert the audio 
track at time zero (remapping the timeline) + try compositionAudioTrack.insertTimeRange( + sourceTimeRange, + of: audioTrack, + at: .zero + ) + + // Create export session with the composition (not the original asset) + guard let session = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + throw NSError( + domain: "ExtractAudio", + code: -1, + userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] + ) + } + + exportSession = session + session.outputURL = outputURL + session.outputFileType = outputFileType + + // No need to set timeRange on the session since the composition already handles it + // Start progress tracking on main thread DispatchQueue.main.async { onProgress(0.0) @@ -463,7 +495,7 @@ class ExtractAudio { ) } - writer.startSession(atSourceTime: timeRange.start) + writer.startSession(atSourceTime: .zero) // Calculate total duration for progress let totalDuration = CMTimeGetSeconds(timeRange.duration) diff --git a/macos/Classes/src/features/audio/ExtractAudio.swift b/macos/Classes/src/features/audio/ExtractAudio.swift index 0f971813..575599be 100644 --- a/macos/Classes/src/features/audio/ExtractAudio.swift +++ b/macos/Classes/src/features/audio/ExtractAudio.swift @@ -136,43 +136,75 @@ class ExtractAudio { outputFileType = .m4a } - // Create export session with audio-only preset - guard let session = AVAssetExportSession( - asset: asset, - presetName: AVAssetExportPresetPassthrough - ) else { - throw NSError( - domain: "ExtractAudio", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] - ) - } - - exportSession = session - session.outputURL = outputURL - session.outputFileType = outputFileType - // Configure to export only audio tracks let audioTracks = asset.tracks(withMediaType: .audio) guard !audioTracks.isEmpty else { throw NoAudioTrackException() } - // Apply time range if trimming is requested + // Get the actual audio track to extract + let 
audioTrack = audioTracks[0] + + // Determine the time range to extract + // IMPORTANT: Use the audio track's actual timeRange, not asset.duration + // Audio tracks may not start at zero due to encoding delays or sync adjustments + let sourceTimeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) let duration = CMTimeSubtract(endTime, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let startUs = config.startUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let duration = CMTimeSubtract(asset.duration, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) - session.timeRange = CMTimeRange(start: .zero, duration: endTime) + sourceTimeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + sourceTimeRange = audioTrack.timeRange + } + + // Create composition to remap timestamps to start at zero + // This ensures the extracted audio timeline starts at 0, not at the original offset + let composition = AVMutableComposition() + guard let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError( + domain: "ExtractAudio", + code: -13, + userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"] + ) } + // Insert the audio track at time zero (remapping the timeline) + try compositionAudioTrack.insertTimeRange( + sourceTimeRange, + of: audioTrack, + at: .zero + ) + + // Create export session with the composition (not the 
original asset) + guard let session = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + throw NSError( + domain: "ExtractAudio", + code: -1, + userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] + ) + } + + exportSession = session + session.outputURL = outputURL + session.outputFileType = outputFileType + + // No need to set timeRange on the session since the composition already handles it + // Start progress tracking on main thread DispatchQueue.main.async { onProgress(0.0) @@ -463,7 +495,7 @@ class ExtractAudio { ) } - writer.startSession(atSourceTime: timeRange.start) + writer.startSession(atSourceTime: .zero) // Calculate total duration for progress let totalDuration = CMTimeGetSeconds(timeRange.duration) From 4823f1294a28265b6d74c3c52d04ebb722a54fd6 Mon Sep 17 00:00:00 2001 From: eu Date: Mon, 30 Mar 2026 17:47:48 -0400 Subject: [PATCH 03/13] an attempt to fix audio timeline for Android --- .../src/features/audio/ExtractAudio.kt | 87 +++++++++++++++---- 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt index 8ad6d922..02c65fa8 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt @@ -111,13 +111,42 @@ class ExtractAudio(private val context: Context) { val audioFormat = extractor.getTrackFormat(audioTrackIndex) - // Get duration for progress tracking + // Get the audio track's actual time range + // Audio tracks may not start at timestamp 0 due to encoder delays val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION) - val startUs = config.startUs ?: 0L - val endUs = config.endUs ?: durationUs + + // Determine the actual start and end timestamps for extraction + val 
actualStartUs: Long + val actualEndUs: Long + + if (config.startUs != null || config.endUs != null) { + // User specified trim parameters - use them as-is + actualStartUs = config.startUs ?: 0L + actualEndUs = config.endUs ?: (actualStartUs + durationUs) + } else { + // Full extraction - need to detect the audio track's actual start time + // Select track first to be able to read samples + extractor.selectTrack(audioTrackIndex) + val firstSampleTimeUs = extractor.sampleTime + + if (firstSampleTimeUs > 0) { + // Audio track has an offset (e.g., AAC encoder delay) + actualStartUs = firstSampleTimeUs + actualEndUs = firstSampleTimeUs + durationUs + } else { + // Audio track starts at or near zero + actualStartUs = 0L + actualEndUs = durationUs + } + + // Seek back to start + if (actualStartUs > 0) { + extractor.seekTo(actualStartUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + } + } // Validate end time - if (endUs <= startUs) { + if (actualEndUs <= actualStartUs) { throw IllegalArgumentException("endUs must be greater than startUs") } @@ -129,8 +158,8 @@ class ExtractAudio(private val context: Context) { wavWriter.extractAndWrite( extractor = extractor, audioTrackIndex = audioTrackIndex, - startUs = startUs, - endUs = endUs, + startUs = actualStartUs, + endUs = actualEndUs, onProgress = { progress -> if (!shouldStop.get()) { mainHandler.post { onProgress(progress) } @@ -250,20 +279,43 @@ class ExtractAudio(private val context: Context) { val muxerTrackIndex = muxer.addTrack(audioFormat) muxer.start() - // Calculate duration and seek to start if needed + // Get the audio track's actual time range + // Audio tracks may not start at timestamp 0 due to encoder delays val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION) - val startUs = config.startUs ?: 0L - val endUs = config.endUs ?: durationUs + + // Determine the actual start and end timestamps for extraction + val actualStartUs: Long + val actualEndUs: Long + + if (config.startUs != null || config.endUs != null) 
{ + // User specified trim parameters - use them as-is + actualStartUs = config.startUs ?: 0L + actualEndUs = config.endUs ?: (actualStartUs + durationUs) + } else { + // Full extraction - need to detect the audio track's actual start time + // Read the first sample to get the actual start timestamp + val firstSampleTimeUs = extractor.sampleTime + + if (firstSampleTimeUs > 0) { + // Audio track has an offset (e.g., AAC encoder delay) + actualStartUs = firstSampleTimeUs + actualEndUs = firstSampleTimeUs + durationUs + } else { + // Audio track starts at or near zero + actualStartUs = 0L + actualEndUs = durationUs + } + } - if (startUs > 0) { - extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + if (actualStartUs > 0) { + extractor.seekTo(actualStartUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) } // Extract and write audio samples val buffer = ByteBuffer.allocate(BUFFER_SIZE) val bufferInfo = MediaCodec.BufferInfo() - var extractedUs = startUs - val totalDurationUs = endUs - startUs + var extractedUs = actualStartUs + val totalDurationUs = actualEndUs - actualStartUs mainHandler.post { onProgress(0.0) } @@ -278,12 +330,13 @@ class ExtractAudio(private val context: Context) { val presentationTimeUs = extractor.sampleTime // Check if we've reached the end time - if (presentationTimeUs > endUs) { + if (presentationTimeUs > actualEndUs) { break } - // Adjust presentation time if we're trimming from start - bufferInfo.presentationTimeUs = presentationTimeUs - startUs + // Adjust presentation time to start at zero in the output + // This ensures extracted audio always has timestamps starting at 0 + bufferInfo.presentationTimeUs = presentationTimeUs - actualStartUs bufferInfo.size = sampleSize bufferInfo.offset = 0 @@ -299,7 +352,7 @@ class ExtractAudio(private val context: Context) { // Update progress extractedUs = presentationTimeUs - val progress = ((extractedUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) + val progress = ((extractedUs - 
actualStartUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) mainHandler.post { onProgress(progress) } // Advance to next sample From 59637d5b2f10a2264321613c8989bc8db0700b5a Mon Sep 17 00:00:00 2001 From: eu Date: Mon, 30 Mar 2026 18:01:39 -0400 Subject: [PATCH 04/13] an attempt to fix audio timeline for Android --- .../src/features/audio/ExtractAudio.kt | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt index 02c65fa8..ebb024bf 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt @@ -125,23 +125,27 @@ class ExtractAudio(private val context: Context) { actualEndUs = config.endUs ?: (actualStartUs + durationUs) } else { // Full extraction - need to detect the audio track's actual start time - // Select track first to be able to read samples - extractor.selectTrack(audioTrackIndex) - val firstSampleTimeUs = extractor.sampleTime - - if (firstSampleTimeUs > 0) { - // Audio track has an offset (e.g., AAC encoder delay) - actualStartUs = firstSampleTimeUs - actualEndUs = firstSampleTimeUs + durationUs - } else { - // Audio track starts at or near zero - actualStartUs = 0L - actualEndUs = durationUs - } - - // Seek back to start - if (actualStartUs > 0) { - extractor.seekTo(actualStartUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + // Use a temporary extractor to avoid track selection conflicts! + // WavFileWriter will call selectTrack() on the main extractor, + // so we must not pre-select it here + var tempExtractor: MediaExtractor? 
= null + try { + tempExtractor = MediaExtractor() + tempExtractor.setDataSource(config.inputPath) + tempExtractor.selectTrack(audioTrackIndex) + val firstSampleTimeUs = tempExtractor.sampleTime + + if (firstSampleTimeUs > 0) { + // Audio track has an offset (e.g., AAC encoder delay) + actualStartUs = firstSampleTimeUs + actualEndUs = firstSampleTimeUs + durationUs + } else { + // Audio track starts at or near zero + actualStartUs = 0L + actualEndUs = durationUs + } + } finally { + tempExtractor?.release() } } From a820cdb7470c12b171c79019b1608144cac38a3b Mon Sep 17 00:00:00 2001 From: eu Date: Mon, 30 Mar 2026 18:02:00 -0400 Subject: [PATCH 05/13] an attempt to fix audio timeline --- ios/Classes/src/features/audio/ExtractAudio.swift | 7 ++++++- macos/Classes/src/features/audio/ExtractAudio.swift | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ios/Classes/src/features/audio/ExtractAudio.swift b/ios/Classes/src/features/audio/ExtractAudio.swift index f411acff..69f91c47 100644 --- a/ios/Classes/src/features/audio/ExtractAudio.swift +++ b/ios/Classes/src/features/audio/ExtractAudio.swift @@ -381,7 +381,9 @@ class ExtractAudio { } // Calculate time range - var timeRange = CMTimeRange(start: .zero, duration: asset.duration) + // Use the audio track's actual timeRange for full extraction + // Audio tracks may not start at zero due to encoding delays or sync adjustments + var timeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) @@ -392,6 +394,9 @@ class ExtractAudio { } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) timeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + timeRange = audioTrack.timeRange } // Create asset reader diff --git 
a/macos/Classes/src/features/audio/ExtractAudio.swift b/macos/Classes/src/features/audio/ExtractAudio.swift index 575599be..07eeed9f 100644 --- a/macos/Classes/src/features/audio/ExtractAudio.swift +++ b/macos/Classes/src/features/audio/ExtractAudio.swift @@ -381,7 +381,8 @@ class ExtractAudio { } // Calculate time range - var timeRange = CMTimeRange(start: .zero, duration: asset.duration) + // Audio tracks may not start at zero due to encoding delays or sync adjustments + var timeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) @@ -392,6 +393,9 @@ class ExtractAudio { } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) timeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + timeRange = audioTrack.timeRange } // Create asset reader From 7e01f3c6c5508e2c062f43b68a8cb432d55a056f Mon Sep 17 00:00:00 2001 From: hm21 Date: Tue, 31 Mar 2026 20:31:32 +0200 Subject: [PATCH 06/13] test: update audio extraction test to handle multiple WAV mime types --- example/integration_test/audio_extract_test.dart | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index de077a69..69c886cf 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -60,7 +60,11 @@ void main() { final header = file.openSync().readSync(defaultMagicNumbersMaxLength); final mimeType = lookupMimeType(result, headerBytes: header); - expect(mimeType, format.mimeType); + // WAV files may be detected as either 'audio/wav' or 'audio/x-wav' + final expectedMimeTypes = format == AudioFormat.wav + ? 
['audio/wav', 'audio/x-wav'] + : [format.mimeType]; + expect(expectedMimeTypes, contains(mimeType)); // Verify file has content final fileSize = await file.length(); From 66954d0a09aa6d6dc4401254964945f47161026d Mon Sep 17 00:00:00 2001 From: hm21 Date: Tue, 31 Mar 2026 20:33:48 +0200 Subject: [PATCH 07/13] test: enhance audio format MIME type detection and validation in extraction tests --- .../integration_test/audio_extract_test.dart | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index 69c886cf..e9c4c7d9 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -60,10 +60,15 @@ void main() { final header = file.openSync().readSync(defaultMagicNumbersMaxLength); final mimeType = lookupMimeType(result, headerBytes: header); - // WAV files may be detected as either 'audio/wav' or 'audio/x-wav' - final expectedMimeTypes = format == AudioFormat.wav - ? ['audio/wav', 'audio/x-wav'] - : [format.mimeType]; + // Some formats may be detected under alternative MIME types depending + // on the file extension used on the current platform: + // - WAV: 'audio/wav' or 'audio/x-wav' + // - AAC on iOS/macOS: saved as .m4a, detected as 'audio/mp4' + final expectedMimeTypes = switch (format) { + AudioFormat.wav => ['audio/wav', 'audio/x-wav'], + AudioFormat.aac => ['audio/aac', 'audio/mp4'], + _ => [format.mimeType], + }; expect(expectedMimeTypes, contains(mimeType)); // Verify file has content @@ -115,11 +120,15 @@ void main() { greaterThan(500), reason: 'Trimmed audio should have some content', ); - expect( - fileSize, - lessThan(500000), - reason: 'Trimmed audio should be smaller than full extraction', - ); + // WAV is uncompressed — 5 seconds can be several MB depending on + // sample rate and bit depth, so only cap compressed formats. 
+ if (format != AudioFormat.wav && format != AudioFormat.caf) { + expect( + fileSize, + lessThan(500000), + reason: 'Trimmed audio should be smaller than full extraction', + ); + } // Clean up await file.delete(); From 9c3de53cc15d616386a56a9d6c345a77651ae2d9 Mon Sep 17 00:00:00 2001 From: hm21 Date: Tue, 31 Mar 2026 20:37:28 +0200 Subject: [PATCH 08/13] chore: run dart format --- example/lib/features/audio/audio_extract_example_page.dart | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/example/lib/features/audio/audio_extract_example_page.dart b/example/lib/features/audio/audio_extract_example_page.dart index 2729c4d9..15620a16 100644 --- a/example/lib/features/audio/audio_extract_example_page.dart +++ b/example/lib/features/audio/audio_extract_example_page.dart @@ -60,7 +60,8 @@ class _AudioExtractExamplePageState extends State { if (!_isFormatSupported(_selectedFormat)) { // Find first supported format _selectedFormat = AudioFormat.values.firstWhere( - _isFormatSupported, orElse: () => AudioFormat.m4a, // Fallback to M4A + _isFormatSupported, + orElse: () => AudioFormat.m4a, // Fallback to M4A ); } } @@ -132,7 +133,8 @@ class _AudioExtractExamplePageState extends State { if (_selectedFormat == AudioFormat.wav) { try { var wav = Wav.read(File(outputPath).readAsBytesSync()); - info += ' channels:${wav.channels.length}' + info += + ' channels:${wav.channels.length}' ' sampleRate:${wav.samplesPerSecond}' ' format:${wav.format.name}'; } catch (_) { From 89ae7f073cf00991b7734798313853a13370d6a1 Mon Sep 17 00:00:00 2001 From: hm21 Date: Tue, 31 Mar 2026 20:40:32 +0200 Subject: [PATCH 09/13] test: skip audio extraction tests for unsupported formats --- example/integration_test/audio_extract_test.dart | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index e9c4c7d9..5b49d1cb 100644 --- a/example/integration_test/audio_extract_test.dart +++ 
b/example/integration_test/audio_extract_test.dart @@ -41,6 +41,8 @@ void main() { testWidgets( 'extractAudio with $format returns valid audio file', (tester) async { + if (!isFormatSupported(format)) return; + final directory = await getTemporaryDirectory(); final outputPath = '${directory.path}/test_audio_${DateTime.now().millisecondsSinceEpoch}.${format.extension}'; @@ -88,6 +90,8 @@ void main() { testWidgets( 'extractAudio with $format and trimming works correctly', (tester) async { + if (!isFormatSupported(format)) return; + final directory = await getTemporaryDirectory(); final outputPath = '${directory.path}/test_audio_trimmed_${DateTime.now().millisecondsSinceEpoch}.${format.extension}'; From e722f6b84f94d6801eea3d165ed11a85610dc7a8 Mon Sep 17 00:00:00 2001 From: hm21 Date: Tue, 31 Mar 2026 20:44:33 +0200 Subject: [PATCH 10/13] test: fix audio extraction test to properly read file headers --- example/integration_test/audio_extract_test.dart | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index 5b49d1cb..b209c380 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -60,7 +60,9 @@ void main() { reason: 'Audio file should exist at $outputPath', ); - final header = file.openSync().readSync(defaultMagicNumbersMaxLength); + final raf = file.openSync(); + final header = raf.readSync(defaultMagicNumbersMaxLength); + raf.closeSync(); final mimeType = lookupMimeType(result, headerBytes: header); // Some formats may be detected under alternative MIME types depending // on the file extension used on the current platform: From 2678043897080c23bf10403701321fce9b1e691a Mon Sep 17 00:00:00 2001 From: hm21 Date: Tue, 31 Mar 2026 21:17:50 +0200 Subject: [PATCH 11/13] refactor: delegate file cleanup to background thread during audio extraction cancellation --- 
.../src/features/audio/ExtractAudio.kt | 8 +- .../src/features/audio/WavFileWriter.kt | 121 ++++++++++++------ .../integration_test/audio_extract_test.dart | 19 ++- 3 files changed, 93 insertions(+), 55 deletions(-) diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt index da0dd5b9..c7c8299a 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt @@ -223,9 +223,7 @@ class ExtractAudio(private val context: Context) { return AudioExtractJobHandle { shouldStop.set(true) mainHandler.removeCallbacksAndMessages(null) - if (outputFile.exists()) { - outputFile.delete() - } + // File cleanup is handled by the background thread once it detects shouldStop } } @@ -428,9 +426,7 @@ class ExtractAudio(private val context: Context) { return AudioExtractJobHandle { shouldStop.set(true) mainHandler.removeCallbacksAndMessages(null) - if (config.outputPath == null && outputFile.exists()) { - outputFile.delete() - } + // File cleanup is handled by the background thread once it detects shouldStop } } diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt index d22fbd80..417dd2ec 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt @@ -30,7 +30,6 @@ class WavFileWriter(private val outputFile: File) { private const val FMT_HEADER = 0x20746d66 // "fmt " in little-endian private const val DATA_HEADER = 0x61746164 // "data" in little-endian private const val PCM_FORMAT = 1.toShort() - private const val IEEE_FLOAT_FORMAT = 3.toShort() private const val BUFFER_SIZE = 1024 * 
1024 // 1MB buffer } @@ -70,7 +69,7 @@ class WavFileWriter(private val outputFile: File) { val isPcm = mime.equals("audio/raw", ignoreCase = true) || mime.equals("audio/pcm", ignoreCase = true) if (isPcm) { - extractPcmToWav(extractor, audioTrackIndex, startUs, endUs, onProgress, shouldStop) + extractPcmToWav(extractor, audioFormat, audioTrackIndex, startUs, endUs, onProgress, shouldStop) } else { extractAndDecodeToWav(extractor, audioFormat, audioTrackIndex, startUs, endUs, onProgress, shouldStop) } @@ -252,35 +251,35 @@ class WavFileWriter(private val outputFile: File) { // If format changed, rewrite the header if (formatChanged) { outputStream.flush() - val raf = RandomAccessFile(outputFile, "rw") - raf.seek(0L) + RandomAccessFile(outputFile, "rw").use { raf -> + raf.seek(0L) - val byteRate = sampleRate * numChannels * bitsPerSample / 8 - val blockAlign = (numChannels * bitsPerSample / 8).toShort() + val byteRate = sampleRate * numChannels * bitsPerSample / 8 + val blockAlign = (numChannels * bitsPerSample / 8).toShort() - val headerBytes = ByteBuffer.allocate(44).apply { - // All values written in little-endian order - // Magic number constants are pre-encoded for little-endian - order(ByteOrder.LITTLE_ENDIAN) - putInt(RIFF_HEADER) - putInt(0) // Will update at end - putInt(WAVE_HEADER) - - putInt(FMT_HEADER) - putInt(16) - putShort(PCM_FORMAT) - putShort(numChannels.toShort()) - putInt(sampleRate) - putInt(byteRate) - putShort(blockAlign) - putShort(bitsPerSample.toShort()) - - putInt(DATA_HEADER) - putInt(0) // Will update at end - }.array() + val headerBytes = ByteBuffer.allocate(44).apply { + // All values written in little-endian order + // Magic number constants are pre-encoded for little-endian + order(ByteOrder.LITTLE_ENDIAN) + putInt(RIFF_HEADER) + putInt(0) // Will update at end + putInt(WAVE_HEADER) + + putInt(FMT_HEADER) + putInt(16) + putShort(PCM_FORMAT) + putShort(numChannels.toShort()) + putInt(sampleRate) + putInt(byteRate) + 
putShort(blockAlign) + putShort(bitsPerSample.toShort()) + + putInt(DATA_HEADER) + putInt(0) // Will update at end + }.array() - raf.write(headerBytes) - raf.close() + raf.write(headerBytes) + } } } outputBufferId >= 0 -> { @@ -345,6 +344,7 @@ class WavFileWriter(private val outputFile: File) { */ private fun extractPcmToWav( extractor: MediaExtractor, + audioFormat: MediaFormat, audioTrackIndex: Int, startUs: Long, endUs: Long, @@ -357,6 +357,33 @@ class WavFileWriter(private val outputFile: File) { // Reset total data size for this extraction totalDataSize = 0 + // Determine bits per sample from the source PCM encoding + if (audioFormat.containsKey(MediaFormat.KEY_PCM_ENCODING)) { + val pcmEncoding = audioFormat.getInteger(MediaFormat.KEY_PCM_ENCODING) + when (pcmEncoding) { + AudioFormat.ENCODING_PCM_16BIT -> { + bitsPerSample = 16 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_8BIT -> { + bitsPerSample = 8 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_FLOAT -> { + // Convert float to 16-bit for better compatibility + bitsPerSample = 16 + isFloatPcm = true + } + else -> { + bitsPerSample = 16 + isFloatPcm = false + } + } + } else { + bitsPerSample = 16 + isFloatPcm = false + } + // Select the audio track in the extractor extractor.selectTrack(audioTrackIndex) @@ -396,8 +423,23 @@ class WavFileWriter(private val outputFile: File) { val pcmData = ByteArray(sampleSize) buffer.get(pcmData) - outputStream.write(pcmData) - totalDataSize += sampleSize + + if (isFloatPcm) { + // Convert float PCM to 16-bit integer PCM + val floatBuffer = ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN) + val floatSamples = pcmData.size / 4 + val int16Buffer = ByteBuffer.allocate(floatSamples * 2).order(ByteOrder.LITTLE_ENDIAN) + for (i in 0 until floatSamples) { + val floatValue = floatBuffer.float + val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort() + int16Buffer.putShort(intValue) + } + outputStream.write(int16Buffer.array()) + 
totalDataSize += int16Buffer.array().size + } else { + outputStream.write(pcmData) + totalDataSize += sampleSize + } currentTimeUs = presentationTimeUs if (totalDurationUs != Long.MAX_VALUE) { @@ -434,10 +476,10 @@ class WavFileWriter(private val outputFile: File) { val byteRate = sampleRate * numChannels * bitsPerSample / 8 val blockAlign = (numChannels * bitsPerSample / 8).toShort() - // Ensure dataSize doesn't exceed 32-bit signed integer limit for WAV format - // WAV files are limited to 4GB due to 32-bit size fields in RIFF format - val safeSizeForHeader = if (dataSize > 0x7FFFFFFFL) { - 0x7FFFFFFF + // RIFF chunk sizes are unsigned 32-bit, so the actual WAV limit is ~4GB. + // Clamp to 0xFFFFFFFF; .toInt() gives the correct bit pattern for ByteBuffer.putInt. + val safeSizeForHeader = if (dataSize > 0xFFFFFFFFL) { + 0xFFFFFFFF.toInt() } else { dataSize.toInt() } @@ -468,13 +510,14 @@ class WavFileWriter(private val outputFile: File) { /** * Updates the WAV header with the actual file sizes after writing is complete. * - * Note: WAV files are limited to 4GB due to 32-bit size fields. If the file exceeds this, - * the header size fields will be clamped to the maximum 32-bit signed integer value. + * Note: WAV files are limited to ~4GB due to unsigned 32-bit size fields in the RIFF spec. + * If the file exceeds this, the header size fields are clamped to 0xFFFFFFFF. */ private fun updateWavHeader() { - // Ensure totalDataSize doesn't exceed 32-bit limit - val safeSizeForHeader = if (totalDataSize > 0x7FFFFFFFL) { - 0x7FFFFFFF + // RIFF chunk sizes are unsigned 32-bit; clamp to 0xFFFFFFFF (~4GB). + // .toInt() gives the correct bit pattern for ByteBuffer.putInt. 
+ val safeSizeForHeader = if (totalDataSize > 0xFFFFFFFFL) { + 0xFFFFFFFF.toInt() } else { totalDataSize.toInt() } diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index b209c380..7e8ef5fc 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -60,17 +60,16 @@ void main() { reason: 'Audio file should exist at $outputPath', ); - final raf = file.openSync(); - final header = raf.readSync(defaultMagicNumbersMaxLength); - raf.closeSync(); - final mimeType = lookupMimeType(result, headerBytes: header); - // Some formats may be detected under alternative MIME types depending - // on the file extension used on the current platform: - // - WAV: 'audio/wav' or 'audio/x-wav' - // - AAC on iOS/macOS: saved as .m4a, detected as 'audio/mp4' + // Use extension-based MIME detection — header-based detection is + // unreliable for MP4-container formats (AAC/M4A/MP3 on Android all + // share the same magic bytes regardless of audio content). + final mimeType = lookupMimeType(result); + // AAC on iOS/macOS is saved with a .m4a extension (the only container + // Apple supports for AAC export), so it resolves to 'audio/mp4'. + // The mime package maps .wav to 'audio/x-wav' rather than 'audio/wav'. 
final expectedMimeTypes = switch (format) { - AudioFormat.wav => ['audio/wav', 'audio/x-wav'], - AudioFormat.aac => ['audio/aac', 'audio/mp4'], + AudioFormat.aac => [format.mimeType, 'audio/mp4'], + AudioFormat.wav => [format.mimeType, 'audio/x-wav'], _ => [format.mimeType], }; expect(expectedMimeTypes, contains(mimeType)); From c4ed38525dbf8571e7933572370f83f292d9ad6b Mon Sep 17 00:00:00 2001 From: eu Date: Wed, 1 Apr 2026 15:15:59 -0400 Subject: [PATCH 12/13] added AVFileType for wav format --- ios/Classes/src/features/audio/models/AudioExtractConfig.swift | 1 + macos/Classes/src/features/audio/models/AudioExtractConfig.swift | 1 + 2 files changed, 2 insertions(+) diff --git a/ios/Classes/src/features/audio/models/AudioExtractConfig.swift b/ios/Classes/src/features/audio/models/AudioExtractConfig.swift index d6ddc476..278e22da 100644 --- a/ios/Classes/src/features/audio/models/AudioExtractConfig.swift +++ b/ios/Classes/src/features/audio/models/AudioExtractConfig.swift @@ -76,6 +76,7 @@ struct AudioExtractConfig { case "mp3": return "com.apple.m4a-audio" // MP3 in M4A container case "aac": return "com.apple.m4a-audio" // AAC in M4A container case "m4a": return "com.apple.m4a-audio" // M4A container + case "wav": return "com.microsoft.waveform-audio" // WAV format default: return "com.apple.m4a-audio" } } diff --git a/macos/Classes/src/features/audio/models/AudioExtractConfig.swift b/macos/Classes/src/features/audio/models/AudioExtractConfig.swift index 98f41ed2..95dd671c 100644 --- a/macos/Classes/src/features/audio/models/AudioExtractConfig.swift +++ b/macos/Classes/src/features/audio/models/AudioExtractConfig.swift @@ -77,6 +77,7 @@ struct AudioExtractConfig { case "mp3": return "com.apple.m4a-audio" // MP3 in M4A container case "aac": return "com.apple.m4a-audio" // AAC in M4A container case "m4a": return "com.apple.m4a-audio" // M4A container + case "wav": return "com.microsoft.waveform-audio" // WAV format default: return "com.apple.m4a-audio" } } From 
4390d14166824a6f076a70e42af9af649ed94331 Mon Sep 17 00:00:00 2001 From: eu Date: Wed, 1 Apr 2026 15:23:00 -0400 Subject: [PATCH 13/13] use audio/x-wav mimetype for wav audio --- example/integration_test/audio_extract_test.dart | 2 +- lib/core/models/audio/audio_format_model.dart | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index 7e8ef5fc..727089d7 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -69,7 +69,7 @@ void main() { // The mime package maps .wav to 'audio/x-wav' rather than 'audio/wav'. final expectedMimeTypes = switch (format) { AudioFormat.aac => [format.mimeType, 'audio/mp4'], - AudioFormat.wav => [format.mimeType, 'audio/x-wav'], + AudioFormat.wav => [format.mimeType, 'audio/wav'], _ => [format.mimeType], }; expect(expectedMimeTypes, contains(mimeType)); diff --git a/lib/core/models/audio/audio_format_model.dart b/lib/core/models/audio/audio_format_model.dart index e8e06ae9..1740ffa0 100644 --- a/lib/core/models/audio/audio_format_model.dart +++ b/lib/core/models/audio/audio_format_model.dart @@ -24,7 +24,7 @@ enum AudioFormat { /// WAV format - uncompressed audio, high quality, large file size. /// Supported on: Android, iOS, macOS - wav('audio/wav'); + wav('audio/x-wav'); const AudioFormat(this.mimeType);