diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt index ffbad3fe..c7c8299a 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/ExtractAudio.kt @@ -22,12 +22,15 @@ class NoAudioTrackException(message: String) : Exception(message) /** * Service for extracting audio from video files. * - * This class handles the audio extraction pipeline using Android MediaExtractor and MediaMuxer: + * This class handles the audio extraction pipeline: * - Extracts audio track from video file * - Supports trimming (start/end time) * - Supports multiple output formats (MP3, AAC, WAV, M4A, OGG) * - Provides progress tracking during extraction * - Supports cancellation of active extraction jobs + * + * For WAV format, uses custom WAV file writer with PCM encoding. + * For other formats, uses Android MediaExtractor and MediaMuxer. */ class ExtractAudio(private val context: Context) { @@ -54,6 +57,184 @@ class ExtractAudio(private val context: Context) { onProgress: (Double) -> Unit, onComplete: (ByteArray?) -> Unit, onError: (Throwable) -> Unit + ): AudioExtractJobHandle { + return if (config.format.lowercase() == "wav") { + // WAV format requires special handling + extractToWav(config, onProgress, onComplete, onError) + } else { + // Use MediaMuxer for other formats + extractWithMuxer(config, onProgress, onComplete, onError) + } + } + + /** + * Extracts audio to WAV format using custom WAV file writer. + * + * This method properly handles both compressed and PCM audio formats: + * - Compressed formats (AAC, MP3): Uses MediaCodec to decode to PCM + * - PCM formats: Writes directly to WAV file + */ + private fun extractToWav( + config: AudioExtractConfig, + onProgress: (Double) -> Unit, + onComplete: (ByteArray?) 
-> Unit, + onError: (Throwable) -> Unit + ): AudioExtractJobHandle { + val shouldStop = AtomicBoolean(false) + val mainHandler = Handler(Looper.getMainLooper()) + + // Determine output file location + val outputFile = if (config.outputPath != null) { + File(config.outputPath) + } else { + File( + context.cacheDir, + "audio_output_${System.currentTimeMillis()}.wav" + ) + } + + // Run extraction in background thread + Thread { + var extractor: MediaExtractor? = null + var wavWriter: WavFileWriter? = null + + try { + // Initialize extractor + extractor = MediaExtractor() + extractor.setDataSource(config.inputPath) + + // Find audio track + val audioTrackIndex = findAudioTrack(extractor) + if (audioTrackIndex < 0) { + throw NoAudioTrackException("No audio track found in video file") + } + + val audioFormat = extractor.getTrackFormat(audioTrackIndex) + + // Get the audio track's actual time range + // Audio tracks may not start at timestamp 0 due to encoder delays + val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION) + + // Determine the actual start and end timestamps for extraction + val actualStartUs: Long + val actualEndUs: Long + + if (config.startUs != null || config.endUs != null) { + // User specified trim parameters - use them as-is + actualStartUs = config.startUs ?: 0L + actualEndUs = config.endUs ?: (actualStartUs + durationUs) + } else { + // Full extraction - need to detect the audio track's actual start time + // Use a temporary extractor to avoid track selection conflicts! + // WavFileWriter will call selectTrack() on the main extractor, + // so we must not pre-select it here + var tempExtractor: MediaExtractor? 
= null + try { + tempExtractor = MediaExtractor() + tempExtractor.setDataSource(config.inputPath) + tempExtractor.selectTrack(audioTrackIndex) + val firstSampleTimeUs = tempExtractor.sampleTime + + if (firstSampleTimeUs > 0) { + // Audio track has an offset (e.g., AAC encoder delay) + actualStartUs = firstSampleTimeUs + actualEndUs = firstSampleTimeUs + durationUs + } else { + // Audio track starts at or near zero + actualStartUs = 0L + actualEndUs = durationUs + } + } finally { + tempExtractor?.release() + } + } + + // Validate end time + if (actualEndUs <= actualStartUs) { + throw IllegalArgumentException("endUs must be greater than startUs") + } + + // Create WAV writer (handles both compressed and PCM audio) + wavWriter = WavFileWriter(outputFile) + + mainHandler.post { onProgress(0.0) } + + wavWriter.extractAndWrite( + extractor = extractor, + audioTrackIndex = audioTrackIndex, + startUs = actualStartUs, + endUs = actualEndUs, + onProgress = { progress -> + if (!shouldStop.get()) { + mainHandler.post { onProgress(progress) } + } + }, + shouldStop = { shouldStop.get() } + ) + + // Check if cancelled + if (shouldStop.get()) { + outputFile.delete() + throw InterruptedException("Extraction cancelled by user") + } + + extractor.release() + extractor = null + + // Read output and invoke completion callback + mainHandler.post { + try { + if (config.outputPath != null) { + // Output saved to file, return null + onComplete(null) + } else { + // Read temporary file and return bytes + val resultBytes = outputFile.readBytes() + onComplete(resultBytes) + } + } catch (e: Exception) { + onError(e) + } finally { + if (config.outputPath == null) { + outputFile.delete() + } + } + } + + } catch (e: Exception) { + Log.e(TAG, "Error extracting WAV audio: ${e.message}", e) + mainHandler.post { + onError(e) + } + // Clean up output file on error + if (outputFile.exists()) { + outputFile.delete() + } + } finally { + try { + extractor?.release() + } catch (e: Exception) { + Log.w(TAG, 
"Error releasing extractor: ${e.message}") + } + } + }.start() + + // Return cancellation handle + return AudioExtractJobHandle { + shouldStop.set(true) + mainHandler.removeCallbacksAndMessages(null) + // File cleanup is handled by the background thread once it detects shouldStop + } + } + + /** + * Extracts audio using MediaMuxer for non-WAV formats. + */ + private fun extractWithMuxer( + config: AudioExtractConfig, + onProgress: (Double) -> Unit, + onComplete: (ByteArray?) -> Unit, + onError: (Throwable) -> Unit ): AudioExtractJobHandle { val shouldStop = AtomicBoolean(false) val mainHandler = Handler(Looper.getMainLooper()) @@ -100,20 +281,43 @@ class ExtractAudio(private val context: Context) { val muxerTrackIndex = muxer.addTrack(audioFormat) muxer.start() - // Calculate duration and seek to start if needed + // Get the audio track's actual time range + // Audio tracks may not start at timestamp 0 due to encoder delays val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION) - val startUs = config.startUs ?: 0L - val endUs = config.endUs ?: durationUs + + // Determine the actual start and end timestamps for extraction + val actualStartUs: Long + val actualEndUs: Long + + if (config.startUs != null || config.endUs != null) { + // User specified trim parameters - use them as-is + actualStartUs = config.startUs ?: 0L + actualEndUs = config.endUs ?: (actualStartUs + durationUs) + } else { + // Full extraction - need to detect the audio track's actual start time + // Read the first sample to get the actual start timestamp + val firstSampleTimeUs = extractor.sampleTime + + if (firstSampleTimeUs > 0) { + // Audio track has an offset (e.g., AAC encoder delay) + actualStartUs = firstSampleTimeUs + actualEndUs = firstSampleTimeUs + durationUs + } else { + // Audio track starts at or near zero + actualStartUs = 0L + actualEndUs = durationUs + } + } - if (startUs > 0) { - extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + if (actualStartUs > 0) { + 
extractor.seekTo(actualStartUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) } // Extract and write audio samples val buffer = ByteBuffer.allocate(BUFFER_SIZE) val bufferInfo = MediaCodec.BufferInfo() - var extractedUs = startUs - val totalDurationUs = endUs - startUs + var extractedUs = actualStartUs + val totalDurationUs = actualEndUs - actualStartUs mainHandler.post { onProgress(0.0) } @@ -128,12 +332,13 @@ class ExtractAudio(private val context: Context) { val presentationTimeUs = extractor.sampleTime // Check if we've reached the end time - if (presentationTimeUs > endUs) { + if (presentationTimeUs > actualEndUs) { break } - // Adjust presentation time if we're trimming from start - bufferInfo.presentationTimeUs = presentationTimeUs - startUs + // Adjust presentation time to start at zero in the output + // This ensures extracted audio always has timestamps starting at 0 + bufferInfo.presentationTimeUs = presentationTimeUs - actualStartUs bufferInfo.size = sampleSize bufferInfo.offset = 0 @@ -150,8 +355,7 @@ class ExtractAudio(private val context: Context) { // Update progress extractedUs = presentationTimeUs - val progress = - ((extractedUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) + val progress = ((extractedUs - actualStartUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) mainHandler.post { onProgress(progress) } // Advance to next sample @@ -222,9 +426,7 @@ class ExtractAudio(private val context: Context) { return AudioExtractJobHandle { shouldStop.set(true) mainHandler.removeCallbacksAndMessages(null) - if (config.outputPath == null && outputFile.exists()) { - outputFile.delete() - } + // File cleanup is handled by the background thread once it detects shouldStop } } @@ -247,7 +449,7 @@ class ExtractAudio(private val context: Context) { /** * Determines the MediaMuxer output format based on the requested audio format. 
* - * @param format Audio format string (mp3, aac, wav, m4a, ogg) + * @param format Audio format string (mp3, aac, m4a, ogg) * @return MediaMuxer output format constant */ private fun determineOutputFormat(format: String): Int { @@ -255,7 +457,7 @@ class ExtractAudio(private val context: Context) { "mp3" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 "aac" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 "m4a" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 - "wav" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 + "wav" -> throw IllegalArgumentException("WAV format should be handled by extractToWav()") "ogg" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_OGG "webm" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_WEBM else -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 // Default to MP4 container diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt new file mode 100644 index 00000000..417dd2ec --- /dev/null +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/audio/WavFileWriter.kt @@ -0,0 +1,545 @@ +package ch.waio.pro_video_editor.src.features.audio + +import android.media.AudioFormat +import android.media.MediaCodec +import android.media.MediaExtractor +import android.media.MediaFormat +import java.io.File +import java.io.FileOutputStream +import java.io.RandomAccessFile +import java.nio.ByteBuffer +import java.nio.ByteOrder + +/** + * Utility class for writing WAV audio files with proper RIFF/WAVE format. 
+ * + * This class handles the creation of WAV files by: + * - Writing a proper RIFF/WAVE header + * - Decoding compressed audio to PCM samples + * - Writing PCM data in the correct format + * - Updating the file size fields after writing + */ +class WavFileWriter(private val outputFile: File) { + + companion object { + // Magic numbers for WAV format + // Note: These are stored in little-endian byte order (reversed) so that when + // written via a LITTLE_ENDIAN ByteBuffer, they appear correctly in the file. + private const val RIFF_HEADER = 0x46464952 // "RIFF" in little-endian + private const val WAVE_HEADER = 0x45564157 // "WAVE" in little-endian + private const val FMT_HEADER = 0x20746d66 // "fmt " in little-endian + private const val DATA_HEADER = 0x61746164 // "data" in little-endian + private const val PCM_FORMAT = 1.toShort() + private const val BUFFER_SIZE = 1024 * 1024 // 1MB buffer + } + + private var sampleRate: Int = 44100 + private var numChannels: Int = 2 + private var bitsPerSample: Int = 16 + private var isFloatPcm: Boolean = false + private var totalDataSize: Long = 0 + + /** + * Extracts audio from a video file and writes it as a WAV file. 
+ * + * @param extractor MediaExtractor configured with the source video + * @param audioTrackIndex Index of the audio track in the extractor + * @param startUs Optional start time in microseconds + * @param endUs Optional end time in microseconds + * @param onProgress Progress callback (0.0 to 1.0) + * @param shouldStop Atomic boolean to check for cancellation + * @throws Exception if extraction or writing fails + */ + fun extractAndWrite( + extractor: MediaExtractor, + audioTrackIndex: Int, + startUs: Long = 0L, + endUs: Long = Long.MAX_VALUE, + onProgress: (Double) -> Unit = {}, + shouldStop: () -> Boolean = { false } + ) { + val audioFormat = extractor.getTrackFormat(audioTrackIndex) + + sampleRate = audioFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + numChannels = audioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + bitsPerSample = 16 // PCM 16-bit is standard + + val mime = audioFormat.getString(MediaFormat.KEY_MIME) ?: throw IllegalArgumentException("No MIME type in audio format") + + val isPcm = mime.equals("audio/raw", ignoreCase = true) || + mime.equals("audio/pcm", ignoreCase = true) + if (isPcm) { + extractPcmToWav(extractor, audioFormat, audioTrackIndex, startUs, endUs, onProgress, shouldStop) + } else { + extractAndDecodeToWav(extractor, audioFormat, audioTrackIndex, startUs, endUs, onProgress, shouldStop) + } + } + + /** + * Extracts compressed audio and decodes it to PCM WAV format. + */ + private fun extractAndDecodeToWav( + extractor: MediaExtractor, + audioFormat: MediaFormat, + audioTrackIndex: Int, + startUs: Long, + endUs: Long, + onProgress: (Double) -> Unit, + shouldStop: () -> Boolean + ) { + var decoder: MediaCodec? = null + var outputStream: FileOutputStream? = null + + try { + totalDataSize = 0 + + val mime = audioFormat.getString(MediaFormat.KEY_MIME)!! 
+ decoder = MediaCodec.createDecoderByType(mime) + decoder.configure(audioFormat, null, null, 0) + decoder.start() + + // Get the decoder's ACTUAL output format immediately after starting + // don't rely on INFO_OUTPUT_FORMAT_CHANGED which may not be sent + val decoderOutputFormat = decoder.outputFormat + + // output parameters from decoder + sampleRate = if (decoderOutputFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) { + decoderOutputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + } else { + audioFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + } + + numChannels = if (decoderOutputFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) { + decoderOutputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + } else { + audioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + } + + // Determine bits per sample from PCM encoding + // Convert float PCM to 16-bit integer for compatibility + if (decoderOutputFormat.containsKey(MediaFormat.KEY_PCM_ENCODING)) { + val pcmEncoding = decoderOutputFormat.getInteger(MediaFormat.KEY_PCM_ENCODING) + when (pcmEncoding) { + AudioFormat.ENCODING_PCM_16BIT -> { + bitsPerSample = 16 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_8BIT -> { + bitsPerSample = 8 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_FLOAT -> { + // Convert float to 16-bit for better compatibility + bitsPerSample = 16 + isFloatPcm = true + } + else -> { + bitsPerSample = 16 + isFloatPcm = false + } + } + } else { + bitsPerSample = 16 + isFloatPcm = false + } + + extractor.selectTrack(audioTrackIndex) + + if (startUs > 0) { + extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + } + + outputStream = FileOutputStream(outputFile) + + // Write initial WAV header with decoder output format + // We'll update the sizes at the end + writeWavHeader(outputStream, 0) + + val validEndUs = if (endUs == Long.MAX_VALUE) Long.MAX_VALUE else endUs + val totalDurationUs = if (validEndUs == Long.MAX_VALUE) Long.MAX_VALUE else (validEndUs - startUs) + var currentTimeUs 
= startUs + var inputEos = false + var outputEos = false + + onProgress(0.0) + + while (!outputEos && !shouldStop()) { + if (!inputEos) { + val inputBufferId = decoder.dequeueInputBuffer(10000) + if (inputBufferId >= 0) { + val decoderInputBuffer = decoder.getInputBuffer(inputBufferId)!! + decoderInputBuffer.clear() + + val sampleSize = extractor.readSampleData(decoderInputBuffer, 0) + val presentationTimeUs = extractor.sampleTime + + if (sampleSize < 0 || presentationTimeUs > validEndUs) { + // End of stream or reached end time + decoder.queueInputBuffer(inputBufferId, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM) + inputEos = true + } else { + decoder.queueInputBuffer(inputBufferId, 0, sampleSize, presentationTimeUs, 0) + extractor.advance() + currentTimeUs = presentationTimeUs + + if (totalDurationUs != Long.MAX_VALUE) { + val progress = ((currentTimeUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) + onProgress(progress) + } + } + } + } + + val bufferInfo = MediaCodec.BufferInfo() + val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10000) + + when { + outputBufferId == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> { + // Output format changed during decoding - update our parameters + val newOutputFormat = decoder.outputFormat + + var formatChanged = false + if (newOutputFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) { + val newSampleRate = newOutputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + if (newSampleRate != sampleRate) { + sampleRate = newSampleRate + formatChanged = true + } + } + if (newOutputFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) { + val newChannels = newOutputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + if (newChannels != numChannels) { + numChannels = newChannels + formatChanged = true + } + } + if (newOutputFormat.containsKey(MediaFormat.KEY_PCM_ENCODING)) { + val pcmEncoding = newOutputFormat.getInteger(MediaFormat.KEY_PCM_ENCODING) + when (pcmEncoding) { + AudioFormat.ENCODING_PCM_16BIT -> { + if 
(bitsPerSample != 16 || isFloatPcm) { + bitsPerSample = 16 + isFloatPcm = false + formatChanged = true + } + } + AudioFormat.ENCODING_PCM_8BIT -> { + if (bitsPerSample != 8 || isFloatPcm) { + bitsPerSample = 8 + isFloatPcm = false + formatChanged = true + } + } + AudioFormat.ENCODING_PCM_FLOAT -> { + // Convert float to 16-bit for compatibility + if (bitsPerSample != 16 || !isFloatPcm) { + bitsPerSample = 16 + isFloatPcm = true + formatChanged = true + } + } + else -> { + if (bitsPerSample != 16 || isFloatPcm) { + bitsPerSample = 16 + isFloatPcm = false + formatChanged = true + } + } + } + } + + // If format changed, rewrite the header + if (formatChanged) { + outputStream.flush() + RandomAccessFile(outputFile, "rw").use { raf -> + raf.seek(0L) + + val byteRate = sampleRate * numChannels * bitsPerSample / 8 + val blockAlign = (numChannels * bitsPerSample / 8).toShort() + + val headerBytes = ByteBuffer.allocate(44).apply { + // All values written in little-endian order + // Magic number constants are pre-encoded for little-endian + order(ByteOrder.LITTLE_ENDIAN) + putInt(RIFF_HEADER) + putInt(0) // Will update at end + putInt(WAVE_HEADER) + + putInt(FMT_HEADER) + putInt(16) + putShort(PCM_FORMAT) + putShort(numChannels.toShort()) + putInt(sampleRate) + putInt(byteRate) + putShort(blockAlign) + putShort(bitsPerSample.toShort()) + + putInt(DATA_HEADER) + putInt(0) // Will update at end + }.array() + + raf.write(headerBytes) + } + } + } + outputBufferId >= 0 -> { + val decoderOutputBuffer = decoder.getOutputBuffer(outputBufferId)!! 
+ + if (bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) { + outputEos = true + } + + if (bufferInfo.size > 0) { + // Get PCM data from decoder output buffer + decoderOutputBuffer.position(bufferInfo.offset) + decoderOutputBuffer.limit(bufferInfo.offset + bufferInfo.size) + + if (isFloatPcm) { + // Convert float PCM to 16-bit integer PCM + val floatSamples = bufferInfo.size / 4 // 4 bytes per float + val int16Buffer = ByteBuffer.allocate(floatSamples * 2) // 2 bytes per int16 + int16Buffer.order(ByteOrder.LITTLE_ENDIAN) + + for (i in 0 until floatSamples) { + val floatValue = decoderOutputBuffer.float + // Clamp and convert float [-1.0, 1.0] to int16 [-32768, 32767] + val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort() + int16Buffer.putShort(intValue) + } + + outputStream.write(int16Buffer.array()) + totalDataSize += int16Buffer.array().size + } else { + // Write PCM data directly (already in correct format) + val pcmData = ByteArray(bufferInfo.size) + decoderOutputBuffer.get(pcmData) + outputStream.write(pcmData) + totalDataSize += pcmData.size + } + } + + decoder.releaseOutputBuffer(outputBufferId, false) + } + } + } + + outputStream.flush() + outputStream.close() + outputStream = null + + // Update WAV header with actual sizes + updateWavHeader() + + onProgress(1.0) + + } finally { + outputStream?.close() + decoder?.stop() + decoder?.release() + } + } + + /** + * Extracts PCM audio directly without decoding. + */ + private fun extractPcmToWav( + extractor: MediaExtractor, + audioFormat: MediaFormat, + audioTrackIndex: Int, + startUs: Long, + endUs: Long, + onProgress: (Double) -> Unit, + shouldStop: () -> Boolean + ) { + var outputStream: FileOutputStream? 
= null + + try { + // Reset total data size for this extraction + totalDataSize = 0 + + // Determine bits per sample from the source PCM encoding + if (audioFormat.containsKey(MediaFormat.KEY_PCM_ENCODING)) { + val pcmEncoding = audioFormat.getInteger(MediaFormat.KEY_PCM_ENCODING) + when (pcmEncoding) { + AudioFormat.ENCODING_PCM_16BIT -> { + bitsPerSample = 16 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_8BIT -> { + bitsPerSample = 8 + isFloatPcm = false + } + AudioFormat.ENCODING_PCM_FLOAT -> { + // Convert float to 16-bit for better compatibility + bitsPerSample = 16 + isFloatPcm = true + } + else -> { + bitsPerSample = 16 + isFloatPcm = false + } + } + } else { + bitsPerSample = 16 + isFloatPcm = false + } + + // Select the audio track in the extractor + extractor.selectTrack(audioTrackIndex) + + if (startUs > 0) { + extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC) + } + + outputStream = FileOutputStream(outputFile) + + // Write placeholder WAV header + writeWavHeader(outputStream, 0) + + val buffer = ByteBuffer.allocate(BUFFER_SIZE) + val validEndUs = if (endUs == Long.MAX_VALUE) Long.MAX_VALUE else endUs + val totalDurationUs = if (validEndUs == Long.MAX_VALUE) Long.MAX_VALUE else (validEndUs - startUs) + var currentTimeUs = startUs + + onProgress(0.0) + + while (!shouldStop()) { + buffer.clear() + val sampleSize = extractor.readSampleData(buffer, 0) + + if (sampleSize < 0) { + // End of stream + break + } + + val presentationTimeUs = extractor.sampleTime + + if (presentationTimeUs > validEndUs) { + break + } + + buffer.position(0) + buffer.limit(sampleSize) + + val pcmData = ByteArray(sampleSize) + buffer.get(pcmData) + + if (isFloatPcm) { + // Convert float PCM to 16-bit integer PCM + val floatBuffer = ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN) + val floatSamples = pcmData.size / 4 + val int16Buffer = ByteBuffer.allocate(floatSamples * 2).order(ByteOrder.LITTLE_ENDIAN) + for (i in 0 until floatSamples) { + val floatValue = 
floatBuffer.float + val intValue = (floatValue.coerceIn(-1.0f, 1.0f) * 32767.0f).toInt().toShort() + int16Buffer.putShort(intValue) + } + outputStream.write(int16Buffer.array()) + totalDataSize += int16Buffer.array().size + } else { + outputStream.write(pcmData) + totalDataSize += sampleSize + } + + currentTimeUs = presentationTimeUs + if (totalDurationUs != Long.MAX_VALUE) { + val progress = ((currentTimeUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0) + onProgress(progress) + } + + extractor.advance() + } + + outputStream.flush() + outputStream.close() + outputStream = null + + // Update WAV header with actual sizes + updateWavHeader() + + onProgress(1.0) + + } finally { + outputStream?.close() + } + } + + /** + * Writes a WAV file header with RIFF/WAVE format. + * + * Note: RIFF files have a 4GB limit due to 32-bit size fields. This is a WAV format limitation. + */ + private fun writeWavHeader(outputStream: FileOutputStream, dataSize: Long) { + val header = ByteBuffer.allocate(44) + header.order(ByteOrder.LITTLE_ENDIAN) + + val byteRate = sampleRate * numChannels * bitsPerSample / 8 + val blockAlign = (numChannels * bitsPerSample / 8).toShort() + + // RIFF chunk sizes are unsigned 32-bit, so the actual WAV limit is ~4GB. + // Clamp to 0xFFFFFFFF; .toInt() gives the correct bit pattern for ByteBuffer.putInt. 
+ val safeSizeForHeader = if (dataSize > 0xFFFFFFFFL) { + 0xFFFFFFFF.toInt() + } else { + dataSize.toInt() + } + + // RIFF header - all values written in little-endian order + // Magic number constants are pre-encoded for little-endian + header.putInt(RIFF_HEADER) // "RIFF" + header.putInt(36 + safeSizeForHeader) // File size - 8 + header.putInt(WAVE_HEADER) // "WAVE" + + // fmt sub-chunk + header.putInt(FMT_HEADER) // "fmt " + header.putInt(16) // Sub-chunk size (16 for PCM) + header.putShort(PCM_FORMAT) // Audio format (1 = PCM) + header.putShort(numChannels.toShort()) // Number of channels + header.putInt(sampleRate) // Sample rate + header.putInt(byteRate) // Byte rate + header.putShort(blockAlign) // Block align + header.putShort(bitsPerSample.toShort()) // Bits per sample + + // data sub-chunk + header.putInt(DATA_HEADER) // "data" + header.putInt(safeSizeForHeader) // Data size + + outputStream.write(header.array()) + } + + /** + * Updates the WAV header with the actual file sizes after writing is complete. + * + * Note: WAV files are limited to ~4GB due to unsigned 32-bit size fields in the RIFF spec. + * If the file exceeds this, the header size fields are clamped to 0xFFFFFFFF. + */ + private fun updateWavHeader() { + // RIFF chunk sizes are unsigned 32-bit; clamp to 0xFFFFFFFF (~4GB). + // .toInt() gives the correct bit pattern for ByteBuffer.putInt. 
+ val safeSizeForHeader = if (totalDataSize > 0xFFFFFFFFL) { + 0xFFFFFFFF.toInt() + } else { + totalDataSize.toInt() + } + + RandomAccessFile(outputFile, "rw").use { raf -> + // Update file size at byte 4 (RIFF chunk size = 36 + data size) + // RIFF chunk size = file size - 8 bytes + raf.seek(4L) + val riffSize = 36 + safeSizeForHeader + raf.write(ByteBuffer.allocate(4).apply { + order(ByteOrder.LITTLE_ENDIAN) + putInt(riffSize) + flip() + }.array()) + + // Update data size at byte 40 (data chunk size) + raf.seek(40L) + raf.write(ByteBuffer.allocate(4).apply { + order(ByteOrder.LITTLE_ENDIAN) + putInt(safeSizeForHeader) + flip() + }.array()) + } + } +} diff --git a/example/integration_test/audio_extract_test.dart b/example/integration_test/audio_extract_test.dart index ac20ad76..727089d7 100644 --- a/example/integration_test/audio_extract_test.dart +++ b/example/integration_test/audio_extract_test.dart @@ -4,6 +4,7 @@ import 'package:flutter/foundation.dart'; import 'package:flutter/services.dart'; import 'package:flutter_test/flutter_test.dart'; import 'package:integration_test/integration_test.dart'; +import 'package:mime/mime.dart'; import 'package:path_provider/path_provider.dart'; import 'package:pro_video_editor/pro_video_editor.dart'; import 'package:pro_video_editor_example/core/constants/example_constants.dart'; @@ -18,6 +19,8 @@ void main() { // Audio extraction is not supported on Web, Windows, and Linux yet final skipPlatform = kIsWeb || isWindows || isLinux; + final pve = ProVideoEditor.instance; + /// Helper to check if a format is supported on current platform bool isFormatSupported(AudioFormat format) { if (kIsWeb) return false; @@ -38,16 +41,14 @@ void main() { testWidgets( 'extractAudio with $format returns valid audio file', (tester) async { + if (!isFormatSupported(format)) return; + final directory = await getTemporaryDirectory(); final outputPath = '${directory.path}/test_audio_${DateTime.now().millisecondsSinceEpoch}.${format.extension}'; 
final config = AudioExtractConfigs(video: testVideo, format: format); - - final result = await ProVideoEditor.instance.extractAudioToFile( - outputPath, - config, - ); + final result = await pve.extractAudioToFile(outputPath, config); expect(result, equals(outputPath)); @@ -59,6 +60,20 @@ void main() { reason: 'Audio file should exist at $outputPath', ); + // Use extension-based MIME detection — header-based detection is + // unreliable for MP4-container formats (AAC/M4A/MP3 on Android all + // share the same magic bytes regardless of audio content). + final mimeType = lookupMimeType(result); + // AAC on iOS/macOS is saved with a .m4a extension (the only container + // Apple supports for AAC export), so it resolves to 'audio/mp4'. + // The mime package maps .wav to 'audio/x-wav' rather than 'audio/wav'. + final expectedMimeTypes = switch (format) { + AudioFormat.aac => [format.mimeType, 'audio/mp4'], + AudioFormat.wav => [format.mimeType, 'audio/wav'], + _ => [format.mimeType], + }; + expect(expectedMimeTypes, contains(mimeType)); + // Verify file has content final fileSize = await file.length(); expect( @@ -76,6 +91,8 @@ void main() { testWidgets( 'extractAudio with $format and trimming works correctly', (tester) async { + if (!isFormatSupported(format)) return; + final directory = await getTemporaryDirectory(); final outputPath = '${directory.path}/test_audio_trimmed_${DateTime.now().millisecondsSinceEpoch}.${format.extension}'; @@ -88,10 +105,7 @@ void main() { endTime: const Duration(seconds: 10), ); - final result = await ProVideoEditor.instance.extractAudioToFile( - outputPath, - config, - ); + final result = await pve.extractAudioToFile(outputPath, config); expect(result, equals(outputPath)); @@ -111,11 +125,15 @@ void main() { greaterThan(500), reason: 'Trimmed audio should have some content', ); - expect( - fileSize, - lessThan(500000), - reason: 'Trimmed audio should be smaller than full extraction', - ); + // WAV is uncompressed — 5 seconds can be 
several MB depending on + // sample rate and bit depth, so only cap compressed formats. + if (format != AudioFormat.wav && format != AudioFormat.caf) { + expect( + fileSize, + lessThan(500000), + reason: 'Trimmed audio should be smaller than full extraction', + ); + } // Clean up await file.delete(); diff --git a/example/lib/features/audio/audio_extract_example_page.dart b/example/lib/features/audio/audio_extract_example_page.dart index fc28e620..15620a16 100644 --- a/example/lib/features/audio/audio_extract_example_page.dart +++ b/example/lib/features/audio/audio_extract_example_page.dart @@ -4,8 +4,10 @@ import 'dart:io'; import 'package:audioplayers/audioplayers.dart'; import 'package:flutter/foundation.dart'; import 'package:flutter/material.dart'; +import 'package:mime/mime.dart'; import 'package:path_provider/path_provider.dart'; import 'package:pro_video_editor/pro_video_editor.dart'; +import 'package:wav/wav.dart'; import '/core/constants/example_constants.dart'; @@ -122,9 +124,27 @@ class _AudioExtractExamplePageState extends State { }); if (mounted) { + var raf = File(outputPath).openSync(); + var bytes = raf.readSync(defaultMagicNumbersMaxLength); + raf.closeSync(); + + var info = lookupMimeType(outputPath, headerBytes: bytes) ?? 
'unknown'; + + if (_selectedFormat == AudioFormat.wav) { + try { + var wav = Wav.read(File(outputPath).readAsBytesSync()); + info += + ' channels:${wav.channels.length}' + ' sampleRate:${wav.samplesPerSecond}' + ' format:${wav.format.name}'; + } catch (_) { + info += ' (invalid wav)'; + } + } + ScaffoldMessenger.of(context).showSnackBar( - const SnackBar( - content: Text('Audio extracted successfully!'), + SnackBar( + content: Text('Audio extracted successfully!\n$info'), backgroundColor: Colors.green, ), ); diff --git a/example/pubspec.yaml b/example/pubspec.yaml index 3176bd58..627d4799 100644 --- a/example/pubspec.yaml +++ b/example/pubspec.yaml @@ -22,6 +22,7 @@ dependencies: flutter_colorpicker: ^1.1.0 file_picker: ^10.2.1 mime: ^2.0.0 + wav: 1.5.0 path_provider: ^2.1.5 video_player: ^2.9.3 diff --git a/ios/Classes/src/features/audio/ExtractAudio.swift b/ios/Classes/src/features/audio/ExtractAudio.swift index 8fe5ff56..69f91c47 100644 --- a/ios/Classes/src/features/audio/ExtractAudio.swift +++ b/ios/Classes/src/features/audio/ExtractAudio.swift @@ -136,43 +136,75 @@ class ExtractAudio { outputFileType = .m4a } - // Create export session with audio-only preset - guard let session = AVAssetExportSession( - asset: asset, - presetName: AVAssetExportPresetPassthrough - ) else { - throw NSError( - domain: "ExtractAudio", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] - ) - } - - exportSession = session - session.outputURL = outputURL - session.outputFileType = outputFileType - // Configure to export only audio tracks let audioTracks = asset.tracks(withMediaType: .audio) guard !audioTracks.isEmpty else { throw NoAudioTrackException() } - // Apply time range if trimming is requested + // Get the actual audio track to extract + let audioTrack = audioTracks[0] + + // Determine the time range to extract + // IMPORTANT: Use the audio track's actual timeRange, not asset.duration + // Audio tracks may not start at zero due to encoding 
delays or sync adjustments + let sourceTimeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) let duration = CMTimeSubtract(endTime, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let startUs = config.startUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let duration = CMTimeSubtract(asset.duration, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) - session.timeRange = CMTimeRange(start: .zero, duration: endTime) + sourceTimeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + sourceTimeRange = audioTrack.timeRange + } + + // Create composition to remap timestamps to start at zero + // This ensures the extracted audio timeline starts at 0, not at the original offset + let composition = AVMutableComposition() + guard let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError( + domain: "ExtractAudio", + code: -13, + userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"] + ) } + // Insert the audio track at time zero (remapping the timeline) + try compositionAudioTrack.insertTimeRange( + sourceTimeRange, + of: audioTrack, + at: .zero + ) + + // Create export session with the composition (not the original asset) + guard let session = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + throw NSError( + domain: "ExtractAudio", + code: -1, + 
userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] + ) + } + + exportSession = session + session.outputURL = outputURL + session.outputFileType = outputFileType + + // No need to set timeRange on the session since the composition already handles it + // Start progress tracking on main thread DispatchQueue.main.async { onProgress(0.0) @@ -349,7 +381,9 @@ class ExtractAudio { } // Calculate time range - var timeRange = CMTimeRange(start: .zero, duration: asset.duration) + // Use the audio track's actual timeRange for full extraction + // Audio tracks may not start at zero due to encoding delays or sync adjustments + var timeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) @@ -360,6 +394,9 @@ class ExtractAudio { } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) timeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + timeRange = audioTrack.timeRange } // Create asset reader @@ -406,6 +443,22 @@ class ExtractAudio { let sampleRate = audioStreamBasicDescription?.mSampleRate ?? 44100 let channels = audioStreamBasicDescription?.mChannelsPerFrame ?? 
2 + // Create audio channel layout based on number of channels + var channelLayout = AudioChannelLayout() + channelLayout.mChannelBitmap = AudioChannelBitmap(rawValue: 0) + channelLayout.mNumberChannelDescriptions = 0 + channelLayout.mChannelLayoutTag = switch channels { + case 1: kAudioChannelLayoutTag_Mono + case 2: kAudioChannelLayoutTag_Stereo + case 3: kAudioChannelLayoutTag_MPEG_3_0_A + case 4: kAudioChannelLayoutTag_Quadraphonic + case 5: kAudioChannelLayoutTag_MPEG_5_0_A + case 6: kAudioChannelLayoutTag_MPEG_5_1_A + case 7: kAudioChannelLayoutTag_MPEG_6_1_A + case 8: kAudioChannelLayoutTag_MPEG_7_1_A + default: kAudioChannelLayoutTag_DiscreteInOrder | UInt32(channels) + } + // Configure writer input for PCM WAV let writerInputSettings: [String: Any] = [ AVFormatIDKey: kAudioFormatLinearPCM, @@ -414,7 +467,8 @@ class ExtractAudio { AVLinearPCMBitDepthKey: 16, AVLinearPCMIsFloatKey: false, AVLinearPCMIsBigEndianKey: false, - AVLinearPCMIsNonInterleaved: false + AVLinearPCMIsNonInterleaved: false, + AVChannelLayoutKey: Data(bytes: &channelLayout, count: MemoryLayout<AudioChannelLayout>.size) ] let writerInput = AVAssetWriterInput(mediaType: .audio, outputSettings: writerInputSettings) @@ -446,7 +500,7 @@ class ExtractAudio { ) } - writer.startSession(atSourceTime: timeRange.start) + writer.startSession(atSourceTime: .zero) // Calculate total duration for progress let totalDuration = CMTimeGetSeconds(timeRange.duration) diff --git a/ios/Classes/src/features/audio/models/AudioExtractConfig.swift b/ios/Classes/src/features/audio/models/AudioExtractConfig.swift index e20c5c4a..278e22da 100644 --- a/ios/Classes/src/features/audio/models/AudioExtractConfig.swift +++ b/ios/Classes/src/features/audio/models/AudioExtractConfig.swift @@ -56,13 +56,14 @@ struct AudioExtractConfig { /// Returns the file extension for the output audio file based on the format. 
/// - /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a") + /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a", "wav") func getOutputExtension() -> String { switch format.lowercased() { case "mp3": return "mp3" case "aac": return "m4a" case "m4a": return "m4a" case "caf": return "caf" + case "wav": return "wav" default: return "m4a" } } @@ -75,6 +76,7 @@ struct AudioExtractConfig { case "mp3": return "com.apple.m4a-audio" // MP3 in M4A container case "aac": return "com.apple.m4a-audio" // AAC in M4A container case "m4a": return "com.apple.m4a-audio" // M4A container + case "wav": return "com.microsoft.waveform-audio" // WAV format default: return "com.apple.m4a-audio" } } diff --git a/lib/core/models/audio/audio_format_model.dart b/lib/core/models/audio/audio_format_model.dart index e8e06ae9..1740ffa0 100644 --- a/lib/core/models/audio/audio_format_model.dart +++ b/lib/core/models/audio/audio_format_model.dart @@ -24,7 +24,7 @@ enum AudioFormat { /// WAV format - uncompressed audio, high quality, large file size. 
/// Supported on: Android, iOS, macOS - wav('audio/wav'); + wav('audio/x-wav'); const AudioFormat(this.mimeType); diff --git a/macos/Classes/src/features/audio/ExtractAudio.swift b/macos/Classes/src/features/audio/ExtractAudio.swift index 91c74cad..938fa13a 100644 --- a/macos/Classes/src/features/audio/ExtractAudio.swift +++ b/macos/Classes/src/features/audio/ExtractAudio.swift @@ -136,46 +136,76 @@ class ExtractAudio { default: outputFileType = .m4a } - - // Create export session with audio-only preset - guard - let session = AVAssetExportSession( - asset: asset, - presetName: AVAssetExportPresetPassthrough - ) - else { - throw NSError( - domain: "ExtractAudio", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] - ) - } - - exportSession = session - session.outputURL = outputURL - session.outputFileType = outputFileType - + // Configure to export only audio tracks let audioTracks = asset.tracks(withMediaType: .audio) guard !audioTracks.isEmpty else { throw NoAudioTrackException() } - - // Apply time range if trimming is requested + + // Get the actual audio track to extract + let audioTrack = audioTracks[0] + + // Determine the time range to extract + // IMPORTANT: Use the audio track's actual timeRange, not asset.duration + // Audio tracks may not start at zero due to encoding delays or sync adjustments + let sourceTimeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) let duration = CMTimeSubtract(endTime, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) + sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let startUs = config.startUs { let startTime = CMTime(value: startUs, timescale: 1_000_000) let duration = CMTimeSubtract(asset.duration, startTime) - session.timeRange = CMTimeRange(start: startTime, duration: duration) 
+ sourceTimeRange = CMTimeRange(start: startTime, duration: duration) } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) - session.timeRange = CMTimeRange(start: .zero, duration: endTime) + sourceTimeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + sourceTimeRange = audioTrack.timeRange + } + + // Create composition to remap timestamps to start at zero + // This ensures the extracted audio timeline starts at 0, not at the original offset + let composition = AVMutableComposition() + guard let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError( + domain: "ExtractAudio", + code: -13, + userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"] + ) + } + + // Insert the audio track at time zero (remapping the timeline) + try compositionAudioTrack.insertTimeRange( + sourceTimeRange, + of: audioTrack, + at: .zero + ) + + // Create export session with the composition (not the original asset) + guard let session = AVAssetExportSession( + asset: composition, + presetName: AVAssetExportPresetPassthrough + ) else { + throw NSError( + domain: "ExtractAudio", + code: -1, + userInfo: [NSLocalizedDescriptionKey: "Failed to create export session"] + ) } + exportSession = session + session.outputURL = outputURL + session.outputFileType = outputFileType + + // No need to set timeRange on the session since the composition already handles it + // Start progress tracking on main thread DispatchQueue.main.async { onProgress(0.0) @@ -368,7 +398,8 @@ class ExtractAudio { } // Calculate time range - var timeRange = CMTimeRange(start: .zero, duration: asset.duration) + // Audio tracks may not start at zero due to encoding delays or sync adjustments + var timeRange: CMTimeRange if let startUs = config.startUs, let endUs = config.endUs { 
let startTime = CMTime(value: startUs, timescale: 1_000_000) let endTime = CMTime(value: endUs, timescale: 1_000_000) @@ -381,6 +412,9 @@ class ExtractAudio { } else if let endUs = config.endUs { let endTime = CMTime(value: endUs, timescale: 1_000_000) timeRange = CMTimeRange(start: .zero, duration: endTime) + } else { + // Use the audio track's actual time range to capture all audio data + timeRange = audioTrack.timeRange } // Create asset reader @@ -428,6 +462,22 @@ class ExtractAudio { formatDescription)?.pointee let sampleRate = audioStreamBasicDescription?.mSampleRate ?? 44100 let channels = audioStreamBasicDescription?.mChannelsPerFrame ?? 2 + + // Create audio channel layout based on number of channels + var channelLayout = AudioChannelLayout() + channelLayout.mChannelBitmap = AudioChannelBitmap(rawValue: 0) + channelLayout.mNumberChannelDescriptions = 0 + channelLayout.mChannelLayoutTag = switch channels { + case 1: kAudioChannelLayoutTag_Mono + case 2: kAudioChannelLayoutTag_Stereo + case 3: kAudioChannelLayoutTag_MPEG_3_0_A + case 4: kAudioChannelLayoutTag_Quadraphonic + case 5: kAudioChannelLayoutTag_MPEG_5_0_A + case 6: kAudioChannelLayoutTag_MPEG_5_1_A + case 7: kAudioChannelLayoutTag_MPEG_6_1_A + case 8: kAudioChannelLayoutTag_MPEG_7_1_A + default: kAudioChannelLayoutTag_DiscreteInOrder | UInt32(channels) + } // Configure writer input for PCM WAV let writerInputSettings: [String: Any] = [ @@ -438,6 +488,7 @@ class ExtractAudio { AVLinearPCMIsFloatKey: false, AVLinearPCMIsBigEndianKey: false, AVLinearPCMIsNonInterleaved: false, + AVChannelLayoutKey: Data(bytes: &channelLayout, count: MemoryLayout<AudioChannelLayout>.size) ] let writerInput = AVAssetWriterInput( @@ -471,9 +522,9 @@ class ExtractAudio { userInfo: [NSLocalizedDescriptionKey: "Failed to start writing"] ) } - - writer.startSession(atSourceTime: timeRange.start) - + + writer.startSession(atSourceTime: .zero) + // Calculate total duration for progress let totalDuration = CMTimeGetSeconds(timeRange.duration) diff 
--git a/macos/Classes/src/features/audio/models/AudioExtractConfig.swift b/macos/Classes/src/features/audio/models/AudioExtractConfig.swift index ecdc18c5..95dd671c 100644 --- a/macos/Classes/src/features/audio/models/AudioExtractConfig.swift +++ b/macos/Classes/src/features/audio/models/AudioExtractConfig.swift @@ -57,13 +57,14 @@ struct AudioExtractConfig { /// Returns the file extension for the output audio file based on the format. /// - /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a") + /// - Returns: The file extension string (e.g., "mp3", "aac", "m4a", "wav") func getOutputExtension() -> String { switch format.lowercased() { case "mp3": return "mp3" case "aac": return "m4a" case "m4a": return "m4a" case "caf": return "caf" + case "wav": return "wav" default: return "m4a" } } @@ -76,6 +77,7 @@ struct AudioExtractConfig { case "mp3": return "com.apple.m4a-audio" // MP3 in M4A container case "aac": return "com.apple.m4a-audio" // AAC in M4A container case "m4a": return "com.apple.m4a-audio" // M4A container + case "wav": return "com.microsoft.waveform-audio" // WAV format default: return "com.apple.m4a-audio" } }