Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@ class NoAudioTrackException(message: String) : Exception(message)
/**
* Service for extracting audio from video files.
*
* This class handles the audio extraction pipeline using Android MediaExtractor and MediaMuxer:
* This class handles the audio extraction pipeline:
* - Extracts audio track from video file
* - Supports trimming (start/end time)
* - Supports multiple output formats (MP3, AAC, WAV, M4A, OGG)
* - Provides progress tracking during extraction
* - Supports cancellation of active extraction jobs
*
* For WAV format, uses custom WAV file writer with PCM encoding.
* For other formats, uses Android MediaExtractor and MediaMuxer.
*/
class ExtractAudio(private val context: Context) {

Expand All @@ -54,6 +57,184 @@ class ExtractAudio(private val context: Context) {
onProgress: (Double) -> Unit,
onComplete: (ByteArray?) -> Unit,
onError: (Throwable) -> Unit
): AudioExtractJobHandle {
return if (config.format.lowercase() == "wav") {
// WAV format requires special handling
extractToWav(config, onProgress, onComplete, onError)
} else {
// Use MediaMuxer for other formats
extractWithMuxer(config, onProgress, onComplete, onError)
}
}

/**
 * Extracts the audio track of [AudioExtractConfig.inputPath] into a WAV file.
 *
 * The heavy lifting (reading samples and writing the WAV container) is delegated to
 * [WavFileWriter.extractAndWrite]; per the existing documentation of this class that writer
 * handles both compressed sources (decoded to PCM) and sources that are already PCM.
 *
 * Threading contract:
 * - All extraction work AND all file I/O (including reading the temporary result back into
 *   memory) happens on a dedicated background thread.
 * - [onProgress], [onComplete] and [onError] are always posted to the main looper.
 *
 * Output contract:
 * - When `config.outputPath` is set, the WAV is written there and [onComplete] receives `null`.
 * - Otherwise the WAV is written to a temporary file in the cache directory, its bytes are
 *   passed to [onComplete], and the temporary file is deleted before the callback is posted.
 *
 * @param config extraction settings (input path, optional output path, optional trim range in µs)
 * @param onProgress receives progress values reported by the WAV writer (starts with `0.0`)
 * @param onComplete receives the WAV bytes, or `null` when the result was written to `outputPath`
 * @param onError receives any exception raised during extraction, including the
 *   [InterruptedException] raised when the job is cancelled
 * @return a handle whose action requests cancellation and drops callbacks still queued on the main looper
 */
private fun extractToWav(
    config: AudioExtractConfig,
    onProgress: (Double) -> Unit,
    onComplete: (ByteArray?) -> Unit,
    onError: (Throwable) -> Unit
): AudioExtractJobHandle {
    val shouldStop = AtomicBoolean(false)
    val mainHandler = Handler(Looper.getMainLooper())

    // No explicit output path means the caller wants the bytes back; we then own a temp file.
    val returnsBytes = config.outputPath == null
    val outputFile = config.outputPath?.let { File(it) }
        ?: File(context.cacheDir, "audio_output_${System.currentTimeMillis()}.wav")

    // Run extraction in background thread
    Thread {
        var extractor: MediaExtractor? = null

        try {
            val source = MediaExtractor()
            extractor = source
            source.setDataSource(config.inputPath)

            val audioTrackIndex = findAudioTrack(source)
            if (audioTrackIndex < 0) {
                throw NoAudioTrackException("No audio track found in video file")
            }

            val audioFormat = source.getTrackFormat(audioTrackIndex)
            val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION)

            // Determine the actual start and end timestamps for extraction.
            val actualStartUs: Long
            val actualEndUs: Long

            if (config.startUs != null || config.endUs != null) {
                // User specified trim parameters - use them as-is.
                // NOTE(review): with only startUs given, the end defaults to start + duration,
                // which lies past the end of the track. Extraction presumably stops at
                // end-of-stream, but progress may be scaled against a too-long range — confirm
                // against WavFileWriter.
                actualStartUs = config.startUs ?: 0L
                actualEndUs = config.endUs ?: (actualStartUs + durationUs)
            } else {
                // Full extraction: the first audio sample may not sit at timestamp 0
                // (e.g. encoder delay), so probe its presentation time.
                // A separate, short-lived extractor is used for the probe so that no track is
                // pre-selected on the main extractor before it is handed to WavFileWriter
                // (the original author notes that the writer selects the track itself).
                val probe = MediaExtractor()
                val firstSampleTimeUs = try {
                    probe.setDataSource(config.inputPath)
                    probe.selectTrack(audioTrackIndex)
                    probe.sampleTime
                } finally {
                    probe.release()
                }

                if (firstSampleTimeUs > 0) {
                    // Audio track has an offset
                    actualStartUs = firstSampleTimeUs
                    actualEndUs = firstSampleTimeUs + durationUs
                } else {
                    // Audio track starts at or near zero
                    actualStartUs = 0L
                    actualEndUs = durationUs
                }
            }

            // Throws IllegalArgumentException, same as the explicit throw it replaces.
            require(actualEndUs > actualStartUs) { "endUs must be greater than startUs" }

            val wavWriter = WavFileWriter(outputFile)

            mainHandler.post { onProgress(0.0) }

            wavWriter.extractAndWrite(
                extractor = source,
                audioTrackIndex = audioTrackIndex,
                startUs = actualStartUs,
                endUs = actualEndUs,
                onProgress = { progress ->
                    // Suppress progress updates once cancellation has been requested.
                    if (!shouldStop.get()) {
                        mainHandler.post { onProgress(progress) }
                    }
                },
                shouldStop = { shouldStop.get() }
            )

            // Check if cancelled
            if (shouldStop.get()) {
                outputFile.delete()
                throw InterruptedException("Extraction cancelled by user")
            }

            // Release the extractor before loading the result into memory.
            source.release()
            extractor = null

            // Read the temporary file HERE, on the worker thread, rather than inside the posted
            // callback: this keeps blocking I/O off the main thread and guarantees the temp file
            // is removed even if the posted callback is later dropped by a cancel request.
            // A read failure propagates to the catch block below and is reported via onError.
            val resultBytes: ByteArray? = if (returnsBytes) {
                try {
                    outputFile.readBytes()
                } finally {
                    outputFile.delete()
                }
            } else {
                // Output saved to the caller-provided path; nothing to return.
                null
            }

            mainHandler.post {
                try {
                    onComplete(resultBytes)
                } catch (e: Exception) {
                    // Preserve the original contract: a throwing onComplete is routed to onError.
                    onError(e)
                }
            }
        } catch (e: Exception) {
            Log.e(TAG, "Error extracting WAV audio: ${e.message}", e)
            mainHandler.post {
                onError(e)
            }
            // Clean up output file on error
            if (outputFile.exists()) {
                outputFile.delete()
            }
        } finally {
            try {
                extractor?.release()
            } catch (e: Exception) {
                Log.w(TAG, "Error releasing extractor: ${e.message}")
            }
        }
    }.start()

    // Return cancellation handle
    return AudioExtractJobHandle {
        shouldStop.set(true)
        mainHandler.removeCallbacksAndMessages(null)
        // File cleanup is handled by the background thread once it detects shouldStop
    }
}

/**
* Extracts audio using MediaMuxer for non-WAV formats.
*/
private fun extractWithMuxer(
config: AudioExtractConfig,
onProgress: (Double) -> Unit,
onComplete: (ByteArray?) -> Unit,
onError: (Throwable) -> Unit
): AudioExtractJobHandle {
val shouldStop = AtomicBoolean(false)
val mainHandler = Handler(Looper.getMainLooper())
Expand Down Expand Up @@ -100,20 +281,43 @@ class ExtractAudio(private val context: Context) {
val muxerTrackIndex = muxer.addTrack(audioFormat)
muxer.start()

// Calculate duration and seek to start if needed
// Get the audio track's actual time range
// Audio tracks may not start at timestamp 0 due to encoder delays
val durationUs = audioFormat.getLong(MediaFormat.KEY_DURATION)
val startUs = config.startUs ?: 0L
val endUs = config.endUs ?: durationUs

// Determine the actual start and end timestamps for extraction
val actualStartUs: Long
val actualEndUs: Long

if (config.startUs != null || config.endUs != null) {
// User specified trim parameters - use them as-is
actualStartUs = config.startUs ?: 0L
actualEndUs = config.endUs ?: (actualStartUs + durationUs)
} else {
// Full extraction - need to detect the audio track's actual start time
// Read the first sample to get the actual start timestamp
val firstSampleTimeUs = extractor.sampleTime

if (firstSampleTimeUs > 0) {
// Audio track has an offset (e.g., AAC encoder delay)
actualStartUs = firstSampleTimeUs
actualEndUs = firstSampleTimeUs + durationUs
} else {
// Audio track starts at or near zero
actualStartUs = 0L
actualEndUs = durationUs
}
}

if (startUs > 0) {
extractor.seekTo(startUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
if (actualStartUs > 0) {
extractor.seekTo(actualStartUs, MediaExtractor.SEEK_TO_CLOSEST_SYNC)
}

// Extract and write audio samples
val buffer = ByteBuffer.allocate(BUFFER_SIZE)
val bufferInfo = MediaCodec.BufferInfo()
var extractedUs = startUs
val totalDurationUs = endUs - startUs
var extractedUs = actualStartUs
val totalDurationUs = actualEndUs - actualStartUs

mainHandler.post { onProgress(0.0) }

Expand All @@ -128,12 +332,13 @@ class ExtractAudio(private val context: Context) {
val presentationTimeUs = extractor.sampleTime

// Check if we've reached the end time
if (presentationTimeUs > endUs) {
if (presentationTimeUs > actualEndUs) {
break
}

// Adjust presentation time if we're trimming from start
bufferInfo.presentationTimeUs = presentationTimeUs - startUs
// Adjust presentation time to start at zero in the output
// This ensures extracted audio always has timestamps starting at 0
bufferInfo.presentationTimeUs = presentationTimeUs - actualStartUs
bufferInfo.size = sampleSize
bufferInfo.offset = 0

Expand All @@ -150,8 +355,7 @@ class ExtractAudio(private val context: Context) {

// Update progress
extractedUs = presentationTimeUs
val progress =
((extractedUs - startUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0)
val progress = ((extractedUs - actualStartUs).toDouble() / totalDurationUs).coerceIn(0.0, 1.0)
mainHandler.post { onProgress(progress) }

// Advance to next sample
Expand Down Expand Up @@ -222,9 +426,7 @@ class ExtractAudio(private val context: Context) {
return AudioExtractJobHandle {
shouldStop.set(true)
mainHandler.removeCallbacksAndMessages(null)
if (config.outputPath == null && outputFile.exists()) {
outputFile.delete()
}
// File cleanup is handled by the background thread once it detects shouldStop
}
}

Expand All @@ -247,15 +449,15 @@ class ExtractAudio(private val context: Context) {
/**
* Determines the MediaMuxer output format based on the requested audio format.
*
* @param format Audio format string (mp3, aac, wav, m4a, ogg)
* @param format Audio format string (mp3, aac, m4a, ogg)
* @return MediaMuxer output format constant
*/
private fun determineOutputFormat(format: String): Int {
return when (format.lowercase()) {
"mp3" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4
"aac" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4
"m4a" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4
"wav" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4
"wav" -> throw IllegalArgumentException("WAV format should be handled by extractToWav()")
"ogg" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_OGG
"webm" -> MediaMuxer.OutputFormat.MUXER_OUTPUT_WEBM
else -> MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4 // Default to MP4 container
Expand Down
Loading
Loading