Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,33 @@ jobs:

# Fail if any changes were written to any source files or generated untracked files
- run: git add -A && git diff --cached --exit-code

python-adapters:
name: python-adapters
needs: build
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- name: Cache models
uses: actions/cache@v4
with:
path: data/whisperx-env/parakeet/*.nemo
key: ${{ runner.os }}-models-${{ hashFiles('Makefile') }}

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y ffmpeg

- name: Run Python Adapter Tests
run: |
# These tests are memory-intensive and may run out of RAM on standard GitHub-hosted runners.
# This step is provisional: revisit the runner size if it proves unreliable.
make test-python-adapters
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,13 @@ test: ## Run tests using gotestsum (via go tool)
test-watch: ## Run tests in watch mode using gotestsum (via go tool)
@echo "Running tests in watch mode..."
go tool gotestsum --watch -- -v ./...

setup-python-tests: ## Set up Python environments and download models for tests
@echo "Setting up Python environments using Go setup tool..."
go run cmd/setup-adapters/main.go

test-python-adapters: setup-python-tests ## Run Python adapter tests
@echo "Running Parakeet adapter tests..."
uv run --with pytest --project data/whisperx-env/parakeet pytest internal/transcription/adapters/py/nvidia/tests/
@echo "Running PyAnnote adapter tests..."
uv run --with pytest --project data/whisperx-env/pyannote pytest internal/transcription/adapters/py/pyannote/tests/
42 changes: 4 additions & 38 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"net/http"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"

Expand All @@ -21,7 +20,6 @@
"scriberr/internal/service"
"scriberr/internal/sse"
"scriberr/internal/transcription"
"scriberr/internal/transcription/adapters"
"scriberr/internal/transcription/registry"
"scriberr/pkg/logger"
)
Expand All @@ -45,7 +43,7 @@
// @license.name MIT
// @license.url https://opensource.org/licenses/MIT

// @host localhost:8080
// @host localhost:5318
// @BasePath /api/v1

// @securityDefinitions.apikey ApiKeyAuth
Expand Down Expand Up @@ -78,7 +76,7 @@
cfg := config.Load()

// Register adapters with config-based paths
registerAdapters(cfg)
registry.RegisterStandardAdapters(cfg)

// Initialize database
logger.Startup("database", "Connecting to database")
Expand Down Expand Up @@ -113,6 +111,7 @@
logger.Startup("service", "Initializing services")
userService := service.NewUserService(userRepo, authService)
fileService := service.NewFileService()
speakerService := service.NewSpeakerService(jobRepo)

Check failure on line 114 in cmd/server/main.go

View workflow job for this annotation

GitHub Actions / basics

undefined: service.NewSpeakerService

// Initialize unified transcription processor
logger.Startup("transcription", "Initializing transcription service")
Expand Down Expand Up @@ -162,8 +161,9 @@
taskQueue,
unifiedProcessor,
quickTranscriptionService,
speakerService,
multiTrackProcessor,
broadcaster,

Check failure on line 166 in cmd/server/main.go

View workflow job for this annotation

GitHub Actions / basics

too many arguments in call to api.NewHandler
)

// Set up router
Expand Down Expand Up @@ -214,37 +214,3 @@

logger.Info("Server stopped")
}

// registerAdapters wires every transcription and diarization adapter into the
// registry. Each adapter's environment directory is derived from
// cfg.WhisperXEnv; the OpenAI adapter takes cfg.OpenAIAPIKey instead.
func registerAdapters(cfg *config.Config) {
	envRoot := cfg.WhisperXEnv
	logger.Info("Registering adapters with environment path", "whisperx_env", envRoot)

	// Environment directories under the shared root:
	//   parakeet - shared by the Parakeet, Canary and Sortformer adapters
	//   pyannote - dedicated to PyAnnote (kept apart to avoid dependency conflicts)
	//   voxtral  - dedicated to Voxtral
	var (
		nemoEnv     = filepath.Join(envRoot, "parakeet")
		pyannoteEnv = filepath.Join(envRoot, "pyannote")
		voxtralEnv  = filepath.Join(envRoot, "voxtral")
	)

	// Transcription adapters. Each one is constructed immediately before it
	// is registered.
	whisperX := adapters.NewWhisperXAdapter(envRoot)
	registry.RegisterTranscriptionAdapter("whisperx", whisperX)

	parakeet := adapters.NewParakeetAdapter(nemoEnv)
	registry.RegisterTranscriptionAdapter("parakeet", parakeet)

	canary := adapters.NewCanaryAdapter(nemoEnv)
	registry.RegisterTranscriptionAdapter("canary", canary)

	voxtral := adapters.NewVoxtralAdapter(voxtralEnv)
	registry.RegisterTranscriptionAdapter("voxtral", voxtral)

	openAI := adapters.NewOpenAIAdapter(cfg.OpenAIAPIKey)
	registry.RegisterTranscriptionAdapter("openai_whisper", openAI)

	// Diarization adapters.
	pyannote := adapters.NewPyAnnoteAdapter(pyannoteEnv)
	registry.RegisterDiarizationAdapter("pyannote", pyannote)

	sortformer := adapters.NewSortformerAdapter(nemoEnv)
	registry.RegisterDiarizationAdapter("sortformer", sortformer)

	logger.Info("Adapter registration complete")
}
35 changes: 35 additions & 0 deletions cmd/setup-adapters/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package main

import (
"context"
"flag"
"fmt"
"os"

"scriberr/internal/config"
"scriberr/internal/transcription/registry"
"scriberr/pkg/logger"
)

// main is the entry point of the adapter setup tool. It parses the -log-level
// flag, initializes the logger, loads the configuration, registers the
// standard adapters, and then calls InitializeModelsSync on the registry.
// If that call returns an error, the error is written to stderr and the
// process exits with status 1.
func main() {
logLevel := flag.String("log-level", "info", "Log level (debug, info, warn, error)")
flag.Parse()

logger.Init(*logLevel)
logger.Info("Starting adapter environment setup")

cfg := config.Load()

// Register all standard adapters
registry.RegisterStandardAdapters(cfg)

// Initialize all registered models synchronously
// NOTE(review): the CI annotation embedded below reports that
// InitializeModelsSync is not defined on *registry.ModelRegistry.
ctx := context.Background()
err := registry.GetRegistry().InitializeModelsSync(ctx)

Check failure on line 28 in cmd/setup-adapters/main.go

View workflow job for this annotation

GitHub Actions / basics

registry.GetRegistry().InitializeModelsSync undefined (type *registry.ModelRegistry has no field or method InitializeModelsSync)
if err != nil {
// Report on stderr and exit non-zero so callers (e.g. a Makefile target
// or CI step) can detect the failure.
fmt.Fprintf(os.Stderr, "Error during adapter setup: %v\n", err)
os.Exit(1)
}

logger.Info("Adapter environment setup completed successfully")
}
19 changes: 19 additions & 0 deletions internal/transcription/adapters/base_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,20 @@ func CheckEnvironmentReady(envPath, importStatement string) bool {
}
envCacheMutex.RUnlock()

start := time.Now()
logger.Debug("Checking environment readiness", "env_path", envPath, "import", importStatement)

// Run the actual check
testCmd := exec.Command("uv", "run", "--native-tls", "--project", envPath, "python", "-c", importStatement)
ready := testCmd.Run() == nil

duration := time.Since(start)
logger.Info("Environment readiness check completed",
"env_path", envPath,
"import", importStatement,
"ready", ready,
"duration", duration.String())

// Cache the result
envCacheMutex.Lock()
envCache[cacheKey] = ready
Expand All @@ -80,6 +90,15 @@ func CheckEnvironmentReady(envPath, importStatement string) bool {
return result.(bool)
}

// EnsureEnvironment runs setupFn for the environment at envPath through
// requestGroup (presumably a singleflight.Group; confirm at its declaration)
// and returns whatever error the group call reports.
//
// NOTE(review): the deduplication key is built from envPath alone. Callers
// that pass the same envPath with different setupFn values therefore share
// one key; confirm that sharing the in-flight result is intended for them.
func EnsureEnvironment(envPath string, setupFn func() error) error {
	// Adapt setupFn to the (value, error) callback shape the group expects;
	// there is no value to share, only the error.
	runSetup := func() (interface{}, error) {
		return nil, setupFn()
	}

	_, err, _ := requestGroup.Do("setup:"+envPath, runSetup)
	return err
}

// BaseAdapter provides common functionality for all model adapters
type BaseAdapter struct {
modelID string
Expand Down
4 changes: 2 additions & 2 deletions internal/transcription/adapters/canary_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ func (c *CanaryAdapter) PrepareEnvironment(ctx context.Context) error {
}

// Check if environment is already ready (using cache to speed up repeated checks)
if CheckEnvironmentReady(c.envPath, "import nemo.collections.asr") {
if CheckEnvironmentReady(c.envPath, "import nemo") {
modelPath := filepath.Join(c.envPath, "canary-1b-v2.nemo")
if stat, err := os.Stat(modelPath); err == nil && stat.Size() > 1024*1024 {
logger.Info("Canary environment already ready")
Expand All @@ -184,7 +184,7 @@ func (c *CanaryAdapter) PrepareEnvironment(ctx context.Context) error {
}

// Setup environment (reuse Parakeet setup since they share the same environment)
if err := c.setupCanaryEnvironment(); err != nil {
if err := EnsureEnvironment(c.envPath, c.setupCanaryEnvironment); err != nil {
return fmt.Errorf("failed to setup Canary environment: %w", err)
}

Expand Down
15 changes: 12 additions & 3 deletions internal/transcription/adapters/parakeet_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,17 @@ func (p *ParakeetAdapter) PrepareEnvironment(ctx context.Context) error {
return fmt.Errorf("failed to create buffered script: %w", err)
}

// Copy transcription scripts (standard and buffered)
if err := p.copyTranscriptionScript(); err != nil {
return fmt.Errorf("failed to copy transcription script: %w", err)
}

if err := p.copyBufferedScript(); err != nil {
return fmt.Errorf("failed to create buffered script: %w", err)
}

// Check if environment is already ready (using cache to speed up repeated checks)
if CheckEnvironmentReady(p.envPath, "import nemo.collections.asr") {
if CheckEnvironmentReady(p.envPath, "import nemo") {
modelPath := filepath.Join(p.envPath, "parakeet-tdt-0.6b-v3.nemo")
scriptPath := filepath.Join(p.envPath, "parakeet_transcribe.py")
bufferedScriptPath := filepath.Join(p.envPath, "parakeet_transcribe_buffered.py")
Expand All @@ -158,8 +167,8 @@ func (p *ParakeetAdapter) PrepareEnvironment(ctx context.Context) error {
logger.Info("Parakeet environment not ready, setting up")
}

// Setup environment
if err := p.setupParakeetEnvironment(); err != nil {
// Setup environment (shared with other NVIDIA adapters)
if err := EnsureEnvironment(p.envPath, p.setupParakeetEnvironment); err != nil {
return fmt.Errorf("failed to setup Parakeet environment: %w", err)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,11 @@ def main():
include_confidence=args.include_confidence,
preserve_formatting=args.preserve_formatting,
)
except Exception as e:
print(f"Error during transcription: {e}")
except Exception:
import traceback
traceback.print_exc()
sys.exit(1)


if __name__ == "__main__":
main()
main()
3 changes: 1 addition & 2 deletions internal/transcription/adapters/py/nvidia/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies = [
"soundfile",
"ml-dtypes>=0.3.1,<0.5.0",
"onnx>=1.15.0,<1.18.0",
"qdrant-client",
# "pyannote.audio" # needed for sortformer or no?
]

Expand All @@ -23,12 +24,10 @@ dev = [
[tool.uv.sources]
nemo-toolkit = { git = "https://github.com/NVIDIA/NeMo.git", tag = "v2.5.3" }
torch = [
{ index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
{ index = "pytorch-cpu", marker = "platform_machine != 'x86_64' and sys_platform != 'darwin'" },
{ index = "pytorch", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
torchaudio = [
{ index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
{ index = "pytorch-cpu", marker = "platform_machine != 'x86_64' and sys_platform != 'darwin'" },
{ index = "pytorch", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
]
Expand Down
7 changes: 3 additions & 4 deletions internal/transcription/adapters/pyannote_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ func (p *PyAnnoteAdapter) PrepareEnvironment(ctx context.Context) error {
}

// Check if PyAnnote is already available (using cache to speed up repeated checks)
if CheckEnvironmentReady(p.envPath, "from pyannote.audio import Pipeline") {
if CheckEnvironmentReady(p.envPath, "import pyannote") {
logger.Info("PyAnnote already available in environment")
// Still ensure script exists
if err := p.copyDiarizationScript(); err != nil {
Expand All @@ -199,13 +199,12 @@ func (p *PyAnnoteAdapter) PrepareEnvironment(ctx context.Context) error {
}

// Create environment if it doesn't exist or is incomplete
if err := p.setupPyAnnoteEnvironment(); err != nil {
if err := EnsureEnvironment(p.envPath, p.setupPyAnnoteEnvironment); err != nil {
return fmt.Errorf("failed to setup PyAnnote environment: %w", err)
}

// Verify PyAnnote is now available
testCmd := exec.Command("uv", "run", "--native-tls", "--project", p.envPath, "python", "-c", "from pyannote.audio import Pipeline")
if testCmd.Run() != nil {
if !CheckEnvironmentReady(p.envPath, "import pyannote") {
logger.Warn("PyAnnote environment test still failed after setup")
}

Expand Down
45 changes: 45 additions & 0 deletions internal/transcription/registry/registration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package registry

import (
"path/filepath"
"scriberr/internal/config"
"scriberr/internal/transcription/adapters"
"scriberr/pkg/logger"
)

// RegisterStandardAdapters registers the built-in transcription and
// diarization adapters using the provided configuration. Centralizing the
// list here lets the server, CLI, and setup tools share one registration path.
//
// Environment directories are derived from cfg.WhisperXEnv:
//   - "parakeet": shared by the Parakeet, Canary and Sortformer adapters
//   - "pyannote": dedicated to PyAnnote (to avoid dependency conflicts)
//   - "voxtral":  dedicated to Voxtral
//
// Transcription adapters registered: parakeet, canary, voxtral, openai_whisper.
// Diarization adapters registered: sortformer, pyannote.
//
// NOTE(review): although the root path comes from cfg.WhisperXEnv, no
// "whisperx" adapter is registered here; confirm that this is intentional.
func RegisterStandardAdapters(cfg *config.Config) {
	// Shared environment path for NVIDIA models (NeMo-based)
	nvidiaEnvPath := filepath.Join(cfg.WhisperXEnv, "parakeet")

	// Dedicated environment path for PyAnnote (to avoid dependency conflicts)
	pyannoteEnvPath := filepath.Join(cfg.WhisperXEnv, "pyannote")

	// Dedicated environment path for Voxtral (Mistral AI model)
	voxtralEnvPath := filepath.Join(cfg.WhisperXEnv, "voxtral")

	// Log every derived environment path, including Voxtral's, so the startup
	// log shows each directory an adapter will use.
	logger.Info("Registering standard adapters",
		"nvidia_env", nvidiaEnvPath,
		"pyannote_env", pyannoteEnvPath,
		"voxtral_env", voxtralEnvPath)

	// Register transcription adapters
	RegisterTranscriptionAdapter("parakeet",
		adapters.NewParakeetAdapter(nvidiaEnvPath))
	RegisterTranscriptionAdapter("canary",
		adapters.NewCanaryAdapter(nvidiaEnvPath))
	RegisterTranscriptionAdapter("voxtral",
		adapters.NewVoxtralAdapter(voxtralEnvPath))
	RegisterTranscriptionAdapter("openai_whisper",
		adapters.NewOpenAIAdapter(cfg.OpenAIAPIKey))

	// Register diarization adapters
	RegisterDiarizationAdapter("sortformer",
		adapters.NewSortformerAdapter(nvidiaEnvPath))

	// PyAnnote is registered here so it's available in the setup tool and server
	RegisterDiarizationAdapter("pyannote",
		adapters.NewPyAnnoteAdapter(pyannoteEnvPath))

	logger.Info("Standard adapter registration complete")
}
Loading