forked from cactus-compute/cactus
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.py
More file actions
75 lines (60 loc) · 2 KB
/
example.py
File metadata and controls
75 lines (60 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
"""
Cactus Python FFI Example
Usage:
1. cactus build
2. cactus download LiquidAI/LFM2-VL-450M
3. cactus download openai/whisper-small
4. cd tools && python example.py
"""
import sys
import json
from pathlib import Path
SCRIPT_DIR = Path(__file__).parent
PROJECT_ROOT = SCRIPT_DIR.parent
sys.path.insert(0, str(SCRIPT_DIR / "src"))
from cactus import (
cactus_init,
cactus_complete,
cactus_transcribe,
cactus_embed,
cactus_image_embed,
cactus_audio_embed,
cactus_reset,
cactus_destroy,
)
# Directories holding the downloaded model weights and the test assets,
# both resolved relative to the repository root.
WEIGHTS_DIR = PROJECT_ROOT / "weights"
ASSETS_DIR = PROJECT_ROOT / "tests" / "assets"

# Load model
print("Loading LFM2-VL-450M...")
vlm = cactus_init(str(WEIGHTS_DIR / "lfm2-vl-450m"))
# NOTE(review): how cactus_init reports a failed load is not visible from this
# script; if it can return a null handle instead of raising, add an explicit
# check here before the handle is used -- confirm against the bindings.
try:
    # Text completion
    messages = json.dumps([{"role": "user", "content": "What is 2+2?"}])
    response = cactus_complete(vlm, messages)
    print("\nCompletion:")
    print(json.dumps(json.loads(response), indent=2))

    # Text embedding
    embedding = cactus_embed(vlm, "Hello world")
    print(f"\nText embedding dim: {len(embedding)}")

    # The same image is used for both the embedding and the description demo,
    # so build its path once.
    image_path = str(ASSETS_DIR / "test_monkey.png")

    # Image embedding
    embedding = cactus_image_embed(vlm, image_path)
    print(f"\nImage embedding dim: {len(embedding)}")

    # VLM - describe image
    messages = json.dumps(
        [
            {
                "role": "user",
                "content": "Describe this image",
                "images": [image_path],
            }
        ]
    )
    response = cactus_complete(vlm, messages)
    print("\nVLM Image Description:")
    print(json.dumps(json.loads(response), indent=2))

    cactus_reset(vlm)
finally:
    # Always release the model handle, even when one of the calls above (or
    # parsing its JSON response) raises; otherwise the handle is never freed.
    cactus_destroy(vlm)
# Transcription
print("\nLoading whisper-small...")
whisper = cactus_init(str(WEIGHTS_DIR / "whisper-small"))
try:
    # The same clip is used for transcription and for the audio embedding,
    # so build its path once.
    audio_path = str(ASSETS_DIR / "test.wav")

    # Prompt string passed to cactus_transcribe via its `prompt` keyword.
    whisper_prompt = "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
    response = cactus_transcribe(whisper, audio_path, prompt=whisper_prompt)
    print("Transcription:")
    print(json.dumps(json.loads(response), indent=2))

    # Audio embedding
    embedding = cactus_audio_embed(whisper, audio_path)
    print(f"\nAudio embedding dim: {len(embedding)}")
finally:
    # Always release the model handle, even when transcription, JSON parsing
    # or embedding raises; otherwise the handle is never freed.
    cactus_destroy(whisper)

print("\nDone!")