supadata-ai · rafalzawadzki · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## Project Overview
 
-This is a Model Context Protocol (MCP) server implementation for Supadata web scraping and video transcript integration. The project provides six main tools: `supadata_transcript`, `supadata_check_transcript_status`, `supadata_scrape`, `supadata_map`, `supadata_crawl`, and `supadata_check_crawl_status` for video transcription, web scraping, URL discovery, and batch crawling operations.
+This is a Model Context Protocol (MCP) server implementation for Supadata web scraping and video transcript integration. The project provides nine main tools: `supadata_transcript`, `supadata_check_transcript_status`, `supadata_scrape`, `supadata_map`, `supadata_crawl`, `supadata_check_crawl_status`, `supadata_metadata`, `supadata_extract`, and `supadata_check_extract_status` for video transcription, web scraping, URL discovery, batch crawling, media metadata retrieval, and AI-powered structured data extraction.
 
 ## Key Commands
 
@@ -26,7 +26,7 @@ This is a Model Context Protocol (MCP) server implementation for Supadata web sc
 The server is built using the `@modelcontextprotocol/sdk` and runs on stdio transport. The main server logic is in `src/index.ts` with the following key components:
 
 - **Server Creation**: `createServer()` function creates an McpServer instance
-- **Tool Registration**: Six tools are registered with input validation using Zod schemas
+- **Tool Registration**: Nine tools are registered with input validation using Zod schemas
 - **Error Handling**: Comprehensive error handling with retry logic and exponential backoff
 - **Configuration**: Environment-based configuration with defaults
 
@@ -36,7 +36,9 @@ The server integrates with Supadata's JavaScript SDK (`@supadata/js`) and provid
 - **Web Scraping**: Single page content extraction to Markdown
 - **URL Mapping**: Website URL discovery and indexing
 - **Crawling**: Asynchronous batch crawling of multiple pages
-- **Status Checking**: Monitor crawl and transcript job progress and retrieve results
+- **Media Metadata**: Retrieve metadata from YouTube, TikTok, Instagram, and Twitter URLs
+- **Structured Extraction**: AI-powered extraction of structured data from video content
+- **Status Checking**: Monitor crawl, transcript, and extract job progress and retrieve results
 
 ### Tool Implementations
 
@@ -76,6 +78,23 @@ The server integrates with Supadata's JavaScript SDK (`@supadata/js`) and provid
 - **Output**: Job status and results (if completed)
 - **Cost**: No additional cost
 
+#### supadata_metadata
+- **Purpose**: Fetch metadata from media URLs on supported platforms
+- **Input**: `url` (string)
+- **Output**: Rich metadata object with platform, title, description, author info, engagement stats, media details, tags, and creation date
+- **Supported Platforms**: YouTube, TikTok, Instagram, Twitter
+
+#### supadata_extract
+- **Purpose**: Extract structured data from video content using AI
+- **Input**: `url` (string), `prompt` (string, optional), `schema` (object, optional - JSON Schema for output format)
+- **Output**: Job ID for async processing (pass it to `supadata_check_extract_status` to retrieve results)
+
+#### supadata_check_extract_status
+- **Purpose**: Check extract job status and retrieve results
+- **Input**: `id` (string - job ID from extract)
+- **Output**: Job status and extracted data (if completed)
+- **Cost**: No additional cost
+
 ## Configuration
 
 ### Required Environment Variables
@@ -100,7 +119,7 @@ The server includes robust error handling with:
 ## Testing
 
 The test suite uses Jest with TypeScript and ESM support. Tests cover:
-- All six tool implementations
+- All nine tool implementations
 - Error handling scenarios
 - Rate limiting behavior
 - Mock-based testing with `@jest/globals`

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@supadata/mcp",
-  "version": "1.1.0",
+  "version": "1.2.0",
   "description": "MCP server for Supadata video & web scraping integration. Features include YouTube, TikTok, Instagram, Twitter, and file video transcription, web scraping, batch processing and structured data extraction.",
   "type": "module",
   "bin": "./dist/index.js",
@@ -19,7 +19,7 @@
   "license": "MIT",
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.25.3",
-    "@supadata/js": "^1.3.0",
+    "@supadata/js": "^1.4.0",
     "dotenv": "^16.4.7",
     "zod": "^3.25.76"
   },

diff --git a/src/index.test.ts b/src/index.test.ts
@@ -31,9 +31,16 @@ interface MockWebService {
   getCrawlResults: jest.MockedFunction<(id: string) => Promise<any>>;
 }
 
+interface MockExtractService { // Test double for the `extract` service on the mocked Supadata client.
+  get: jest.MockedFunction<(params: any) => Promise<{ jobId: string }>>; // Resolves to the ID of the extract job that was started.
+  getResults: jest.MockedFunction<(id: string) => Promise<any>>; // Looked up by job ID; the result shape is left untyped here.
+}
+
 interface MockSupadataClient {
   transcript: MockTranscriptService;
   web: MockWebService;
+  metadata: jest.MockedFunction<(params: any) => Promise<any>>; // Invoked directly as a function, e.g. `metadata({ url })`.
+  extract: MockExtractService; // Exposes `get` (start a job) and `getResults` (fetch by job ID).
 }
 
 describe('Supadata Tool Tests', () => {
@@ -54,7 +61,12 @@ describe('Supadata Tool Tests', () => {
         map: jest.fn(),
         crawl: jest.fn(),
         getCrawlResults: jest.fn(),
-      }
+      },
+      metadata: jest.fn(),
+      extract: {
+        get: jest.fn(),
+        getResults: jest.fn(),
+      },
     };
 
     // Create request handler
@@ -234,6 +246,144 @@ describe('Supadata Tool Tests', () => {
     });
   });
 
+  // Metadata: the reply must be non-error text that contains fields of the mocked response.
+  test('should handle metadata request', async () => {
+    const url = 'https://www.youtube.com/watch?v=example';
+
+    const mockResponse = {
+      platform: 'youtube',
+      type: 'video',
+      id: 'example',
+      url: url,
+      title: 'Example Video',
+      description: 'An example video description',
+      author: {
+        username: 'examplechannel',
+        displayName: 'Example Channel',
+        avatarUrl: 'https://example.com/avatar.jpg',
+        verified: true,
+      },
+      stats: { views: 1000000, likes: 50000, comments: 3000, shares: null },
+      media: { type: 'video', url: 'https://example.com/video.mp4' },
+      tags: ['example', 'test'],
+      createdAt: '2024-01-01T00:00:00Z',
+      additionalData: {},
+    };
+
+    mockClient.metadata.mockResolvedValueOnce(mockResponse); // One-shot stub: only the next metadata call resolves with this payload.
+
+    const response = await requestHandler({
+      method: 'call_tool',
+      params: {
+        name: 'supadata_metadata',
+        arguments: { url },
+      },
+    });
+
+    expect(response.isError).toBe(false);
+    expect(response.content[0].text).toContain('Example Video'); // Field values are expected to show up in the returned text.
+    expect(response.content[0].text).toContain('youtube');
+    expect(mockClient.metadata).toHaveBeenCalledWith({ url }); // The url must be forwarded to the client as `{ url }`.
+  });
+
+  // Extract with a prompt: expects the job ID in the reply text and `{ url, prompt }` passed to client.extract.get.
+  test('should handle extract request with prompt', async () => {
+    const url = 'https://www.youtube.com/watch?v=example';
+    const prompt = 'Extract the main topics discussed';
+
+    mockClient.extract.get.mockResolvedValueOnce({ // One-shot stub for the job-start call.
+      jobId: 'test-extract-job-id',
+    });
+
+    const response = await requestHandler({
+      method: 'call_tool',
+      params: {
+        name: 'supadata_extract',
+        arguments: { url, prompt },
+      },
+    });
+
+    expect(response.isError).toBe(false);
+    expect(response.content[0].text).toContain('test-extract-job-id'); // The job ID must be surfaced so the caller can poll for results.
+    expect(mockClient.extract.get).toHaveBeenCalledWith({ url, prompt });
+  });
+
+  test('should handle extract request with schema', async () => { // Variant of the extract test that supplies a schema and no prompt.
+    const url = 'https://www.youtube.com/watch?v=example';
+    const schema = { // JSON Schema-style description of the desired output object.
+      type: 'object',
+      properties: {
+        topics: { type: 'array', items: { type: 'string' } },
+        sentiment: { type: 'string' },
+      },
+    };
+
+    mockClient.extract.get.mockResolvedValueOnce({ // One-shot stub for the job-start call.
+      jobId: 'test-extract-schema-job-id',
+    });
+
+    const response = await requestHandler({
+      method: 'call_tool',
+      params: {
+        name: 'supadata_extract',
+        arguments: { url, schema },
+      },
+    });
+
+    expect(response.isError).toBe(false);
+    expect(response.content[0].text).toContain('test-extract-schema-job-id');
+    expect(mockClient.extract.get).toHaveBeenCalledWith({ url, schema }); // The schema object must reach the client unchanged.
+  });
+
+  // Extract status: a completed job's status and extracted data must both appear in the reply text.
+  test('should handle extract status request', async () => {
+    const id = 'test-extract-job-id';
+
+    const mockStatusResponse = { // Shape used by this test for a finished job: status, extracted data, and the schema.
+      status: 'completed',
+      data: { topics: ['topic1', 'topic2'], sentiment: 'positive' },
+      schema: {
+        type: 'object',
+        properties: {
+          topics: { type: 'array', items: { type: 'string' } },
+          sentiment: { type: 'string' },
+        },
+      },
+    };
+
+    mockClient.extract.getResults.mockResolvedValueOnce(mockStatusResponse);
+
+    const response = await requestHandler({
+      method: 'call_tool',
+      params: {
+        name: 'supadata_check_extract_status',
+        arguments: { id },
+      },
+    });
+
+    expect(response.isError).toBe(false);
+    expect(response.content[0].text).toContain('completed');
+    expect(response.content[0].text).toContain('topic1');
+    expect(mockClient.extract.getResults).toHaveBeenCalledWith(id); // The job ID is passed as a bare string, not wrapped in an object.
+  });
+
+  test('should handle extract API errors', async () => { // A rejected extract call must come back as an error result rather than a thrown exception.
+    const url = 'https://www.youtube.com/watch?v=example';
+
+    mockClient.extract.get.mockRejectedValueOnce(new Error('API Error')); // Make the next job-start call reject.
+
+    const response = await requestHandler({
+      method: 'call_tool',
+      params: {
+        name: 'supadata_extract',
+        arguments: { url },
+      },
+    });
+
+    expect(response.isError).toBe(true);
+    expect(response.content[0].text).toContain('API Error'); // The original error message must be visible to the caller.
+  });
+
   // Test error handling
   test('should handle API errors', async () => {
     const url = 'https://example.com';
@@ -371,6 +521,43 @@ async function handleRequest(
         };
       }
 
+      case 'supadata_metadata': { // Returns the metadata response directly as pretty-printed JSON text.
+        const response = await client.metadata({ url: args.url });
+        return {
+          content: [
+            { type: 'text', text: JSON.stringify(response, null, 2) },
+          ],
+          isError: false,
+        };
+      }
+
+      case 'supadata_extract': { // Starts an extract job and replies with a message containing its job ID.
+        const params: any = { url: args.url };
+        if (args.prompt) params.prompt = args.prompt; // Falsy values (undefined, '') are left out of the request.
+        if (args.schema) params.schema = args.schema; // Likewise, schema is only forwarded when truthy.
+        const response = await client.extract.get(params);
+        const jobId = response.jobId || response; // NOTE(review): falls back to the whole response when jobId is falsy; an object here would print as "[object Object]" below.
+        return {
+          content: [
+            {
+              type: 'text',
+              text: `Started extract job for ${args.url} with job ID: ${jobId}. Use supadata_check_extract_status to check progress.`,
+            },
+          ],
+          isError: false,
+        };
+      }
+
+      case 'supadata_check_extract_status': { // Passes the job ID through and returns whatever getResults resolves to, as JSON text.
+        const response = await client.extract.getResults(args.id);
+        return {
+          content: [
+            { type: 'text', text: JSON.stringify(response, null, 2) },
+          ],
+          isError: false,
+        };
+      }
+
       default:
         throw new Error(`Unknown tool: ${name}`);
     }