# TypeScript SDK
The official TypeScript/JavaScript SDK for deploying and managing AI models on the Syaala Platform.
**Production-Ready:** All methods use real API endpoints. No mock data or fallback responses.
## Installation

```bash
npm install @syaala/sdk
```
## Quick Start

### Import and Initialize

```typescript
import { createClient } from '@syaala/sdk'

const client = createClient(process.env.SYAALA_API_KEY!)
```

### Get User Profile
```typescript
const result = await client.getProfile()

if (result.success) {
  console.log('Organization:', result.data.organization.name)
  console.log('Credits:', result.data.organization.creditsBalance)
}
```

### Create Deployment
```typescript
const deployment = await client.deployments.create(orgId, {
  name: 'llama-3-8b',
  modelId: 'meta-llama/Llama-3-8B-Instruct',
  runtime: 'vllm',
  gpuType: 'NVIDIA_A100',
  gpuCount: 1,
  scaling: {
    minReplicas: 0,
    maxReplicas: 5
  }
})
```

### Run Inference
```typescript
const result = await client.inference.complete(deploymentId, {
  prompt: 'Explain quantum computing in simple terms',
  maxTokens: 200,
  temperature: 0.7
})

if (result.success) {
  console.log(result.data.text)
}
```

## Client Configuration
### Basic Configuration
```typescript
import { createClient } from '@syaala/sdk'

const client = createClient('sk_live_...')
```

### Advanced Configuration
```typescript
import { SyaalaClient } from '@syaala/sdk'

const client = new SyaalaClient({
  apiKey: process.env.SYAALA_API_KEY!,
  baseUrl: 'https://api.syaala.com', // production API
  timeout: 30000,                    // 30-second request timeout
  retries: 3,                        // retry failed requests
  debug: false                       // enable debug logging
})
```

### Configuration Options
| Option | Type | Default | Description |
|---|---|---|---|
| `apiKey` | `string` | required | Your Syaala API key |
| `baseUrl` | `string` | `https://api.syaala.com` | API base URL |
| `timeout` | `number` | `30000` | Request timeout (ms) |
| `retries` | `number` | `3` | Number of retry attempts |
| `debug` | `boolean` | `false` | Enable debug logging |
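For example, these options make it easy to point the client at a different stack per environment. A minimal sketch (`STAGING_API_URL` and the chosen timeouts are illustrative, not part of the SDK):

```typescript
import { SyaalaClient } from '@syaala/sdk'

// Derive the configuration from the environment: production gets the
// public API and quiet logs; everything else gets a hypothetical staging
// URL, a looser timeout, and debug logging.
const isProd = process.env.NODE_ENV === 'production'

const client = new SyaalaClient({
  apiKey: process.env.SYAALA_API_KEY!,
  baseUrl: isProd ? 'https://api.syaala.com' : process.env.STAGING_API_URL!,
  timeout: isProd ? 30000 : 60000,
  debug: !isProd
})
```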
## Error Handling
The SDK uses the Result pattern for type-safe error handling:
```typescript
import type { Result } from '@syaala/sdk'

const result = await client.deployments.list(orgId)

if (result.success) {
  // TypeScript knows: result.data is available
  console.log('Deployments:', result.data.data)
} else {
  // TypeScript knows: result.error is a string
  console.error('Error:', result.error)
}
```

### Error Types
```typescript
interface APIError {
  message: string
  code?: string
  statusCode?: number
  details?: unknown
}
```

### Retry Strategy
Automatic retries for:
- 429: Rate limit errors (exponential backoff)
- 500, 502, 503, 504: Server errors
- Network errors: Connection failures
No retries for:
- 400: Bad request (invalid input)
- 401: Unauthorized (invalid API key)
- 403: Forbidden (insufficient permissions)
- 404: Not found
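If you add your own retry layer on top of the SDK's, the same classification can be reused in application code. A minimal sketch, assuming the `APIError` interface shown above is exported by the package:

```typescript
import type { APIError } from '@syaala/sdk' // assumes APIError is exported

// Mirrors the documented policy: retry rate limits, server errors, and
// network failures; fail fast on 400/401/403/404.
function isRetryable(error: APIError): boolean {
  if (error.statusCode === undefined) return true        // network failure
  if (error.statusCode === 429) return true              // rate limited
  return [500, 502, 503, 504].includes(error.statusCode) // server errors
}
```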
## API Reference

### Client Methods

#### ping()
Check API health and connectivity.
```typescript
const result = await client.ping()

if (result.success) {
  console.log('Status:', result.data.status)
  console.log('Timestamp:', result.data.timestamp)
}
```

**Returns:** `Result<{ status: string; timestamp: string }>`
#### getProfile()
Get current user profile and organization.
```typescript
const result = await client.getProfile()

if (result.success) {
  const profile = result.data
  console.log('User ID:', profile.userId)
  console.log('Display Name:', profile.displayName)
  console.log('Organization:', profile.organization.name)
  console.log('Credits Balance:', profile.organization.creditsBalance)
  console.log('Billing Plan:', profile.organization.billingPlan)
}
```

**Returns:** `Result<UserProfile>`
#### getOrganization(orgId?)
Get organization details.
```typescript
const result = await client.getOrganization('org_abc123')

if (result.success) {
  console.log('Members:', result.data.members)
}
```

**Parameters:**

- `orgId` (optional): Organization ID. If omitted, returns the current organization.

**Returns:** `Result<Organization>`
### Deployments

#### deployments.list(orgId, options?)
List all deployments.
```typescript
const result = await client.deployments.list(orgId, {
  page: 1,
  limit: 20,
  state: 'HEALTHY'
})

if (result.success) {
  console.log('Deployments:', result.data.data)
  console.log('Total:', result.data.total)
}
```

**Parameters:**

- `orgId` (required): Organization ID
- `options` (optional):
  - `page`: Page number (default: 1)
  - `limit`: Results per page (default: 20)
  - `state`: Filter by state

**Returns:** `Result<PaginatedResponse<Deployment>>`
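To collect every deployment rather than a single page, loop until `total` items have been fetched. A sketch, assuming the `data`/`total` fields shown above and a `client` configured as in Quick Start:

```typescript
import type { Deployment } from '@syaala/sdk'

// Walks all pages of deployments.list and returns the combined items.
async function listAllDeployments(orgId: string): Promise<Deployment[]> {
  const all: Deployment[] = []
  const limit = 50
  let page = 1

  while (true) {
    const result = await client.deployments.list(orgId, { page, limit })
    if (!result.success) throw new Error(result.error)

    all.push(...result.data.data)

    // Stop once every item is collected, or if the API returns an empty page.
    if (all.length >= result.data.total || result.data.data.length === 0) break
    page++
  }

  return all
}
```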
#### deployments.get(deploymentId)
Get deployment details.
```typescript
const result = await client.deployments.get('dep_abc123')

if (result.success) {
  const deployment = result.data
  console.log('Name:', deployment.name)
  console.log('State:', deployment.state)
  console.log('Replicas:', deployment.currentReplicas)
  console.log('GPU Type:', deployment.gpuType)
}
```

**Returns:** `Result<Deployment>`
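Because a new deployment takes time to reach `HEALTHY`, a small polling helper around `get()` is often useful. A sketch (the 5-second interval and 10-minute cap are arbitrary choices, not SDK defaults):

```typescript
// Polls a deployment until it reports HEALTHY, or gives up at the deadline.
async function waitUntilHealthy(deploymentId: string, timeoutMs = 600_000) {
  const deadline = Date.now() + timeoutMs

  while (Date.now() < deadline) {
    const result = await client.deployments.get(deploymentId)
    if (result.success && result.data.state === 'HEALTHY') return result.data

    await new Promise(resolve => setTimeout(resolve, 5000)) // poll every 5s
  }

  throw new Error(`Deployment ${deploymentId} not healthy after ${timeoutMs}ms`)
}
```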
#### deployments.create(orgId, config)
Create a new deployment.
```typescript
const result = await client.deployments.create(orgId, {
  name: 'my-llm',
  modelId: 'meta-llama/Llama-3-8B-Instruct',
  runtime: 'vllm',
  gpuType: 'NVIDIA_A100',
  gpuCount: 1,
  scaling: {
    minReplicas: 0,
    maxReplicas: 5
  },
  environment: {
    MAX_TOKENS: '2048',
    TEMPERATURE: '0.7'
  }
})
```

**Parameters:**

- `orgId` (required): Organization ID
- `config` (required): Deployment configuration
  - `name`: Deployment name
  - `modelId`: HuggingFace model ID
  - `runtime`: `vllm` | `triton` | `fastapi`
  - `gpuType`: GPU type (e.g., `NVIDIA_A100`)
  - `gpuCount`: Number of GPUs
  - `scaling`: Auto-scaling configuration
  - `environment`: Environment variables

**Returns:** `Result<Deployment>`
#### deployments.update(deploymentId, updates)
Update deployment configuration.
```typescript
const result = await client.deployments.update('dep_abc123', {
  scaling: {
    minReplicas: 2,
    maxReplicas: 10
  }
})
```

**Returns:** `Result<Deployment>`
#### deployments.delete(deploymentId)
Delete a deployment.
```typescript
const result = await client.deployments.delete('dep_abc123')

if (result.success) {
  console.log('Deployment deleted')
}
```

**Returns:** `Result<void>`
#### deployments.start(deploymentId)
Start a stopped deployment.
```typescript
const result = await client.deployments.start('dep_abc123')
```

**Returns:** `Result<Deployment>`
#### deployments.stop(deploymentId)
Stop a running deployment.
```typescript
const result = await client.deployments.stop('dep_abc123')
```

**Returns:** `Result<Deployment>`
#### deployments.getLogs(deploymentId, options?)
Get deployment logs.
```typescript
const result = await client.deployments.getLogs('dep_abc123', {
  tail: 100,
  since: new Date(Date.now() - 3600000) // last hour
})

if (result.success) {
  result.data.logs.forEach(log => {
    console.log(`[${log.timestamp}] ${log.message}`)
  })
}
```

**Parameters:**

- `deploymentId` (required): Deployment ID
- `options` (optional):
  - `tail`: Number of recent lines
  - `since`: Show logs since timestamp

**Returns:** `Result<{ logs: DeploymentLog[] }>`
#### deployments.getMetrics(deploymentId, options?)
Get deployment metrics.
```typescript
const result = await client.deployments.getMetrics('dep_abc123', {
  period: '24h',
  metric: 'gpu_util'
})

if (result.success) {
  console.log('GPU Utilization:', result.data.metrics)
}
```

**Returns:** `Result<DeploymentMetrics>`
### Models

#### models.searchHuggingFace(query, options?)
Search HuggingFace models.
```typescript
const result = await client.models.searchHuggingFace('llama', {
  modelType: 'text-generation',
  limit: 10,
  sort: 'downloads'
})

if (result.success) {
  result.data.models.forEach(model => {
    console.log(model.name, '-', model.downloads)
  })
}
```

**Parameters:**

- `query` (required): Search query
- `options` (optional):
  - `modelType`: Filter by task type
  - `limit`: Maximum results
  - `sort`: Sort by `downloads`, `likes`, or `created`

**Returns:** `Result<{ models: Model[] }>`
#### models.importFromHuggingFace(orgId, modelId, config)
Import a model from HuggingFace.
```typescript
const result = await client.models.importFromHuggingFace(
  orgId,
  'meta-llama/Llama-3-8B-Instruct',
  {
    name: 'Llama 3 8B Instruct',
    description: 'Instruction-tuned LLaMA 3 model',
    tags: ['llm', 'text-generation']
  }
)
```

**Returns:** `Result<Model>`
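Search and import compose naturally: find a model, then import the top hit. A sketch that assumes each search result exposes an `id` usable as the HuggingFace model ID (verify against the `Model` type in your SDK version):

```typescript
// Illustrative: import the most-downloaded text-generation model
// matching a query. The `id` field on search results is an assumption.
async function importTopResult(orgId: string, query: string) {
  const search = await client.models.searchHuggingFace(query, {
    modelType: 'text-generation',
    limit: 1,
    sort: 'downloads'
  })

  if (!search.success || search.data.models.length === 0) {
    throw new Error(search.success ? 'No models found' : search.error)
  }

  const top = search.data.models[0]
  return client.models.importFromHuggingFace(orgId, top.id, { name: top.name })
}
```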
#### models.list(orgId, options?)
List imported models.
```typescript
const result = await client.models.list(orgId, {
  page: 1,
  limit: 20
})
```

**Returns:** `Result<PaginatedResponse<Model>>`
#### models.get(modelId)
Get model details.
```typescript
const result = await client.models.get('model_abc123')

if (result.success) {
  console.log('Model:', result.data.name)
  console.log('Source:', result.data.source)
}
```

**Returns:** `Result<Model>`
### Inference

#### inference.complete(deploymentId, request)
Run text completion inference.
```typescript
const result = await client.inference.complete('dep_abc123', {
  prompt: 'Explain quantum computing in simple terms',
  maxTokens: 200,
  temperature: 0.7,
  topP: 0.9,
  stop: ['\n\n']
})

if (result.success) {
  console.log('Response:', result.data.text)
  console.log('Tokens used:', result.data.usage.totalTokens)
  console.log('Finish reason:', result.data.finishReason)
}
```

**Parameters:**

- `deploymentId` (required): Deployment ID
- `request` (required):
  - `prompt`: Input text
  - `maxTokens`: Maximum tokens to generate
  - `temperature`: Randomness (0-2)
  - `topP`: Nucleus sampling
  - `stop`: Stop sequences

**Returns:** `Result<InferenceResponse>`
#### inference.stream(deploymentId, request)
Stream text completion inference.
```typescript
const stream = client.inference.stream('dep_abc123', {
  prompt: 'Write a short story about AI',
  maxTokens: 500,
  stream: true
})

for await (const chunk of stream) {
  if (chunk.success) {
    process.stdout.write(chunk.data.text)

    if (chunk.data.finishReason) {
      console.log('\nDone:', chunk.data.finishReason)
      break
    }
  } else {
    console.error('Error:', chunk.error)
    break
  }
}
```

**Returns:** `AsyncGenerator<Result<StreamChunk>>`
#### inference.batch(deploymentId, requests, options?)
Run batch inference.
```typescript
const prompts = [
  'What is machine learning?',
  'Explain neural networks',
  'What is deep learning?'
]

const requests = prompts.map(prompt => ({
  prompt,
  maxTokens: 100,
  temperature: 0.7
}))

const result = await client.inference.batch('dep_abc123', requests, {
  parallel: true,
  maxConcurrency: 3
})

if (result.success) {
  result.data.forEach((response, index) => {
    console.log(`Response ${index + 1}:`, response.text)
  })
}
```

**Returns:** `Result<InferenceResponse[]>`
## Code Examples

### Complete Workflow
```typescript
import { createClient } from '@syaala/sdk'

async function deployModel() {
  const client = createClient(process.env.SYAALA_API_KEY!)

  // 1. Get profile
  const profile = await client.getProfile()
  if (!profile.success) {
    throw new Error('Failed to get profile')
  }

  const orgId = profile.data.orgId
  console.log('Organization:', profile.data.organization.name)

  // 2. Import model
  const model = await client.models.importFromHuggingFace(
    orgId,
    'meta-llama/Llama-3-8B-Instruct',
    {
      name: 'Llama 3 8B',
      description: 'Instruction-tuned model'
    }
  )
  if (!model.success) {
    throw new Error('Failed to import model')
  }

  // 3. Create deployment
  const deployment = await client.deployments.create(orgId, {
    name: 'llama-3-prod',
    modelId: model.data.id,
    runtime: 'vllm',
    gpuType: 'NVIDIA_A100',
    gpuCount: 1,
    scaling: {
      minReplicas: 0,
      maxReplicas: 5
    }
  })
  if (!deployment.success) {
    throw new Error('Failed to create deployment')
  }
  console.log('Deployment created:', deployment.data.id)

  // 4. Wait for deployment to be ready
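  // In production, cap the number of polls or add an overall timeout so a
  // deployment that never becomes healthy doesn't leave this loop spinning.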
  let ready = false
  while (!ready) {
    const status = await client.deployments.get(deployment.data.id)
    if (status.success && status.data.state === 'HEALTHY') {
      ready = true
    } else {
      await new Promise(resolve => setTimeout(resolve, 5000))
    }
  }

  // 5. Run inference
  const result = await client.inference.complete(deployment.data.id, {
    prompt: 'Explain quantum computing in simple terms',
    maxTokens: 200,
    temperature: 0.7
  })
  if (result.success) {
    console.log('Response:', result.data.text)
  }
}

deployModel().catch(console.error)
```

### Streaming with Progress
```typescript
async function streamWithProgress(deploymentId: string, prompt: string) {
  const stream = client.inference.stream(deploymentId, {
    prompt,
    maxTokens: 1000,
    stream: true
  })

  let fullText = ''
  let tokenCount = 0

  for await (const chunk of stream) {
    if (chunk.success) {
      fullText += chunk.data.text
      tokenCount++

      // Update progress
      process.stdout.write(`\rTokens: ${tokenCount}`)

      if (chunk.data.finishReason) {
        console.log('\n\nComplete!')
        console.log('Full response:', fullText)
        console.log('Finish reason:', chunk.data.finishReason)
        break
      }
    } else {
      console.error('\nError:', chunk.error)
      break
    }
  }
}
```

### Error Handling with Retries
```typescript
async function robustInference(deploymentId: string, prompt: string) {
  let attempts = 0
  const maxAttempts = 3

  while (attempts < maxAttempts) {
    const result = await client.inference.complete(deploymentId, {
      prompt,
      maxTokens: 200
    })

    if (result.success) {
      return result.data.text
    }

    attempts++
    console.log(`Attempt ${attempts} failed: ${result.error}`)

    if (attempts < maxAttempts) {
      // Exponential backoff: 2s, then 4s, then 8s
      await new Promise(resolve =>
        setTimeout(resolve, Math.pow(2, attempts) * 1000)
      )
    }
  }

  throw new Error(`Failed after ${maxAttempts} attempts`)
}
```

## TypeScript Types
All SDK methods return typed responses:
```typescript
import type {
  Deployment,
  DeploymentConfig,
  DeploymentState,
  Model,
  InferenceRequest,
  InferenceResponse,
  Result
} from '@syaala/sdk'
```
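These exports make it easy to keep small wrappers fully typed. A sketch, assuming a `client` configured as in Quick Start:

```typescript
import type { Deployment, Result } from '@syaala/sdk'

// A typed convenience wrapper: resolves to the deployment itself,
// or null if the call failed, instead of a Result.
async function getDeploymentOrNull(id: string): Promise<Deployment | null> {
  const result: Result<Deployment> = await client.deployments.get(id)
  return result.success ? result.data : null
}
```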
## Next Steps

- API Reference - Complete REST API documentation
- CLI Tool - Command-line interface
- Authentication Guide - Generate API keys
- Examples - More code examples