Skip to content

Scaling

This guide covers strategies and best practices for scaling Quiver to handle large datasets and high query loads.

Overview

Quiver can be scaled in several ways:

  • Vertical scaling (single instance)
  • Horizontal scaling (multiple instances)
  • Distributed deployment
  • Load balancing
  • Caching strategies

Vertical Scaling

Memory Optimization

config := quiver.Config{
    // HNSW graph tuning
    HNSWM:           32,
    HNSWEfConstruct: 200,

    // Cache capacity
    CacheSize: 1000000,

    // Batch size; tune it to the memory available
    BatchSize: 5000,
}

Storage Optimization

config := quiver.Config{
    // Dimensionality reduction, targeting 128
    DimReductionTarget: 128,
    EnableDimReduction: true,

    // Compression
    BackupCompression: true,

    // Keep the storage path on an SSD
    StoragePath: "/mnt/ssd/quiver",
}

Horizontal Scaling

Sharding Strategy

// ShardConfig is an example shard configuration: a shard ID, the total
// number of shards, and the quiver.Config to use for that shard.
type ShardConfig struct {
    ShardID     int // presumably this shard's index in [0, TotalShards) — confirm
    TotalShards int
    Config      quiver.Config
}

// Create sharded indices: one index per shard, each with its own storage path.
shards := make([]*quiver.Index, totalShards)
for i := 0; i < totalShards; i++ {
    config := quiver.Config{
        StoragePath: fmt.Sprintf("/path/to/shard_%d", i),
        // ... other config
    }
    // Check the error rather than discarding it; otherwise a failed shard
    // is left as an unusable entry in the slice. This assumes the enclosing
    // function returns an error.
    shard, err := quiver.New(config, logger)
    if err != nil {
        return fmt.Errorf("creating shard %d: %w", i, err)
    }
    shards[i] = shard
}

Load Balancing

// LoadBalancer is an example load balancer over a set of shard indices.
type LoadBalancer struct {
    shards    []*quiver.Index
    strategy  string // "round-robin", "least-loaded", etc.
    stats     map[int]ShardStats // presumably keyed by shard index — confirm; ShardStats is not defined in this guide
}

// Route selects the shard that should serve a request, based on
// lb.strategy. Unrecognized strategies fall back to the first shard.
// It returns nil if that fallback is taken while no shards are configured.
//
// The vector argument is not consulted by the strategies shown here.
func (lb *LoadBalancer) Route(vector []float32) *quiver.Index {
    switch lb.strategy {
    case "round-robin":
        return lb.roundRobin()
    case "least-loaded":
        return lb.leastLoaded()
    default:
        // Guard the fallback: indexing an empty slice would panic.
        if len(lb.shards) == 0 {
            return nil
        }
        return lb.shards[0]
    }
}

Distributed Deployment

Multi-Node Setup

// Node is the configuration of a single node.
type Node struct {
    ID       string
    Endpoint string
    Shards   []int // presumably the IDs of the shards hosted on this node — confirm
}

// ClusterConfig is the cluster configuration: its nodes and a replica count.
type ClusterConfig struct {
    Nodes    []Node
    Replicas int // presumably the number of copies kept of each shard — confirm
}

Data Replication

// Replicate is an example replication strategy: it writes a vector and its
// metadata to the primary node and then to every secondary node.
//
// The primary write happens first; if it fails, its error is returned and
// no secondary is contacted. Secondary writes run concurrently, and
// Replicate waits for all of them, so no goroutine outlives the call and
// replica failures are reported instead of being dropped. The first
// secondary error received is returned; the primary write is not rolled
// back in that case.
func (cluster *Cluster) Replicate(vector []float32, metadata map[string]interface{}) error {
    // Primary write
    primary := cluster.getPrimaryNode()
    if err := primary.Add(vector, metadata); err != nil {
        return err
    }

    // Replicate to secondaries
    secondaries := cluster.getSecondaryNodes()
    // Buffered so every goroutine can deliver its result without blocking.
    results := make(chan error, len(secondaries))
    for _, secondary := range secondaries {
        secondary := secondary // per-iteration copy for Go versions before 1.22
        go func() {
            results <- secondary.Add(vector, metadata)
        }()
    }

    // Drain one result per secondary, remembering the first failure.
    var firstErr error
    for range secondaries {
        if err := <-results; err != nil && firstErr == nil {
            firstErr = err
        }
    }
    return firstErr
}

Performance Optimization

Batch Processing

// Batch tuning
config := quiver.Config{
    // Batch processing interval
    BatchInterval: 100 * time.Millisecond,
    BatchSize:     1000,
}

Query Optimization

// Search tuning
config := quiver.Config{
    // Query cache
    QueryCacheSize:   10000,
    EnableQueryCache: true,

    // Search-time HNSW parameter
    HNSWEfSearch: 100,
}

Monitoring at Scale

Performance Metrics

// ClusterMetrics holds cluster-wide health metrics.
type ClusterMetrics struct {
    NodesActive    int
    TotalVectors   int64
    QPS           float64 // presumably queries per second — confirm
    Latency       time.Duration
    ErrorRate     float64 // units not shown here (fraction vs. percent) — confirm
}

// Collect metrics from the cluster.
// NOTE(review): presumably this returns a ClusterMetrics value — confirm.
metrics := cluster.CollectMetrics()

Resource Usage

// NodeStats holds resource-usage figures for a single node.
type NodeStats struct {
    CPU       float64 // units not shown here (fraction vs. percent) — confirm
    Memory    uint64  // presumably bytes — confirm
    DiskUsage uint64  // presumably bytes — confirm
    QPS       float64 // presumably queries per second — confirm
}

High Availability

Failover Configuration

// High-availability settings
config := quiver.Config{
    // Failover timeout
    FailoverTimeout: 30 * time.Second,
    // Interval between health checks
    HealthCheckInterval: 5 * time.Second,
    // Turn on automatic failover
    EnableFailover: true,
}

Recovery Procedures

// RecoverNode handles recovery of a node in three steps: traffic to the
// node is stopped, its data is recovered, and traffic is resumed. If data
// recovery fails, that error is returned and traffic is not resumed.
func (cluster *Cluster) RecoverNode(nodeID string) error {
    // Take the failed node out of rotation first.
    cluster.StopTraffic(nodeID)

    // Recover the node's data from replicas; on failure, leave traffic stopped.
    recoverErr := cluster.RecoverData(nodeID)
    if recoverErr != nil {
        return recoverErr
    }

    // Put the node back into rotation.
    return cluster.ResumeTraffic(nodeID)
}

Best Practices

  1. Capacity Planning
     • Monitor resource usage
     • Plan for growth
     • Set scaling thresholds
     • Regular performance testing

  2. Data Distribution
     • Even shard distribution
     • Balanced replication
     • Geographic distribution
     • Backup strategy

  3. Performance Optimization
     • Regular monitoring
     • Query optimization
     • Resource allocation
     • Cache management

Common Scenarios

Scaling for High QPS

// High query-throughput settings
config := quiver.Config{
    // Connection pool limit
    MaxConnections: 1000,
    // Query cache on
    EnableQueryCache: true,
    // Search-time HNSW parameter
    HNSWEfSearch: 150,
}

Scaling for Large Datasets

// Large-dataset settings
config := quiver.Config{
    // Sharding
    ShardCount: 10,
    // Storage compression
    EnableCompression: true,
    // Dimensionality reduction
    EnableDimReduction: true,
}

Next Steps