Skip to content

Monitoring

Quiver provides comprehensive monitoring capabilities to help you understand the performance and health of your vector database.

Overview

Monitoring features include:

  • Performance metrics
  • Health checks
  • Resource usage tracking
  • Operation logging
  • Prometheus integration
  • Custom metric collection

Built-in Metrics

Core Metrics

// Collect basic metrics
metrics := idx.CollectMetrics()

// Available metrics
fmt.Printf("Vector count: %d\n", metrics["vector_count"])
fmt.Printf("Batch size: %d\n", metrics["batch_size"])
fmt.Printf("Cache size: %d\n", metrics["cache_size"])
fmt.Printf("DB connections: %d\n", metrics["db_connections"])

Performance Metrics

// Search performance metrics
metrics := idx.CollectMetrics()
fmt.Printf("Average search time: %.2fms\n", metrics["avg_search_time_ms"])
fmt.Printf("Search QPS: %.2f\n", metrics["search_qps"])

Health Checks

Basic Health Check

// Verify index health
if err := idx.HealthCheck(); err != nil {
    log.Printf("Health check failed: %v", err)
}

Detailed Health Status

// Get detailed health status
status := idx.GetHealthStatus()
fmt.Printf("Index status: %s\n", status.Status)
fmt.Printf("Last backup: %s\n", status.LastBackup)
fmt.Printf("Disk usage: %.2f%%\n", status.DiskUsage)

Prometheus Integration

Metric Endpoints

// In your HTTP server setup
http.Handle("/metrics", promhttp.Handler())

Available Prometheus Metrics

# HELP quiver_vector_count Total number of vectors in the index
# TYPE quiver_vector_count gauge
quiver_vector_count 1000

# HELP quiver_search_duration_seconds Search operation duration
# TYPE quiver_search_duration_seconds histogram

# HELP quiver_batch_operations_total Total number of batch operations
# TYPE quiver_batch_operations_total counter

Resource Monitoring

Memory Usage

metrics := idx.CollectMetrics()
fmt.Printf("Memory usage: %d bytes\n", metrics["memory_usage"])
fmt.Printf("Cache memory: %d bytes\n", metrics["cache_memory"])

Disk Usage

metrics := idx.CollectMetrics()
fmt.Printf("Storage size: %d bytes\n", metrics["storage_size"])
fmt.Printf("Backup size: %d bytes\n", metrics["backup_size"])

Operation Logging

Configure Logging

// Build a production zap logger. Build returns an error, so check it rather
// than discarding it; otherwise the logger handed to Quiver may be nil.
config := zap.NewProductionConfig()
logger, err := config.Build()
if err != nil {
    log.Fatalf("building logger: %v", err)
}

// Pass the logger to Quiver when constructing the index.
idx, err := quiver.New(quiver.Config{
    // ... other config
}, logger)
if err != nil {
    log.Fatalf("creating index: %v", err)
}

Log Levels

// Debug level for detailed operations
logger.Debug("Processing batch", 
    zap.Int("size", batchSize),
    zap.Duration("time", duration))

// Info level for important operations
logger.Info("Backup completed",
    zap.String("path", backupPath),
    zap.Int("vectors", vectorCount))

// Error level for issues
logger.Error("Search failed",
    zap.Error(err),
    zap.Any("query", query))

Performance Monitoring

Search Performance

// Track search latency: time the call, then log the outcome.
start := time.Now()
results, err := idx.Search(vector, k, page, pageSize)
duration := time.Since(start)

// Report failures separately so a failed search is not logged as
// "completed" with zero results.
if err != nil {
    logger.Error("Search failed",
        zap.Error(err),
        zap.Duration("duration", duration))
} else {
    logger.Info("Search completed",
        zap.Duration("duration", duration),
        zap.Int("results", len(results)))
}

Batch Operations

// Monitor batch processing
metrics := idx.CollectMetrics()
fmt.Printf("Batch queue size: %d\n", metrics["batch_queue_size"])
fmt.Printf("Batch processing rate: %.2f/s\n", metrics["batch_rate"])

Custom Metrics

Define Custom Metrics

// Create custom metric collectors
var (
    // customCounter is created with promauto.NewCounter under the metric
    // name quiver_custom_operations_total.
    customCounter = promauto.NewCounter(prometheus.CounterOpts{
        Name: "quiver_custom_operations_total",
        Help: "Total number of custom operations",
    })

    // customHistogram is created with promauto.NewHistogram under the metric
    // name quiver_custom_duration_seconds. No Buckets are set in the options,
    // so the bucket layout is whatever the Prometheus client library
    // defaults to.
    customHistogram = promauto.NewHistogram(prometheus.HistogramOpts{
        Name: "quiver_custom_duration_seconds",
        Help: "Duration of custom operations",
    })
)

Track Custom Metrics

// Record custom metrics
customCounter.Inc()
customHistogram.Observe(duration.Seconds())

Best Practices

  1. Metric Collection
     • Regular health checks
     • Monitor resource usage
     • Track performance metrics
     • Set up alerts

  2. Log Management
     • Structured logging
     • Appropriate log levels
     • Log rotation
     • Error tracking

  3. Performance Monitoring
     • Track latency
     • Monitor throughput
     • Watch resource usage
     • Set baselines

Alerting

Configure Alerts

// Example alert configuration
//
// Alert describes a single alerting rule: the metric it applies to, a numeric
// threshold, and a time window.
type Alert struct {
    // Metric is the name of the metric the rule applies to.
    Metric    string
    // Threshold is the numeric limit for the metric. Its unit depends on the
    // metric (the examples on this page use milliseconds and percent).
    Threshold float64
    // Duration is the time window for the rule — presumably how long the
    // threshold must be breached before alerting; the evaluation logic is
    // not shown here, so confirm against your alerting code.
    Duration  time.Duration
}

// alerts lists the example rules. This snippet only declares them; the code
// that evaluates the rules against collected metrics is not shown here.
alerts := []Alert{
    // Rule on the search_latency_p95 metric: threshold 100 ms, 5-minute window.
    {
        Metric:    "search_latency_p95",
        Threshold: 100, // milliseconds
        Duration:  5 * time.Minute,
    },
    // Rule on the disk_usage_percent metric: threshold 85%, 1-hour window.
    {
        Metric:    "disk_usage_percent",
        Threshold: 85, // percent
        Duration:  time.Hour,
    },
}

Next Steps