Complete API documentation for OpenFinOps
IMPORTANT: ObservabilityHub receives its data from telemetry agents. You do not track costs manually; agents report them automatically.
```python
from openfinops import ObservabilityHub

# Server-side initialization
hub = ObservabilityHub()
```
| Parameter | Type | Description |
|---|---|---|
| cluster_id | str | Unique cluster identifier |
| nodes | List[str] | List of node hostnames or IPs |
| region | str | Cloud region (default: us-east-1) |
```python
hub.register_cluster(
    cluster_id="gpu-cluster-1",
    nodes=["node-1", "node-2", "node-3"],
    region="us-west-2"
)

# Query health (populated by agents)
health = hub.get_cluster_health_summary()
for cluster_id, metrics in health.items():
    print(f"{cluster_id}: {metrics['health_status']}")
    print(f"  CPU: {metrics['avg_cpu_usage']:.1f}%")
    print(f"  GPU: {metrics['avg_gpu_usage']:.1f}%")
```
Specialized monitoring for Large Language Model training and RAG pipelines.
```python
from openfinops.observability.llm_observability import (
    LLMObservabilityHub,
    LLMTrainingMetrics,
)
import time

llm_hub = LLMObservabilityHub()

metrics = LLMTrainingMetrics(
    run_id="llm-training-001",
    model_name="gpt-custom-7b",
    epoch=5,
    step=1000,
    training_loss=0.245,
    validation_loss=0.289,
    learning_rate=0.0001,
    gpu_memory_mb=42000,
    batch_size=32,
    throughput_samples_per_sec=128.5,
    timestamp=time.time()
)
llm_hub.collect_llm_training_metrics(metrics)

summary = llm_hub.get_training_summary("llm-training-001")
print(f"Model: {summary['model_name']}")
print(f"Total Steps: {summary['total_steps']}")
print(f"Best Loss: {summary['best_loss']:.4f}")
```
Centralized cost tracking and budget management. Receives cost data from telemetry agents.
```python
from openfinops.observability.cost_observatory import CostObservatory, Budget
import time

cost_obs = CostObservatory()

budget = Budget(
    budget_id="monthly-ai-budget",
    name="AI/ML Monthly Budget",
    amount_usd=50000.0,
    period="monthly",
    start_time=time.time(),
    scope={"provider": "aws", "tags": {"team": "ml-research"}},
    alert_threshold=0.8  # Alert at 80%
)
cost_obs.create_budget(budget)

summary = cost_obs.get_cost_summary(time_range_hours=24)
print(f"Total: ${summary['total_cost']:.2f}")
for provider, cost in summary['by_provider'].items():
    print(f"{provider}: ${cost:.2f}")
for service, cost in summary['by_service'].items():
    print(f"{service}: ${cost:.2f}")

status = cost_obs.get_budget_status()
for budget_id, info in status.items():
    print(f"{info['name']}:")
    print(f"  Budget: ${info['amount']:.2f}")
    print(f"  Spent: ${info['spent']:.2f}")
    print(f"  Status: {info['status']}")
```
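The alert_threshold passed when creating a budget controls when a budget flags an alert. As a rough sketch of the check involved (the helper below is illustrative, not part of the OpenFinOps API):

```python
def budget_alert_state(amount_usd: float, spent_usd: float,
                       alert_threshold: float = 0.8) -> str:
    """Classify spend against a budget the way an alert check might.

    Illustrative helper only; the real CostObservatory logic may differ.
    """
    if amount_usd <= 0:
        raise ValueError("budget amount must be positive")
    utilization = spent_usd / amount_usd
    if utilization >= 1.0:
        return "exceeded"
    if utilization >= alert_threshold:
        return "warning"  # alert fires at the configured threshold
    return "ok"

# With the $50,000 monthly budget above, an 80% threshold trips at $40,000:
print(budget_alert_state(50000.0, 40000.0))  # warning
print(budget_alert_state(50000.0, 12500.0))  # ok
```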
Role-based dashboard components for executives and team leads.
```python
from openfinops.dashboard import CFODashboard

cfo_dash = CFODashboard(hub)

# Generate financial report
report = cfo_dash.generate_financial_report()
print(f"Total Spend: ${report.total_spend}")
print(f"AI/ML ROI: {report.ai_ml_roi}%")
print(f"Budget Status: {report.budget_utilization}%")
```
```python
from openfinops.dashboard import COODashboard

coo_dash = COODashboard(hub)

# Get operational metrics
metrics = coo_dash.get_operational_metrics()
print(f"SLA Compliance: {metrics.sla_compliance}%")
print(f"Efficiency Score: {metrics.operational_efficiency}%")
```
```python
from openfinops.dashboard import InfrastructureLeaderDashboard

infra_dash = InfrastructureLeaderDashboard(hub)

# Get resource utilization
util = infra_dash.get_resource_utilization()
print(f"CPU: {util.cpu_percent}%")
print(f"Memory: {util.memory_percent}%")
print(f"Storage: {util.storage_percent}%")
```
Deploy agents as separate processes to automatically discover resources, collect metrics, and calculate costs.
| Parameter | Type | Description |
|---|---|---|
| openfinops_endpoint | str | OpenFinOps server URL |
| aws_region | str | AWS region (e.g., 'us-west-2') |
```python
from agents.aws_telemetry_agent import AWSTelemetryAgent

# Initialize agent (uses boto3 credential chain)
agent = AWSTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    aws_region="us-west-2"
)

# Register with server
if agent.register_agent():
    print("✓ Agent registered")

    # Run continuous collection
    # Automatically discovers & calculates costs for:
    # - EC2 instances, EKS clusters, Lambda functions
    # - RDS databases, S3 buckets
    agent.run_continuous(interval_seconds=300)
```
```python
from agents.azure_telemetry_agent import AzureTelemetryAgent

agent = AzureTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    subscription_id="your-subscription-id"
)
agent.register_agent()
agent.run_continuous(interval_seconds=300)
```
```python
from agents.gcp_telemetry_agent import GCPTelemetryAgent

agent = GCPTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    project_id="your-project-id"
)
agent.register_agent()
agent.run_continuous(interval_seconds=300)
```
Collect Databricks DBU consumption, cluster costs, job execution metrics, and SQL warehouse usage.
| Parameter | Type | Description |
|---|---|---|
| openfinops_endpoint | str | OpenFinOps server URL (e.g., http://localhost:8080) |
| databricks_host | str | Databricks workspace URL (e.g., https://your-workspace.cloud.databricks.com) |
| databricks_token | str | Databricks personal access token |
| workspace_name | str (optional) | Workspace identifier (default: derived from host) |
```python
from agents.databricks_telemetry_agent import DatabricksTelemetryAgent

# Initialize agent
agent = DatabricksTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    databricks_host="https://your-workspace.cloud.databricks.com",
    databricks_token="dapi***",
    workspace_name="production"
)

# Register and run
if agent.register_agent():
    print("✓ Databricks agent registered")

    # Collect every 5 minutes
    agent.run_continuous(interval_seconds=300)
```
| Metric | Description |
|---|---|
| cluster_metrics | Cluster uptime, instance types, worker count, DBU consumption |
| job_metrics | Job runtime, success/failure, execution costs |
| sql_warehouse_metrics | Warehouse size, state, estimated costs |
Cost Calculation: The agent automatically calculates costs based on DBU pricing.
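The DBU math reduces to rate × DBU consumption × hours. A simplified sketch follows; the rates below are placeholders for illustration, not official Databricks pricing, and the agent's actual rate table may differ:

```python
# Hypothetical per-DBU rates by workload type (illustrative only).
DBU_RATES_USD = {
    "jobs_compute": 0.15,
    "all_purpose_compute": 0.40,
    "sql_compute": 0.22,
}

def estimate_dbu_cost(workload: str, dbu_per_hour: float, hours: float) -> float:
    """Estimate cost as per-DBU rate x DBU consumption rate x uptime."""
    rate = DBU_RATES_USD[workload]
    return round(rate * dbu_per_hour * hours, 2)

# e.g. a cluster consuming ~5 DBU/hr, running 8.5 hours:
print(estimate_dbu_cost("all_purpose_compute", 5.0, 8.5))  # 17.0
```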
Collect metrics from all clusters in the workspace.
```python
# Returns cluster metrics
{
    "total_clusters": 5,
    "running_clusters": 3,
    "total_estimated_cost_usd": 145.67,
    "clusters": [
        {
            "cluster_name": "prod-cluster",
            "state": "RUNNING",
            "instance_type": "m5.xlarge",
            "num_workers": 4,
            "uptime_hours": 8.5,
            "estimated_cost_usd": 32.45
        }
    ]
}
```
Collect job execution metrics and costs.
```python
# Returns job metrics for last 24 hours
{
    "total_runs": 42,
    "successful_runs": 38,
    "failed_runs": 4,
    "total_estimated_cost_usd": 89.23,
    "jobs": [
        {
            "job_id": 123,
            "run_name": "ETL Pipeline",
            "state": "SUCCESS",
            "runtime_seconds": 1845,
            "estimated_cost_usd": 12.34
        }
    ]
}
```
```bash
# Install dependencies
pip install databricks-sdk requests

# Set credentials
export DATABRICKS_HOST=https://your-workspace.cloud.databricks.com
export DATABRICKS_TOKEN=dapi***

# Run agent
python agents/databricks_telemetry_agent.py \
  --openfinops-endpoint http://localhost:8080 \
  --databricks-host $DATABRICKS_HOST \
  --databricks-token $DATABRICKS_TOKEN \
  --interval 300
```
Monitor Snowflake credit consumption, warehouse usage, storage costs, and query patterns.
| Parameter | Type | Description |
|---|---|---|
| openfinops_endpoint | str | OpenFinOps server URL |
| snowflake_account | str | Snowflake account identifier (e.g., xy12345.us-east-1) |
| snowflake_user | str | Snowflake username |
| snowflake_password | str | Snowflake password |
| snowflake_warehouse | str | Warehouse name (default: COMPUTE_WH) |
| edition | str | Edition for pricing: 'standard', 'enterprise', 'business_critical' |
```python
from agents.snowflake_telemetry_agent import SnowflakeTelemetryAgent

# Initialize agent
agent = SnowflakeTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    snowflake_account="xy12345.us-east-1",
    snowflake_user="admin_user",
    snowflake_password="***",
    snowflake_warehouse="COMPUTE_WH",
    edition="enterprise"
)

# Register and run
if agent.register_agent():
    print("✓ Snowflake agent registered")
    agent.run_continuous(interval_seconds=300)
```
| Metric | Description |
|---|---|
| warehouse_metrics | Credit consumption (compute + cloud services), active hours |
| storage_metrics | Database storage, failsafe storage, total TB |
| query_metrics | Query count, execution time, data scanned |
| user_attribution | Cost breakdown by user and warehouse |
Cost Calculation: Costs are calculated automatically based on the configured Snowflake edition.
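The edition string maps to a per-credit price. A sketch of the conversion; the rates below are illustrative (they happen to reproduce the warehouse example that follows), so check your Snowflake contract for actual figures:

```python
# Illustrative per-credit prices by edition (not official Snowflake rates).
CREDIT_PRICE_USD = {
    "standard": 2.00,
    "enterprise": 3.00,
    "business_critical": 4.00,
}

def estimate_credit_cost(edition: str, credits_used: float) -> float:
    """Convert warehouse credit consumption to an estimated dollar cost."""
    return round(CREDIT_PRICE_USD[edition] * credits_used, 2)

# 145.67 credits on the enterprise edition:
print(estimate_credit_cost("enterprise", 145.67))  # 437.01
```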
Collect warehouse credit consumption for the past 24 hours.
```python
# Returns warehouse metrics
{
    "total_warehouses": 3,
    "total_credits_used": 145.67,
    "total_estimated_cost_usd": 437.01,
    "warehouses": [
        {
            "warehouse_name": "COMPUTE_WH",
            "total_credits_used": 89.45,
            "compute_credits": 78.23,
            "cloud_service_credits": 11.22,
            "active_hours": 18,
            "estimated_cost_usd": 268.35
        }
    ]
}
```
Collect storage usage across all databases.
```python
# Returns storage metrics
{
    "total_storage_tb": 12.456,
    "estimated_monthly_cost_usd": 498.24,
    "databases": [
        {
            "database_name": "PROD_DB",
            "current_storage_tb": 8.234,
            "database_tb": 7.123,
            "failsafe_tb": 1.111,
            "estimated_monthly_cost_usd": 329.36
        }
    ]
}
```
```bash
# Install dependencies
pip install snowflake-connector-python requests

# Set credentials
export SNOWFLAKE_USER=admin_user
export SNOWFLAKE_PASSWORD=***

# Run agent
python agents/snowflake_telemetry_agent.py \
  --openfinops-endpoint http://localhost:8080 \
  --snowflake-account xy12345.us-east-1 \
  --snowflake-warehouse COMPUTE_WH \
  --edition enterprise \
  --interval 300
```
Multi-service agent for monitoring MongoDB Atlas, Redis Cloud, GitHub Actions, DataDog, and more.
| Parameter | Type | Description |
|---|---|---|
| openfinops_endpoint | str | OpenFinOps server URL |
| config_file | str | Path to JSON configuration file |
Create a JSON configuration file to enable specific services:
```json
{
  "mongodb_atlas": {
    "enabled": true,
    "public_key": "your_public_key",
    "private_key": "your_private_key",
    "project_id": "your_project_id"
  },
  "redis_cloud": {
    "enabled": true,
    "api_key": "your_api_key",
    "secret_key": "your_secret_key",
    "account_id": "your_account_id"
  },
  "github_actions": {
    "enabled": true,
    "token": "ghp_your_token",
    "org_name": "your_organization"
  },
  "datadog": {
    "enabled": true,
    "api_key": "your_api_key",
    "app_key": "your_app_key"
  }
}
```
```python
from agents.saas_services_telemetry_agent import SaaSServicesTelemetryAgent

# Initialize agent with config
agent = SaaSServicesTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    config_file="saas_config.json"
)

# Register and run (collect hourly)
if agent.register_agent():
    print("✓ SaaS services agent registered")
    agent.run_continuous(interval_seconds=3600)
```
| Service | Metrics Collected | Cost Estimation |
|---|---|---|
| mongodb_atlas | Cluster size, replication, sharding, storage | M10: $0.08/hr, M30: $0.54/hr |
| redis_cloud | Subscriptions, databases, throughput | Based on subscription pricing |
| github_actions | Workflow minutes, billable time | Linux: $0.008/min, macOS: $0.08/min |
| datadog | Host count, usage metrics | ~$15/host/month |
```python
# MongoDB Atlas metrics structure
{
    "service": "mongodb_atlas",
    "total_clusters": 3,
    "total_daily_cost_usd": 47.52,
    "clusters": [
        {
            "cluster_name": "prod-cluster",
            "instance_size": "M30",
            "num_shards": 1,
            "replication_factor": 3,
            "provider": "AWS",
            "region": "us-east-1",
            "hourly_cost_usd": 1.62,
            "daily_cost_usd": 38.88
        }
    ]
}
```
```python
# GitHub Actions metrics structure
{
    "service": "github_actions",
    "total_minutes_used": 12450,
    "included_minutes": 3000,
    "billable_minutes": 9450,
    "estimated_monthly_cost_usd": 75.60,
    "minutes_used_breakdown": {
        "UBUNTU": 8000,
        "MACOS": 1450
    }
}
```
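The billable figure in this structure is total minutes minus the plan's included minutes, priced per runner minute. A quick sketch using the rates from the table above (this reproduces the sample estimate, which prices all billable minutes at the Linux rate):

```python
# Per-minute runner rates from the table above.
GITHUB_ACTIONS_RATE_USD = {"UBUNTU": 0.008, "MACOS": 0.08}

def billable_minutes(total_minutes: int, included_minutes: int) -> int:
    """Minutes billed after the plan's included allotment is exhausted."""
    return max(total_minutes - included_minutes, 0)

def estimate_cost(minutes: int, rate_per_minute: float) -> float:
    return round(minutes * rate_per_minute, 2)

minutes = billable_minutes(12450, 3000)
print(minutes)                                                    # 9450
print(estimate_cost(minutes, GITHUB_ACTIONS_RATE_USD["UBUNTU"]))  # 75.6
```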
```bash
# Create sample configuration
python agents/saas_services_telemetry_agent.py \
  --create-config saas_config.json

# Edit the configuration file with your credentials
# Then run the agent
python agents/saas_services_telemetry_agent.py \
  --openfinops-endpoint http://localhost:8080 \
  --config saas_config.json \
  --interval 3600
```
Extend the agent by creating custom collectors:
```python
from agents.saas_services_telemetry_agent import SaaSServicesTelemetryAgent

class CustomServiceCollector:
    def __init__(self, api_key: str):
        self.api_key = api_key

    def collect_metrics(self) -> dict:
        # Your custom collection logic
        return {
            "service": "custom_service",
            "total_cost": 123.45,
            "metrics": {...}
        }

# Add to agent configuration
agent = SaaSServicesTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    config_file="config.json"
)
agent.collectors.append(("custom", CustomServiceCollector("api_key")))
agent.run_continuous(interval_seconds=3600)
```
Set up the OpenFinOps server to receive telemetry from agents and serve dashboards.
The server provides both web dashboards and API endpoints for telemetry ingestion.
```bash
# Start server on default port 8080
openfinops-dashboard

# Start on custom host/port
openfinops-dashboard --host 0.0.0.0 --port 8080

# Alternative command
openfinops-server --host 0.0.0.0 --port 8080
```
```python
from openfinops.webui import start_server

# Start server
start_server(
    host='0.0.0.0',
    port=8080,
    debug=False
)
```
```bash
# Check health endpoint
curl http://localhost:8080/api/health
# Expected response: {"status": "healthy"}

# Verify server in logs; you should see:
# 🚀 Starting OpenFinOps Web UI Server...
#    Host: 0.0.0.0
#    Port: 8080
```
The server automatically exposes telemetry endpoints when started. Agents use these to send data.
| Endpoint | Method | Description |
|---|---|---|
| /api/v1/agents/register | POST | Agent registration endpoint |
| /api/v1/telemetry/ingest | POST | Telemetry data ingestion |
| /api/health | GET | Server health check |
| / | GET | Overview dashboard |
| /dashboard/cfo | GET | CFO executive dashboard |
| /dashboard/coo | GET | COO operational dashboard |
| /dashboard/infrastructure | GET | Infrastructure leader dashboard |
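Agents wrap these endpoints for you, but the ingestion route can also be exercised directly. A minimal sketch using the standard library; note that the payload fields here are guesses for illustration, since the real schema is defined by the agents:

```python
import json
import urllib.request

SERVER = "http://localhost:8080"

def build_payload(agent_id: str, metrics: dict, ts: float) -> dict:
    """Assemble an illustrative telemetry payload (field names are assumptions)."""
    return {"agent_id": agent_id, "timestamp": ts, "metrics": metrics}

def post_telemetry(payload: dict) -> int:
    """POST the payload to the ingestion endpoint and return the HTTP status."""
    req = urllib.request.Request(
        f"{SERVER}/api/v1/telemetry/ingest",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        return resp.status

payload = build_payload("custom-agent-1", {"cpu_percent": 41.5, "cost_usd": 1.23}, 1700000000.0)
# post_telemetry(payload)  # requires a running server
```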
The server automatically initializes these components, but you can access them programmatically.
```python
from openfinops import ObservabilityHub
from openfinops.observability.cost_observatory import CostObservatory
from openfinops.observability.llm_observability import LLMObservabilityHub

# Initialize components (done automatically by server)
hub = ObservabilityHub()
cost_obs = CostObservatory()
llm_hub = LLMObservabilityHub()

# These will receive data from agents automatically
# No manual configuration needed
```
Once the server is running, deploy agents to start collecting data.
```python
# deploy_agent.py
from agents.aws_telemetry_agent import AWSTelemetryAgent

# Point agent to your server
agent = AWSTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",  # Your server URL
    aws_region="us-west-2"
)

# Register with server
if agent.register_agent():
    print("✓ Agent registered with server")

    # Start continuous collection
    # Agent will POST to http://localhost:8080/api/v1/telemetry/ingest
    agent.run_continuous(interval_seconds=300)
```
```python
# 1. Start server (in one terminal/process)
#    openfinops-dashboard --port 8080

# 2. Deploy agent (in another terminal/process)
from agents.aws_telemetry_agent import AWSTelemetryAgent

agent = AWSTelemetryAgent(
    openfinops_endpoint="http://localhost:8080",
    aws_region="us-west-2"
)
agent.register_agent()
agent.run_continuous(interval_seconds=300)

# 3. Access dashboards in browser
#    http://localhost:8080/
#    http://localhost:8080/dashboard/cfo
#    http://localhost:8080/dashboard/coo
```
For production, run the server with proper configuration.
Using a systemd service:

```ini
# /etc/systemd/system/openfinops-server.service
[Unit]
Description=OpenFinOps Server
After=network.target

[Service]
Type=simple
User=openfinops
WorkingDirectory=/opt/openfinops
Environment="HOST=0.0.0.0"
Environment="PORT=8080"
ExecStart=/usr/bin/openfinops-dashboard --host 0.0.0.0 --port 8080
Restart=always

[Install]
WantedBy=multi-user.target
```

```bash
# Enable and start
sudo systemctl enable openfinops-server
sudo systemctl start openfinops-server
```
```bash
# Using Docker
docker run -d \
  --name openfinops-server \
  -p 8080:8080 \
  -e HOST=0.0.0.0 \
  -e PORT=8080 \
  openfinops/server:latest
```
Using Docker Compose:

```yaml
# docker-compose.yml
version: '3.8'
services:
  openfinops-server:
    image: openfinops/server:latest
    ports:
      - "8080:8080"
    environment:
      - HOST=0.0.0.0
      - PORT=8080
    restart: always
```
Ensure agents can reach the server on the configured port.
```bash
# Open firewall port (if needed)
sudo ufw allow 8080/tcp

# Verify server is listening
netstat -tulpn | grep 8080

# Test from agent machine
curl http://your-server-ip:8080/api/health
```
Flask-based web server with real-time WebSocket updates.
| Parameter | Type | Description |
|---|---|---|
| host | str | Host to bind to (default: '0.0.0.0') |
| port | int | Port to bind to (default: 8080) |
| debug | bool | Enable debug mode (default: False) |
The server pushes real-time updates to connected clients every 5 seconds.
```javascript
// Client-side WebSocket connection
const socket = io('http://localhost:8080');

// Listen for cost updates
socket.on('cost_update', (data) => {
    console.log('New cost data:', data);
    updateDashboard(data);
});

// Listen for metric updates
socket.on('metrics_update', (data) => {
    console.log('New metrics:', data);
    updateCharts(data);
});
```
| Route | Description |
|---|---|
| / | Overview dashboard with key metrics |
| /dashboard/cfo | CFO executive financial dashboard |
| /dashboard/coo | COO operational efficiency dashboard |
| /dashboard/infrastructure | Infrastructure leader technical dashboard |