API Reference

Complete API documentation for OpenFinOps

ObservabilityHub

IMPORTANT: ObservabilityHub receives data from telemetry agents. You don't manually track costs - agents do this automatically.

Initialization

ObservabilityHub()
PYTHON from openfinops import ObservabilityHub # Server-side initialization hub = ObservabilityHub()

register_cluster()

register_cluster(cluster_id: str, nodes: List[str], region: str = "us-east-1")
Parameter Type Description
cluster_id str Unique cluster identifier
nodes List[str] List of node hostnames or IPs
region str Cloud region (default: us-east-1)
PYTHON hub.register_cluster( cluster_id="gpu-cluster-1", nodes=["node-1", "node-2", "node-3"], region="us-west-2" )

get_cluster_health_summary()

get_cluster_health_summary() -> Dict
PYTHON # Query health (populated by agents) health = hub.get_cluster_health_summary() for cluster_id, metrics in health.items(): print(f"{cluster_id}: {metrics['health_status']}") print(f" CPU: {metrics['avg_cpu_usage']:.1f}%") print(f" GPU: {metrics['avg_gpu_usage']:.1f}%")
Returns: Dictionary with cluster health metrics populated by telemetry agents

LLMObservabilityHub

Specialized monitoring for Large Language Model training and RAG pipelines.

Initialization

LLMObservabilityHub()
PYTHON from openfinops.observability.llm_observability import LLMObservabilityHub llm_hub = LLMObservabilityHub()

collect_llm_training_metrics()

collect_llm_training_metrics(metrics: LLMTrainingMetrics)
PYTHON from openfinops.observability.llm_observability import LLMTrainingMetrics import time metrics = LLMTrainingMetrics( run_id="llm-training-001", model_name="gpt-custom-7b", epoch=5, step=1000, training_loss=0.245, validation_loss=0.289, learning_rate=0.0001, gpu_memory_mb=42000, batch_size=32, throughput_samples_per_sec=128.5, timestamp=time.time() ) llm_hub.collect_llm_training_metrics(metrics)

get_training_summary()

get_training_summary(run_id: str) -> Dict
PYTHON summary = llm_hub.get_training_summary("llm-training-001") print(f"Model: {summary['model_name']}") print(f"Total Steps: {summary['total_steps']}") print(f"Best Loss: {summary['best_loss']:.4f}")

CostObservatory

Centralized cost tracking and budget management. Receives cost data from telemetry agents.

Initialization

CostObservatory(collection_interval: float = 3600)
PYTHON from openfinops.observability.cost_observatory import CostObservatory cost_obs = CostObservatory()

create_budget()

create_budget(budget: Budget)
PYTHON from openfinops.observability.cost_observatory import Budget import time budget = Budget( budget_id="monthly-ai-budget", name="AI/ML Monthly Budget", amount_usd=50000.0, period="monthly", start_time=time.time(), scope={"provider": "aws", "tags": {"team": "ml-research"}}, alert_threshold=0.8 # Alert at 80% ) cost_obs.create_budget(budget)

get_cost_summary()

get_cost_summary(time_range_hours: int = 24) -> Dict
PYTHON summary = cost_obs.get_cost_summary(time_range_hours=24) print(f"Total: ${summary['total_cost']:.2f}") for provider, cost in summary['by_provider'].items(): print(f"{provider}: ${cost:.2f}") for service, cost in summary['by_service'].items(): print(f"{service}: ${cost:.2f}")

get_budget_status()

get_budget_status() -> Dict
PYTHON status = cost_obs.get_budget_status() for budget_id, info in status.items(): print(f"{info['name']}:") print(f" Budget: ${info['amount']:.2f}") print(f" Spent: ${info['spent']:.2f}") print(f" Status: {info['status']}")

Dashboards

Role-based dashboard components for executives and team leads.

CFO Dashboard

CFODashboard(hub: ObservabilityHub)
PYTHON from openfinops.dashboard import CFODashboard cfo_dash = CFODashboard(hub) # Generate financial report report = cfo_dash.generate_financial_report() print(f"Total Spend: ${report.total_spend}") print(f"AI/ML ROI: {report.ai_ml_roi}%") print(f"Budget Status: {report.budget_utilization}%")

COO Dashboard

COODashboard(hub: ObservabilityHub)
PYTHON from openfinops.dashboard import COODashboard coo_dash = COODashboard(hub) # Get operational metrics metrics = coo_dash.get_operational_metrics() print(f"SLA Compliance: {metrics.sla_compliance}%") print(f"Efficiency Score: {metrics.operational_efficiency}%")

Infrastructure Leader Dashboard

InfrastructureLeaderDashboard(hub: ObservabilityHub)
PYTHON from openfinops.dashboard import InfrastructureLeaderDashboard infra_dash = InfrastructureLeaderDashboard(hub) # Get resource utilization util = infra_dash.get_resource_utilization() print(f"CPU: {util.cpu_percent}%") print(f"Memory: {util.memory_percent}%") print(f"Storage: {util.storage_percent}%")

Telemetry Agents

Deploy agents as separate processes to automatically discover resources, collect metrics, and calculate costs.

AWSTelemetryAgent

AWSTelemetryAgent(openfinops_endpoint: str, aws_region: str)
Parameter Type Description
openfinops_endpoint str OpenFinOps server URL
aws_region str AWS region (e.g., 'us-west-2')
PYTHON from agents.aws_telemetry_agent import AWSTelemetryAgent # Initialize agent (uses boto3 credential chain) agent = AWSTelemetryAgent( openfinops_endpoint="http://localhost:8080", aws_region="us-west-2" ) # Register with server if agent.register_agent(): print("✓ Agent registered") # Run continuous collection # Automatically discovers & calculates costs for: # - EC2 instances, EKS clusters, Lambda functions # - RDS databases, S3 buckets agent.run_continuous(interval_seconds=300)

AzureTelemetryAgent

PYTHON from agents.azure_telemetry_agent import AzureTelemetryAgent agent = AzureTelemetryAgent( openfinops_endpoint="http://localhost:8080", subscription_id="your-subscription-id" ) agent.register_agent() agent.run_continuous(interval_seconds=300)

GCPTelemetryAgent

PYTHON from agents.gcp_telemetry_agent import GCPTelemetryAgent agent = GCPTelemetryAgent( openfinops_endpoint="http://localhost:8080", project_id="your-project-id" ) agent.register_agent() agent.run_continuous(interval_seconds=300)

Databricks Telemetry Agent

Collect Databricks DBU consumption, cluster costs, job execution metrics, and SQL warehouse usage.

DatabricksTelemetryAgent

DatabricksTelemetryAgent(openfinops_endpoint: str, databricks_host: str, databricks_token: str, workspace_name: str = None)
Parameter Type Description
openfinops_endpoint str OpenFinOps server URL (e.g., http://localhost:8080)
databricks_host str Databricks workspace URL (e.g., https://your-workspace.cloud.databricks.com)
databricks_token str Databricks personal access token
workspace_name str (optional) Workspace identifier (default: derived from host)
PYTHON from agents.databricks_telemetry_agent import DatabricksTelemetryAgent # Initialize agent agent = DatabricksTelemetryAgent( openfinops_endpoint="http://localhost:8080", databricks_host="https://your-workspace.cloud.databricks.com", databricks_token="dapi***", workspace_name="production" ) # Register and run if agent.register_agent(): print("✓ Databricks agent registered") # Collect every 5 minutes agent.run_continuous(interval_seconds=300)

Metrics Collected

Metric Description
cluster_metrics Cluster uptime, instance types, worker count, DBU consumption
job_metrics Job runtime, success/failure, execution costs
sql_warehouse_metrics Warehouse size, state, estimated costs

DBU Pricing

Cost Calculation: The agent automatically calculates costs based on DBU pricing:

  • All-Purpose Compute: $0.40/DBU
  • Jobs Compute: $0.15/DBU
  • SQL Pro: $0.55/DBU
  • Serverless SQL: $0.70/DBU
  • Delta Live Tables: $0.20-$0.30/DBU

collect_cluster_metrics()

Collect metrics from all clusters in the workspace.

collect_cluster_metrics() -> Dict[str, Any]
PYTHON # Returns cluster metrics { "total_clusters": 5, "running_clusters": 3, "total_estimated_cost_usd": 145.67, "clusters": [ { "cluster_name": "prod-cluster", "state": "RUNNING", "instance_type": "m5.xlarge", "num_workers": 4, "uptime_hours": 8.5, "estimated_cost_usd": 32.45 } ] }

collect_job_metrics()

Collect job execution metrics and costs.

collect_job_metrics(hours_lookback: int = 24) -> Dict[str, Any]
PYTHON # Returns job metrics for last 24 hours { "total_runs": 42, "successful_runs": 38, "failed_runs": 4, "total_estimated_cost_usd": 89.23, "jobs": [ { "job_id": 123, "run_name": "ETL Pipeline", "state": "SUCCESS", "runtime_seconds": 1845, "estimated_cost_usd": 12.34 } ] }

CLI Usage

BASH # Install dependencies pip install databricks-sdk requests # Set credentials export DATABRICKS_HOST=https://your-workspace.cloud.databricks.com export DATABRICKS_TOKEN=dapi*** # Run agent python agents/databricks_telemetry_agent.py \ --openfinops-endpoint http://localhost:8080 \ --databricks-host $DATABRICKS_HOST \ --databricks-token $DATABRICKS_TOKEN \ --interval 300

Snowflake Telemetry Agent

Monitor Snowflake credit consumption, warehouse usage, storage costs, and query patterns.

SnowflakeTelemetryAgent

SnowflakeTelemetryAgent(openfinops_endpoint: str, snowflake_account: str, snowflake_user: str, snowflake_password: str, snowflake_warehouse: str, edition: str = 'enterprise')
Parameter Type Description
openfinops_endpoint str OpenFinOps server URL
snowflake_account str Snowflake account identifier (e.g., xy12345.us-east-1)
snowflake_user str Snowflake username
snowflake_password str Snowflake password
snowflake_warehouse str Warehouse name to monitor (e.g., COMPUTE_WH)
edition str Edition for pricing: 'standard', 'enterprise', 'business_critical' (default: 'enterprise')
PYTHON from agents.snowflake_telemetry_agent import SnowflakeTelemetryAgent # Initialize agent agent = SnowflakeTelemetryAgent( openfinops_endpoint="http://localhost:8080", snowflake_account="xy12345.us-east-1", snowflake_user="admin_user", snowflake_password="***", snowflake_warehouse="COMPUTE_WH", edition="enterprise" ) # Register and run if agent.register_agent(): print("✓ Snowflake agent registered") agent.run_continuous(interval_seconds=300)

Metrics Collected

Metric Description
warehouse_metrics Credit consumption (compute + cloud services), active hours
storage_metrics Database storage, failsafe storage, total TB
query_metrics Query count, execution time, data scanned
user_attribution Cost breakdown by user and warehouse

Credit Pricing

Cost Calculation: Automatic cost calculation based on Snowflake edition:

  • Standard Edition: $2.00/credit
  • Enterprise Edition: $3.00/credit
  • Business Critical: $4.00/credit
  • Storage: $40/TB/month (on-demand)

collect_warehouse_metrics()

Collect warehouse credit consumption for past 24 hours.

collect_warehouse_metrics(hours_lookback: int = 24) -> Dict[str, Any]
PYTHON # Returns warehouse metrics { "total_warehouses": 3, "total_credits_used": 145.67, "total_estimated_cost_usd": 437.01, "warehouses": [ { "warehouse_name": "COMPUTE_WH", "total_credits_used": 89.45, "compute_credits": 78.23, "cloud_service_credits": 11.22, "active_hours": 18, "estimated_cost_usd": 268.35 } ] }

collect_storage_metrics()

Collect storage usage across all databases.

collect_storage_metrics() -> Dict[str, Any]
PYTHON # Returns storage metrics { "total_storage_tb": 12.456, "estimated_monthly_cost_usd": 498.24, "databases": [ { "database_name": "PROD_DB", "current_storage_tb": 8.234, "database_tb": 7.123, "failsafe_tb": 1.111, "estimated_monthly_cost_usd": 329.36 } ] }

CLI Usage

BASH # Install dependencies pip install snowflake-connector-python requests # Set credentials export SNOWFLAKE_USER=admin_user export SNOWFLAKE_PASSWORD=*** # Run agent python agents/snowflake_telemetry_agent.py \ --openfinops-endpoint http://localhost:8080 \ --snowflake-account xy12345.us-east-1 \ --snowflake-warehouse COMPUTE_WH \ --edition enterprise \ --interval 300

SaaS Services Telemetry Agent

Multi-service agent for monitoring MongoDB Atlas, Redis Cloud, GitHub Actions, DataDog, and more.

SaaSServicesTelemetryAgent

SaaSServicesTelemetryAgent(openfinops_endpoint: str, config_file: str)
Parameter Type Description
openfinops_endpoint str OpenFinOps server URL
config_file str Path to JSON configuration file

Configuration File

Create a JSON configuration file to enable specific services:

JSON { "mongodb_atlas": { "enabled": true, "public_key": "your_public_key", "private_key": "your_private_key", "project_id": "your_project_id" }, "redis_cloud": { "enabled": true, "api_key": "your_api_key", "secret_key": "your_secret_key", "account_id": "your_account_id" }, "github_actions": { "enabled": true, "token": "ghp_your_token", "org_name": "your_organization" }, "datadog": { "enabled": true, "api_key": "your_api_key", "app_key": "your_app_key" } }

Python Usage

PYTHON from agents.saas_services_telemetry_agent import SaaSServicesTelemetryAgent # Initialize agent with config agent = SaaSServicesTelemetryAgent( openfinops_endpoint="http://localhost:8080", config_file="saas_config.json" ) # Register and run (collect hourly) if agent.register_agent(): print("✓ SaaS services agent registered") agent.run_continuous(interval_seconds=3600)

Supported Services

Service Metrics Collected Cost Estimation
mongodb_atlas Cluster size, replication, sharding, storage M10: $0.08/hr, M30: $0.54/hr
redis_cloud Subscriptions, databases, throughput Based on subscription pricing
github_actions Workflow minutes, billable time Linux: $0.008/min, macOS: $0.08/min
datadog Host count, usage metrics ~$15/host/month

MongoDB Atlas Metrics

PYTHON # MongoDB Atlas metrics structure { "service": "mongodb_atlas", "total_clusters": 3, "total_daily_cost_usd": 47.52, "clusters": [ { "cluster_name": "prod-cluster", "instance_size": "M30", "num_shards": 1, "replication_factor": 3, "provider": "AWS", "region": "us-east-1", "hourly_cost_usd": 1.62, "daily_cost_usd": 38.88 } ] }

GitHub Actions Metrics

PYTHON # GitHub Actions metrics structure { "service": "github_actions", "total_minutes_used": 12450, "included_minutes": 3000, "billable_minutes": 9450, "estimated_monthly_cost_usd": 75.60, "minutes_used_breakdown": { "UBUNTU": 8000, "MACOS": 1450 } }

CLI Usage

BASH # Create sample configuration python agents/saas_services_telemetry_agent.py \ --create-config saas_config.json # Edit the configuration file with your credentials # Then run the agent python agents/saas_services_telemetry_agent.py \ --openfinops-endpoint http://localhost:8080 \ --config saas_config.json \ --interval 3600

Adding Custom Services

Extend the agent by creating custom collectors:

PYTHON from agents.saas_services_telemetry_agent import SaaSServicesTelemetryAgent class CustomServiceCollector: def __init__(self, api_key: str): self.api_key = api_key def collect_metrics(self) -> dict: # Your custom collection logic return { "service": "custom_service", "total_cost": 123.45, "metrics": {...} } # Add to agent configuration agent = SaaSServicesTelemetryAgent( openfinops_endpoint="http://localhost:8080", config_file="config.json" ) agent.collectors.append(("custom", CustomServiceCollector("api_key"))) agent.run_continuous(interval_seconds=3600)

Server Setup & Configuration

Set up the OpenFinOps server to receive telemetry from agents and serve dashboards.

Step 1: Start the OpenFinOps Server

The server provides both web dashboards and API endpoints for telemetry ingestion.

Option A: Using CLI

BASH # Start server on default port 8080 openfinops-dashboard # Start on custom host/port openfinops-dashboard --host 0.0.0.0 --port 8080 # Alternative command openfinops-server --host 0.0.0.0 --port 8080

Option B: Using Python

PYTHON from openfinops.webui import start_server # Start server start_server( host='0.0.0.0', port=8080, debug=False )

Step 2: Verify Server is Running

BASH # Check health endpoint curl http://localhost:8080/api/health # Expected response: {"status": "healthy"} # Verify server in logs # You should see: # 🚀 Starting OpenFinOps Web UI Server... # Host: 0.0.0.0 # Port: 8080

Step 3: Configure Telemetry Endpoints

The server automatically exposes telemetry endpoints when started. Agents use these to send data.

Available API Endpoints

Endpoint Method Description
/api/v1/agents/register POST Agent registration endpoint
/api/v1/telemetry/ingest POST Telemetry data ingestion
/api/health GET Server health check
/ GET Overview dashboard
/dashboard/cfo GET CFO executive dashboard
/dashboard/coo GET COO operational dashboard
/dashboard/infrastructure GET Infrastructure leader dashboard

Step 4: Initialize Observability Components

The server automatically initializes these components, but you can access them programmatically.

PYTHON from openfinops import ObservabilityHub from openfinops.observability.cost_observatory import CostObservatory from openfinops.observability.llm_observability import LLMObservabilityHub # Initialize components (done automatically by server) hub = ObservabilityHub() cost_obs = CostObservatory() llm_hub = LLMObservabilityHub() # These will receive data from agents automatically # No manual configuration needed

Step 5: Deploy Telemetry Agents

Once the server is running, deploy agents to start collecting data.

PYTHON # deploy_agent.py from agents.aws_telemetry_agent import AWSTelemetryAgent # Point agent to your server agent = AWSTelemetryAgent( openfinops_endpoint="http://localhost:8080", # Your server URL aws_region="us-west-2" ) # Register with server if agent.register_agent(): print("✓ Agent registered with server") # Start continuous collection # Agent will POST to http://localhost:8080/api/v1/telemetry/ingest agent.run_continuous(interval_seconds=300)

Complete Setup Example

PYTHON # 1. Start server (in one terminal/process) # openfinops-dashboard --port 8080 # 2. Deploy agent (in another terminal/process) from agents.aws_telemetry_agent import AWSTelemetryAgent agent = AWSTelemetryAgent( openfinops_endpoint="http://localhost:8080", aws_region="us-west-2" ) agent.register_agent() agent.run_continuous(interval_seconds=300) # 3. Access dashboards in browser # http://localhost:8080/ # http://localhost:8080/dashboard/cfo # http://localhost:8080/dashboard/coo

Production Deployment

For production, run the server with proper configuration.

BASH # Using systemd service # /etc/systemd/system/openfinops-server.service [Unit] Description=OpenFinOps Server After=network.target [Service] Type=simple User=openfinops WorkingDirectory=/opt/openfinops Environment="HOST=0.0.0.0" Environment="PORT=8080" ExecStart=/usr/bin/openfinops-dashboard --host 0.0.0.0 --port 8080 Restart=always [Install] WantedBy=multi-user.target # Enable and start sudo systemctl enable openfinops-server sudo systemctl start openfinops-server
DOCKER # Using Docker docker run -d \ --name openfinops-server \ -p 8080:8080 \ -e HOST=0.0.0.0 \ -e PORT=8080 \ openfinops/server:latest # Using Docker Compose # docker-compose.yml version: '3.8' services: openfinops-server: image: openfinops/server:latest ports: - "8080:8080" environment: - HOST=0.0.0.0 - PORT=8080 restart: always

Network Configuration

Ensure agents can reach the server on the configured port.

BASH # Open firewall port (if needed) sudo ufw allow 8080/tcp # Verify server is listening netstat -tulpn | grep 8080 # Test from agent machine curl http://your-server-ip:8080/api/health

Web UI Reference

Flask-based web server with real-time WebSocket updates.

start_server()

start_server(host: str = '0.0.0.0', port: int = 8080, debug: bool = False)
Parameter Type Description
host str Host to bind to (default: '0.0.0.0')
port int Port to bind to (default: 8080)
debug bool Enable debug mode (default: False)

WebSocket Real-Time Updates

The server pushes real-time updates to connected clients every 5 seconds.

JAVASCRIPT // Client-side WebSocket connection const socket = io('http://localhost:8080'); // Listen for cost updates socket.on('cost_update', (data) => { console.log('New cost data:', data); updateDashboard(data); }); // Listen for metric updates socket.on('metrics_update', (data) => { console.log('New metrics:', data); updateCharts(data); });

Dashboard Routes

Route Description
/ Overview dashboard with key metrics
/dashboard/cfo CFO executive financial dashboard
/dashboard/coo COO operational efficiency dashboard
/dashboard/infrastructure Infrastructure leader technical dashboard