zernel/commands/
cloud.rs

1// Copyright (C) 2026 Dyber, Inc. — Proprietary
2
3//! zernel cloud — One command GPU cluster management
4//!
5//! Launch, manage, and destroy GPU clusters across providers.
6
7use anyhow::Result;
8use clap::Subcommand;
9use std::process::Command;
10
11#[derive(Subcommand)]
12pub enum CloudCommands {
13    /// Launch a GPU cluster
14    Launch {
15        /// Number of GPUs
16        #[arg(long, default_value = "1")]
17        gpus: u32,
18        /// Cloud provider (lambda, aws, gcp, azure, vast)
19        #[arg(long, default_value = "lambda")]
20        provider: String,
21        /// Instance type (provider-specific)
22        #[arg(long)]
23        instance: Option<String>,
24        /// Region
25        #[arg(long, default_value = "us-east-1")]
26        region: String,
27    },
28    /// SSH into the running cluster
29    Ssh,
30    /// Show cluster status
31    Status,
32    /// Destroy the cluster
33    Destroy {
34        /// Skip confirmation
35        #[arg(long)]
36        yes: bool,
37    },
38    /// List available GPU instances and pricing
39    Pricing {
40        /// Provider to check
41        #[arg(long, default_value = "all")]
42        provider: String,
43    },
44}
45
46fn cloud_state_file() -> std::path::PathBuf {
47    crate::experiments::tracker::zernel_dir().join("cloud-state.json")
48}
49
50pub async fn run(cmd: CloudCommands) -> Result<()> {
51    match cmd {
52        CloudCommands::Launch {
53            gpus,
54            provider,
55            instance,
56            region,
57        } => {
58            println!("Zernel Cloud Launch");
59            println!("{}", "=".repeat(50));
60            println!("  Provider: {provider}");
61            println!("  GPUs:     {gpus}");
62            println!("  Region:   {region}");
63            println!();
64
65            match provider.as_str() {
66                "lambda" => {
67                    let instance_type = instance.unwrap_or_else(|| "gpu_1x_a100".into());
68                    println!("  Instance: {instance_type}");
69                    println!();
70                    println!("Launching via Lambda Labs API...");
71                    println!("  Requires: LAMBDA_API_KEY environment variable");
72                    println!("  Get key at: https://cloud.lambdalabs.com/api-keys");
73                    println!();
74
75                    let api_key = std::env::var("LAMBDA_API_KEY").unwrap_or_default();
76                    if api_key.is_empty() {
77                        println!("  Set your API key:");
78                        println!("    export LAMBDA_API_KEY=your-key-here");
79                        println!("    zernel cloud launch --provider lambda --gpus {gpus}");
80                        return Ok(());
81                    }
82
83                    let body = serde_json::json!({
84                        "region_name": region,
85                        "instance_type_name": instance_type,
86                        "quantity": 1,
87                    });
88
89                    println!("  Sending launch request...");
90                    let output = Command::new("curl")
91                        .args([
92                            "-s",
93                            "-X",
94                            "POST",
95                            "https://cloud.lambdalabs.com/api/v1/instance-operations/launch",
96                            "-H",
97                            &format!("Authorization: Bearer {api_key}"),
98                            "-H",
99                            "Content-Type: application/json",
100                            "-d",
101                            &body.to_string(),
102                        ])
103                        .output();
104
105                    match output {
106                        Ok(o) => {
107                            let resp = String::from_utf8_lossy(&o.stdout);
108                            println!("  Response: {resp}");
109                            // Save state
110                            std::fs::write(cloud_state_file(), resp.to_string())?;
111                        }
112                        Err(e) => println!("  Error: {e}"),
113                    }
114                }
115
116                "aws" => {
117                    let instance_type = instance.unwrap_or_else(|| "p4d.24xlarge".into());
118                    println!("  Instance: {instance_type}");
119                    println!();
120                    println!("  Run:");
121                    println!("    aws ec2 run-instances \\");
122                    println!("      --instance-type {instance_type} \\");
123                    println!("      --image-id ami-zernel \\");
124                    println!("      --region {region} \\");
125                    println!("      --key-name your-key");
126                }
127
128                "vast" => {
129                    println!("  Searching Vast.ai for {gpus}x GPU instances...");
130                    println!();
131                    println!("  Run:");
132                    println!("    vastai search offers 'gpu_name=A100 num_gpus={gpus}'");
133                    println!("    vastai create instance <offer_id> --image zernel/zernel:latest");
134                }
135
136                other => {
137                    println!("  Provider '{other}' not yet supported.");
138                    println!("  Available: lambda, aws, vast");
139                }
140            }
141        }
142
143        CloudCommands::Ssh => {
144            let state_file = cloud_state_file();
145            if state_file.exists() {
146                let state = std::fs::read_to_string(&state_file)?;
147                println!("Connecting to cluster...");
148                println!("  State: {state}");
149                println!("  Run: ssh ubuntu@<instance-ip>");
150            } else {
151                println!("No active cluster. Launch one: zernel cloud launch --gpus 8");
152            }
153        }
154
155        CloudCommands::Status => {
156            let state_file = cloud_state_file();
157            if state_file.exists() {
158                let state = std::fs::read_to_string(&state_file)?;
159                println!("Zernel Cloud Status");
160                println!("{}", "=".repeat(50));
161                println!("{state}");
162            } else {
163                println!("No active cluster.");
164            }
165        }
166
167        CloudCommands::Destroy { yes } => {
168            if !yes {
169                println!("This will destroy your GPU cluster. Add --yes to confirm.");
170                return Ok(());
171            }
172            let state_file = cloud_state_file();
173            if state_file.exists() {
174                std::fs::remove_file(&state_file)?;
175                println!("Cluster state cleared.");
176                println!(
177                    "Note: You may need to manually terminate instances in your cloud console."
178                );
179            } else {
180                println!("No active cluster to destroy.");
181            }
182        }
183
184        CloudCommands::Pricing { provider } => {
185            println!("GPU Cloud Pricing Reference");
186            println!("{}", "=".repeat(60));
187            println!();
188            println!(
189                "{:<20} {:<15} {:>10} {:>12}",
190                "Provider", "GPU", "Per Hour", "Per Month"
191            );
192            println!("{}", "-".repeat(60));
193
194            let pricing = [
195                ("Lambda Labs", "1x A100 80GB", "$1.10", "$792"),
196                ("Lambda Labs", "8x A100 80GB", "$8.80", "$6,336"),
197                ("AWS", "1x A100 (p4d)", "$3.67", "$2,642"),
198                ("AWS", "8x A100 (p4d)", "$32.77", "$23,594"),
199                ("Vast.ai", "1x A100 80GB", "$0.80-1.50", "$576-1,080"),
200                ("Vast.ai", "1x RTX 4090", "$0.20-0.40", "$144-288"),
201                ("Google Cloud", "1x A100 (a2)", "$3.67", "$2,642"),
202                ("Azure", "1x A100 (NC)", "$3.67", "$2,642"),
203                ("RunPod", "1x A100 80GB", "$1.64", "$1,181"),
204                ("Hetzner", "1x RTX 3060", "$0.07", "$50"),
205            ];
206
207            for (prov, gpu, hourly, monthly) in &pricing {
208                if provider == "all" || prov.to_lowercase().contains(&provider.to_lowercase()) {
209                    println!("{:<20} {:<15} {:>10} {:>12}", prov, gpu, hourly, monthly);
210                }
211            }
212
213            println!();
214            println!("Prices are approximate and subject to change.");
215            println!("Launch: zernel cloud launch --provider lambda --gpus 8");
216        }
217    }
218    Ok(())
219}