1use anyhow::Result;
8use clap::Subcommand;
9use std::process::Command;
10
/// Subcommands available under `zernel cloud`.
#[derive(Subcommand)]
pub enum CloudCommands {
    /// Launch a GPU instance on Lambda Labs, or print the commands to launch one on AWS or Vast.ai
    Launch {
        /// Number of GPUs (shown in the launch summary and used in the Vast.ai offer search)
        #[arg(long, default_value = "1")]
        gpus: u32,
        /// Cloud provider: lambda, aws, or vast
        #[arg(long, default_value = "lambda")]
        provider: String,
        /// Instance type; defaults to gpu_1x_a100 for lambda and p4d.24xlarge for aws
        #[arg(long)]
        instance: Option<String>,
        /// Region name, sent in the Lambda launch request or shown in the printed AWS command
        #[arg(long, default_value = "us-east-1")]
        region: String,
    },
    /// Show the saved cluster state and the SSH command to run
    Ssh,
    /// Print the saved cluster state
    Status,
    /// Delete the local cluster state file (cloud instances are not terminated)
    Destroy {
        /// Confirm the deletion; without this flag only a warning is printed
        #[arg(long)]
        yes: bool,
    },
    /// Print a reference table of approximate GPU cloud prices
    Pricing {
        /// Show only rows whose provider name contains this text (case-insensitive); "all" shows every row
        #[arg(long, default_value = "all")]
        provider: String,
    },
}
45
46fn cloud_state_file() -> std::path::PathBuf {
47 crate::experiments::tracker::zernel_dir().join("cloud-state.json")
48}
49
50pub async fn run(cmd: CloudCommands) -> Result<()> {
51 match cmd {
52 CloudCommands::Launch {
53 gpus,
54 provider,
55 instance,
56 region,
57 } => {
58 println!("Zernel Cloud Launch");
59 println!("{}", "=".repeat(50));
60 println!(" Provider: {provider}");
61 println!(" GPUs: {gpus}");
62 println!(" Region: {region}");
63 println!();
64
65 match provider.as_str() {
66 "lambda" => {
67 let instance_type = instance.unwrap_or_else(|| "gpu_1x_a100".into());
68 println!(" Instance: {instance_type}");
69 println!();
70 println!("Launching via Lambda Labs API...");
71 println!(" Requires: LAMBDA_API_KEY environment variable");
72 println!(" Get key at: https://cloud.lambdalabs.com/api-keys");
73 println!();
74
75 let api_key = std::env::var("LAMBDA_API_KEY").unwrap_or_default();
76 if api_key.is_empty() {
77 println!(" Set your API key:");
78 println!(" export LAMBDA_API_KEY=your-key-here");
79 println!(" zernel cloud launch --provider lambda --gpus {gpus}");
80 return Ok(());
81 }
82
83 let body = serde_json::json!({
84 "region_name": region,
85 "instance_type_name": instance_type,
86 "quantity": 1,
87 });
88
89 println!(" Sending launch request...");
90 let output = Command::new("curl")
91 .args([
92 "-s",
93 "-X",
94 "POST",
95 "https://cloud.lambdalabs.com/api/v1/instance-operations/launch",
96 "-H",
97 &format!("Authorization: Bearer {api_key}"),
98 "-H",
99 "Content-Type: application/json",
100 "-d",
101 &body.to_string(),
102 ])
103 .output();
104
105 match output {
106 Ok(o) => {
107 let resp = String::from_utf8_lossy(&o.stdout);
108 println!(" Response: {resp}");
109 std::fs::write(cloud_state_file(), resp.to_string())?;
111 }
112 Err(e) => println!(" Error: {e}"),
113 }
114 }
115
116 "aws" => {
117 let instance_type = instance.unwrap_or_else(|| "p4d.24xlarge".into());
118 println!(" Instance: {instance_type}");
119 println!();
120 println!(" Run:");
121 println!(" aws ec2 run-instances \\");
122 println!(" --instance-type {instance_type} \\");
123 println!(" --image-id ami-zernel \\");
124 println!(" --region {region} \\");
125 println!(" --key-name your-key");
126 }
127
128 "vast" => {
129 println!(" Searching Vast.ai for {gpus}x GPU instances...");
130 println!();
131 println!(" Run:");
132 println!(" vastai search offers 'gpu_name=A100 num_gpus={gpus}'");
133 println!(" vastai create instance <offer_id> --image zernel/zernel:latest");
134 }
135
136 other => {
137 println!(" Provider '{other}' not yet supported.");
138 println!(" Available: lambda, aws, vast");
139 }
140 }
141 }
142
143 CloudCommands::Ssh => {
144 let state_file = cloud_state_file();
145 if state_file.exists() {
146 let state = std::fs::read_to_string(&state_file)?;
147 println!("Connecting to cluster...");
148 println!(" State: {state}");
149 println!(" Run: ssh ubuntu@<instance-ip>");
150 } else {
151 println!("No active cluster. Launch one: zernel cloud launch --gpus 8");
152 }
153 }
154
155 CloudCommands::Status => {
156 let state_file = cloud_state_file();
157 if state_file.exists() {
158 let state = std::fs::read_to_string(&state_file)?;
159 println!("Zernel Cloud Status");
160 println!("{}", "=".repeat(50));
161 println!("{state}");
162 } else {
163 println!("No active cluster.");
164 }
165 }
166
167 CloudCommands::Destroy { yes } => {
168 if !yes {
169 println!("This will destroy your GPU cluster. Add --yes to confirm.");
170 return Ok(());
171 }
172 let state_file = cloud_state_file();
173 if state_file.exists() {
174 std::fs::remove_file(&state_file)?;
175 println!("Cluster state cleared.");
176 println!(
177 "Note: You may need to manually terminate instances in your cloud console."
178 );
179 } else {
180 println!("No active cluster to destroy.");
181 }
182 }
183
184 CloudCommands::Pricing { provider } => {
185 println!("GPU Cloud Pricing Reference");
186 println!("{}", "=".repeat(60));
187 println!();
188 println!(
189 "{:<20} {:<15} {:>10} {:>12}",
190 "Provider", "GPU", "Per Hour", "Per Month"
191 );
192 println!("{}", "-".repeat(60));
193
194 let pricing = [
195 ("Lambda Labs", "1x A100 80GB", "$1.10", "$792"),
196 ("Lambda Labs", "8x A100 80GB", "$8.80", "$6,336"),
197 ("AWS", "1x A100 (p4d)", "$3.67", "$2,642"),
198 ("AWS", "8x A100 (p4d)", "$32.77", "$23,594"),
199 ("Vast.ai", "1x A100 80GB", "$0.80-1.50", "$576-1,080"),
200 ("Vast.ai", "1x RTX 4090", "$0.20-0.40", "$144-288"),
201 ("Google Cloud", "1x A100 (a2)", "$3.67", "$2,642"),
202 ("Azure", "1x A100 (NC)", "$3.67", "$2,642"),
203 ("RunPod", "1x A100 80GB", "$1.64", "$1,181"),
204 ("Hetzner", "1x RTX 3060", "$0.07", "$50"),
205 ];
206
207 for (prov, gpu, hourly, monthly) in &pricing {
208 if provider == "all" || prov.to_lowercase().contains(&provider.to_lowercase()) {
209 println!("{:<20} {:<15} {:>10} {:>12}", prov, gpu, hourly, monthly);
210 }
211 }
212
213 println!();
214 println!("Prices are approximate and subject to change.");
215 println!("Launch: zernel cloud launch --provider lambda --gpus 8");
216 }
217 }
218 Ok(())
219}