diff --git a/Cargo.lock b/Cargo.lock
index 292fe15..557fbe0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -396,7 +396,7 @@ dependencies = [
 [[package]]
 name = "detee-shared"
 version = "0.1.0"
-source = "git+ssh://git@gitea.detee.cloud/testnet/proto?branch=surreal_brain#d6ca058d2de78b5257517034bca2b2c7d5929db8"
+source = "git+ssh://git@gitea.detee.cloud/testnet/proto?branch=credits-v2#f344c171c5a8d7ae8cad1628396e6b3a1af0f1ba"
 dependencies = [
  "bincode",
  "prost",
diff --git a/Cargo.toml b/Cargo.toml
index 13c3a1d..3360d27 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,7 +27,7 @@ bs58 = "0.5.1"
 chrono = "0.4.39"
 
 # TODO: switch this back to main after the upgrade
-detee-shared = { git = "ssh://git@gitea.detee.cloud/testnet/proto", branch = "surreal_brain" }
+detee-shared = { git = "ssh://git@gitea.detee.cloud/testnet/proto", branch = "credits-v2" }
 # detee-shared = { path = "../detee-shared" }
 
 [build-dependencies]
diff --git a/src/config.rs b/src/config.rs
index 1804e6f..f7e1a9e 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -10,7 +10,7 @@ use std::{
 #[derive(Deserialize, Debug, Clone)]
 pub struct Volume {
     pub path: String,
-    pub max_reservation_gb: usize,
+    pub max_reservation_mib: usize,
 }
 
 #[derive(Deserialize, Debug)]
@@ -48,9 +48,8 @@ pub enum InterfaceType {
 pub struct Config {
     pub owner_wallet: String,
     pub network: String,
-    pub max_cores_per_vm: usize,
     pub max_vcpu_reservation: usize,
-    pub max_mem_reservation_mb: usize,
+    pub max_mem_reservation_mib: usize,
     pub network_interfaces: Vec,
     pub volumes: Vec<Volume>,
     #[serde(with = "range_format")]
diff --git a/src/global.rs b/src/global.rs
index ea270aa..1101482 100644
--- a/src/global.rs
+++ b/src/global.rs
@@ -9,14 +9,16 @@ use sha2::{Digest, Sha256};
 use std::{
     fs::File,
     io::{Read, Write},
-    sync::LazyLock,
 };
 
 pub(crate) const DETEE_ROOT_CA: &str = "/etc/detee/root_ca.pem";
-pub(crate) const BRAIN_STAGING_URLS: [&str; 3] =
-    ["https://156.146.63.216:31337", "https://156.146.63.216:31337", "https://156.146.63.216:31337"];
+pub(crate) const BRAIN_STAGING_URLS: [&str; 3] = [
+    "https://156.146.63.216:31337",
+    "https://156.146.63.216:31337",
+    "https://156.146.63.216:31337",
+];
 pub(crate) const BRAIN_TESTING_URLS: [&str; 3] =
-    ["https://184.107.169.199:45223", "https://149.22.95.1:44522", "https://149.36.48.99:48638"];
+    ["https://156.146.63.218:31337", "https://173.234.17.2:8050", "https://156.146.63.218:31337"];
 pub(crate) const VM_BOOT_DIR: &str = "/var/lib/detee/boot/";
 pub(crate) const USED_RESOURCES: &str = "/etc/detee/daemon/used_resources.yaml";
 pub(crate) const VM_CONFIG_DIR: &str = "/etc/detee/daemon/vms/";
@@ -31,13 +33,13 @@ pub(crate) const OVMF_HASH: &str =
 pub(crate) const OVMF_URL: &str =
     "https://drive.google.com/uc?export=download&id=1V-vLkaiLaGmFSjrN84Z6nELQOxKNAoSJ";
 
-pub static BRAIN_STAGING: LazyLock<(&str, &str)> = LazyLock::new(|| {
+pub fn brain_staging() -> (&'static str, &'static str) {
     (BRAIN_STAGING_URLS[rand::thread_rng().gen_range(0..BRAIN_STAGING_URLS.len())], "staging-brain")
-});
+}
 
-pub static BRAIN_TESTING: LazyLock<(&str, &str)> = LazyLock::new(|| {
+pub fn brain_testing() -> (&'static str, &'static str) {
     (BRAIN_TESTING_URLS[rand::thread_rng().gen_range(0..BRAIN_TESTING_URLS.len())], "testnet-brain")
-});
+}
 
 lazy_static! {
     pub static ref PUBLIC_KEY: String = get_public_key();
diff --git a/src/grpc.rs b/src/grpc.rs
index 5718cdb..fe0a1d5 100644
--- a/src/grpc.rs
+++ b/src/grpc.rs
@@ -18,8 +18,8 @@ pub mod snp_proto {
 
 async fn client(network: &str) -> Result> {
     let (brain_url, brain_san) = match network {
-        "staging" => *BRAIN_STAGING,
-        "testnet" => *BRAIN_TESTING,
+        "staging" => brain_staging(),
+        "testnet" => brain_testing(),
         _ => {
             return Err(anyhow::anyhow!(
                 "The only networks currently supported are staging and testnet."
diff --git a/src/main.rs b/src/main.rs
index 8a243c8..cd908f9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -59,29 +59,62 @@ impl VMHandler {
         )
     }
 
-    async fn send_node_resources(&mut self) {
-        let (avail_ipv4, avail_ipv6) = self.get_available_ips();
-        let mut total_gb_available = 0;
+    // returns storage available per VM and total storage available
+    fn storage_available(&self) -> (usize, usize) {
+        let mut total_storage_available = 0_usize;
+        let mut avail_storage_mib = 0_usize;
         for volume in self.config.volumes.iter() {
             let reservation: usize = match self.res.reserved_storage.get(&volume.path) {
                 Some(reserved) => *reserved,
                 None => 0 as usize,
             };
-            let volume_gb_available = volume.max_reservation_gb - reservation;
-            if total_gb_available < volume_gb_available {
-                total_gb_available = volume_gb_available;
+            let volume_mib_available = volume.max_reservation_mib.saturating_sub(reservation);
+            total_storage_available += volume_mib_available;
+            if avail_storage_mib < volume_mib_available {
+                avail_storage_mib = volume_mib_available;
             }
         }
-        let avail_storage_gb = total_gb_available as u32;
+
+        (avail_storage_mib, total_storage_available)
+    }
+
+    /// returns the memory per vCPU and disk per vCPU ratios
+    fn slot_ratios(&self) -> (usize, usize) {
+        let (_, total_storage_mib) = self.storage_available();
+        let available_cpus: usize =
+            self.config.max_vcpu_reservation.saturating_sub(self.res.reserved_vcpus);
+        let available_mem: usize =
+            self.config.max_mem_reservation_mib.saturating_sub(self.res.reserved_memory_mib);
+
+        let memory_per_cpu = available_mem / available_cpus;
+        let disk_per_cpu = total_storage_mib / available_cpus;
+
+        (memory_per_cpu, disk_per_cpu)
+    }
+
+    async fn send_node_resources(&mut self) {
+        let (avail_ipv4, avail_ipv6) = self.get_available_ips();
+
+        let (avail_storage_mib, total_storage_available) = self.storage_available();
+        let avail_vcpus = self.config.max_vcpu_reservation.saturating_sub(self.res.reserved_vcpus);
+        let avail_memory_mib =
+            self.config.max_mem_reservation_mib.saturating_sub(self.res.reserved_memory_mib);
+
+        // If storage is separated into multiple volumes, that limits the maximum VM size.
+        // Due to this, we have to limit the maximum amount of vCPUs and memory per VM, based on
+        // the maximum possible disk size per VM.
+        let avail_vcpus = avail_vcpus * avail_storage_mib / total_storage_available;
+        let avail_memory_mib = avail_memory_mib * avail_storage_mib / total_storage_available;
+
         let res = snp_proto::VmNodeResources {
             node_pubkey: PUBLIC_KEY.clone(),
             avail_ports: (self.config.public_port_range.len() - self.res.reserved_ports.len())
                 as u32,
             avail_ipv4,
             avail_ipv6,
-            avail_vcpus: (self.config.max_vcpu_reservation - self.res.reserved_vcpus) as u32,
-            avail_memory_mb: (self.config.max_mem_reservation_mb - self.res.reserved_memory) as u32,
-            avail_storage_gb,
+            avail_vcpus: avail_vcpus as u32,
+            avail_memory_mib: avail_memory_mib as u32,
+            avail_storage_mib: avail_storage_mib as u32,
             max_ports_per_vm: self.config.max_ports_per_vm as u32,
         };
         debug!("sending node resources on brain: {res:?}");
@@ -89,6 +122,30 @@ impl VMHandler {
     }
 
     async fn handle_new_vm_req(&mut self, new_vm_req: snp_proto::NewVmReq) {
+        // Currently the daemon allows a deviation of 10% for newly created VMs, so that the
+        // process doesn't get interrupted by small bugs caused by human error in coding.
+        // Over time, we will probably allow deviation based on server utilization; however,
+        // that needs to be implemented for both VM creation and VM upgrade.
+        let (memory_per_cpu, disk_per_cpu) = self.slot_ratios();
+        let vm_memory_per_cpu = new_vm_req.memory_mib / new_vm_req.vcpus;
+        let vm_disk_per_cpu = new_vm_req.disk_size_mib / new_vm_req.vcpus;
+        if !within_10_percent(memory_per_cpu, vm_memory_per_cpu as usize)
+            || !within_10_percent(disk_per_cpu, vm_disk_per_cpu as usize)
+        {
+            warn!("Refusing to create VM due to unbalanced resources: {new_vm_req:?}");
+            let _ = self
+                .sender
+                .send(
+                    snp_proto::NewVmResp {
+                        uuid: new_vm_req.uuid,
+                        error: format!("Unbalanced hardware resources."),
+                        ..Default::default()
+                    }
+                    .into(),
+                )
+                .await;
+            return;
+        };
         debug!("Processing new vm request: {new_vm_req:?}");
         let uuid = new_vm_req.uuid.clone();
         match state::VM::new(new_vm_req.into(), &self.config, &mut self.res) {
@@ -299,3 +356,9 @@ fn download_and_replace_binary() -> Result<()> {
     }
     Ok(())
 }
+
+fn within_10_percent(a: usize, b: usize) -> bool {
+    let diff = a.abs_diff(b); // absolute difference (usize)
+    let reference = a.max(b); // the larger of the two
+    diff * 10 <= reference
+}
diff --git a/src/state.rs b/src/state.rs
index 05d0921..6ed0439 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -19,7 +19,7 @@ pub struct Resources {
     pub existing_vms: HashSet<String>,
     // QEMU does not support MHz limiation
     pub reserved_vcpus: usize,
-    pub reserved_memory: usize,
+    pub reserved_memory_mib: usize,
     pub reserved_ports: HashSet<u16>,
     pub reserved_storage: HashMap<String, usize>,
     pub reserved_ipv4: HashSet,
@@ -49,16 +49,16 @@ impl Resources {
             let vm: VM = serde_yaml::from_str(&content)?;
             res.existing_vms.insert(vm.uuid);
             res.reserved_vcpus = res.reserved_vcpus.saturating_add(vm.vcpus);
-            res.reserved_memory = res.reserved_memory.saturating_add(vm.memory_mb);
+            res.reserved_memory_mib = res.reserved_memory_mib.saturating_add(vm.memory_mib);
             for (port, _) in vm.fw_ports.iter() {
                 res.reserved_ports.insert(*port);
             }
             res.reserved_storage
                 .entry(vm.storage_dir.clone())
                 .and_modify(|gb| {
-                    *gb = gb.saturating_add(vm.disk_size_gb);
+                    *gb = gb.saturating_add(vm.disk_size_mib);
                 })
-                .or_insert(vm.disk_size_gb);
+                .or_insert(vm.disk_size_mib);
             for nic in vm.nics {
                 for ip in nic.ips {
                     if let Ok(ip_address) = ip.address.parse::<IpAddr>() {
@@ -89,13 +89,13 @@ impl Resources {
             storage_pools.push(StoragePool {
                 path: config_vol.path.clone(),
                 // TODO: check if the storage is actualy available at that path
-                available_gb: config_vol.max_reservation_gb,
+                available_gb: config_vol.max_reservation_mib,
             });
         }
         let mut res = Resources {
             existing_vms: HashSet::new(),
             reserved_vcpus: 0,
-            reserved_memory: 0,
+            reserved_memory_mib: 0,
             reserved_ports: HashSet::new(),
             reserved_storage: HashMap::new(),
             reserved_ipv4: HashSet::new(),
@@ -112,12 +112,12 @@ impl Resources {
         let mut volumes = config.volumes.clone();
         for volume in volumes.iter_mut() {
             if let Some(reservation) = self.reserved_storage.get(&volume.path) {
-                volume.max_reservation_gb = volume.max_reservation_gb.saturating_sub(*reservation);
+                volume.max_reservation_mib = volume.max_reservation_mib.saturating_sub(*reservation);
             }
         }
-        volumes.sort_by_key(|v| v.max_reservation_gb);
+        volumes.sort_by_key(|v| v.max_reservation_mib);
         if let Some(biggest_volume) = volumes.last() {
-            if biggest_volume.max_reservation_gb >= required_gb {
+            if biggest_volume.max_reservation_mib >= required_gb {
                 return Some(biggest_volume.path.clone());
             }
         }
@@ -264,7 +264,7 @@ impl Resources {
     fn reserve_vm_resources(&mut self, vm: &VM) {
         self.existing_vms.insert(vm.uuid.clone());
         self.reserved_vcpus += vm.vcpus;
-        self.reserved_memory += vm.memory_mb;
+        self.reserved_memory_mib += vm.memory_mib;
         for nic in vm.nics.iter() {
             if let Some(vtap) = nic.if_config.vtap_name() {
                 self.reserved_if_names.insert(vtap);
@@ -286,8 +286,8 @@ impl Resources {
 
         self.reserved_storage
             .entry(vm.storage_dir.clone())
-            .and_modify(|gb| *gb = gb.saturating_add(vm.disk_size_gb))
-            .or_insert(vm.disk_size_gb);
+            .and_modify(|gb| *gb = gb.saturating_add(vm.disk_size_mib))
+            .or_insert(vm.disk_size_mib);
         let _ = self.save_to_disk();
     }
 
@@ -296,7 +296,7 @@ impl Resources {
             return;
         }
         self.reserved_vcpus = self.reserved_vcpus.saturating_sub(vm.vcpus);
-        self.reserved_memory = self.reserved_memory.saturating_sub(vm.memory_mb);
+        self.reserved_memory_mib = self.reserved_memory_mib.saturating_sub(vm.memory_mib);
         for nic in vm.nics.iter() {
             if let Some(vtap) = nic.if_config.vtap_name() {
                 self.reserved_if_names.remove(&vtap);
@@ -317,7 +317,7 @@ impl Resources {
         }
         self.reserved_storage
             .entry(vm.storage_dir.clone())
-            .and_modify(|gb| *gb = gb.saturating_sub(vm.disk_size_gb));
+            .and_modify(|gb| *gb = gb.saturating_sub(vm.disk_size_mib));
         if let Err(e) = self.save_to_disk() {
             log::error!("Could not save resources to disk: {e}");
         }
@@ -400,8 +400,8 @@ pub struct VM {
     // currently hardcoded to EPYC-v4
     // cpu_type: String,
     vcpus: usize,
-    memory_mb: usize,
-    disk_size_gb: usize,
+    memory_mib: usize,
+    disk_size_mib: usize,
     kernel_sha: String,
     dtrfs_sha: String,
     storage_dir: String,
@@ -465,9 +465,9 @@ pub struct NewVMRequest {
     extra_ports: Vec<u16>,
     public_ipv4: bool,
     public_ipv6: bool,
-    disk_size_gb: usize,
+    disk_size_mib: usize,
     vcpus: usize,
-    memory_mb: usize,
+    memory_mib: usize,
     kernel_url: String,
     kernel_sha: String,
     dtrfs_url: String,
@@ -483,9 +483,9 @@ impl From<snp_proto::NewVmReq> for NewVMRequest {
             extra_ports: req.extra_ports.iter().map(|&port| port as u16).collect(),
             public_ipv4: req.public_ipv4,
             public_ipv6: req.public_ipv6,
-            disk_size_gb: req.disk_size_gb as usize,
+            disk_size_mib: req.disk_size_mib as usize,
             vcpus: req.vcpus as usize,
-            memory_mb: req.memory_mb as usize,
+            memory_mib: req.memory_mib as usize,
             kernel_url: req.kernel_url,
             kernel_sha: req.kernel_sha,
             dtrfs_url: req.dtrfs_url,
@@ -499,8 +499,8 @@ impl From<snp_proto::NewVmReq> for NewVMRequest {
 pub struct UpdateVMReq {
     pub uuid: String,
     vcpus: usize,
-    memory_mb: usize,
-    disk_size_gb: usize,
+    memory_mib: usize,
+    disk_size_mib: usize,
     // we are not using Option, as these will be passed from gRPC
     kernel_url: String,
     kernel_sha: String,
@@ -513,8 +513,8 @@ impl From<snp_proto::UpdateVmReq> for UpdateVMReq {
         Self {
             uuid: req.uuid,
             vcpus: req.vcpus as usize,
-            memory_mb: req.memory_mb as usize,
-            disk_size_gb: req.disk_size_gb as usize,
+            memory_mib: req.memory_mib as usize,
+            disk_size_mib: req.disk_size_mib as usize,
             kernel_url: req.kernel_url,
             kernel_sha: req.kernel_sha,
             dtrfs_url: req.dtrfs_url,
@@ -528,7 +528,6 @@ pub enum VMCreationErrors {
     PriceIsTooLow,
     VMAlreadyExists(VM),
     NATandIPv4Conflict,
-    TooManyCores,
     NotEnoughPorts,
     NotEnoughCPU,
     NotEnoughMemory,
@@ -536,6 +535,7 @@ pub enum VMCreationErrors {
     IPv4NotAvailable,
     IPv6NotAvailable,
     DiskTooSmall,
+    DowngradeNotSupported,
     ServerDiskError(String),
     BootFileError(String),
     HypervizorError(String),
@@ -560,16 +560,13 @@ impl VM {
         if req.extra_ports.len() > 0 && req.public_ipv4 {
             return Err(VMCreationErrors::NATandIPv4Conflict);
         }
-        if config.max_cores_per_vm < req.vcpus {
-            return Err(VMCreationErrors::TooManyCores);
-        }
         if config.max_vcpu_reservation < res.reserved_vcpus.saturating_add(req.vcpus) {
             return Err(VMCreationErrors::NotEnoughCPU);
         }
-        if config.max_mem_reservation_mb < res.reserved_memory.saturating_add(req.memory_mb) {
+        if config.max_mem_reservation_mib < res.reserved_memory_mib.saturating_add(req.memory_mib) {
             return Err(VMCreationErrors::NotEnoughMemory);
         }
-        if req.disk_size_gb < 4 {
+        if req.disk_size_mib < 4 {
             return Err(VMCreationErrors::DiskTooSmall);
         }
 
@@ -632,7 +629,7 @@ impl VM {
             }
         }
 
-        let storage_pool_path = match res.available_storage_pool(req.disk_size_gb, config) {
+        let storage_pool_path = match res.available_storage_pool(req.disk_size_mib, config) {
             Some(path) => path,
             None => return Err(VMCreationErrors::NotEnoughStorage),
         };
@@ -642,8 +639,8 @@ impl VM {
             admin_key: req.admin_key,
             nics: vm_nics,
             vcpus: req.vcpus,
-            memory_mb: req.memory_mb,
-            disk_size_gb: req.disk_size_gb,
+            memory_mib: req.memory_mib,
+            disk_size_mib: req.disk_size_mib,
             kernel_sha: req.kernel_sha,
             dtrfs_sha: req.dtrfs_sha,
             fw_ports: port_pairs,
@@ -663,8 +660,12 @@ impl VM {
         config: &Config,
         res: &mut Resources,
     ) -> Result<(), VMCreationErrors> {
-        if req.vcpus > 0 && config.max_cores_per_vm < req.vcpus {
-            return Err(VMCreationErrors::TooManyCores);
+        if req.vcpus < self.vcpus {
+            // Downgrade will be supported only after we implement deviation for VMs.
+            // (Deviation from the slot size allows management of VMs with unbalanced resources
+            // without fully saturating a node.
+            // We are disabling downgrades to avoid complexity at this stage of the product.)
+            return Err(VMCreationErrors::DowngradeNotSupported);
         }
         if req.vcpus > 0
             && config.max_vcpu_reservation
@@ -672,13 +673,13 @@ impl VM {
         {
             return Err(VMCreationErrors::NotEnoughCPU);
         }
-        if req.memory_mb > 0
-            && config.max_mem_reservation_mb
-                < res.reserved_memory.saturating_sub(self.memory_mb).saturating_add(req.memory_mb)
+        if req.memory_mib > 0
+            && config.max_mem_reservation_mib
+                < res.reserved_memory_mib.saturating_sub(self.memory_mib).saturating_add(req.memory_mib)
         {
             return Err(VMCreationErrors::NotEnoughMemory);
         }
-        if req.disk_size_gb > 0 && req.disk_size_gb < self.disk_size_gb {
+        if req.disk_size_mib > 0 && req.disk_size_mib < self.disk_size_mib {
             return Err(VMCreationErrors::DiskTooSmall);
         }
 
@@ -710,24 +711,24 @@ impl VM {
         }
 
         // Update the resources
-        res.reserved_memory = res.reserved_memory.saturating_add(req.memory_mb);
-        res.reserved_memory = res.reserved_memory.saturating_sub(self.memory_mb);
+        res.reserved_memory_mib = res.reserved_memory_mib.saturating_add(req.memory_mib);
+        res.reserved_memory_mib = res.reserved_memory_mib.saturating_sub(self.memory_mib);
         res.reserved_vcpus = res.reserved_vcpus.saturating_add(req.vcpus);
         res.reserved_vcpus = res.reserved_vcpus.saturating_sub(self.vcpus);
         res.reserved_storage.entry(self.storage_dir.clone()).and_modify(|gb| {
-            *gb = gb.saturating_add(req.disk_size_gb);
-            *gb = gb.saturating_sub(self.disk_size_gb);
+            *gb = gb.saturating_add(req.disk_size_mib);
+            *gb = gb.saturating_sub(self.disk_size_mib);
         });
         let _ = res.save_to_disk();
 
-        if req.memory_mb != 0 {
-            self.memory_mb = req.memory_mb;
+        if req.memory_mib != 0 {
+            self.memory_mib = req.memory_mib;
         }
         if req.vcpus != 0 {
             self.vcpus = req.vcpus;
         }
-        if req.disk_size_gb != 0 {
-            self.disk_size_gb = req.disk_size_gb;
+        if req.disk_size_mib != 0 {
+            self.disk_size_mib = req.disk_size_mib;
         }
 
         if let Err(e) = systemctl_stop_and_disable(&self.uuid) {
@@ -854,9 +855,9 @@ impl VM {
         vars += "\n";
         vars += &format!(r#"export VCPUS="{}""#, self.vcpus);
         vars += "\n";
-        vars += &format!(r#"export MEMORY="{}M""#, (self.memory_mb / 2 * 2));
+        vars += &format!(r#"export MEMORY="{}M""#, (self.memory_mib / 2 * 2));
         vars += "\n";
-        vars += &format!(r#"export MAX_MEMORY="{}M""#, (self.memory_mb / 2 * 2) + 256);
+        vars += &format!(r#"export MAX_MEMORY="{}M""#, (self.memory_mib / 2 * 2) + 256);
         vars += "\n";
         vars += &format!(r#"export DISK="{}""#, self.disk_path());
         vars += "\n";
@@ -925,7 +926,7 @@ impl VM {
             .arg("-f")
             .arg("qcow2")
             .arg(self.disk_path())
-            .arg(self.disk_size_gb.to_string() + "G")
+            .arg(self.disk_size_mib.to_string() + "M")
             .output()?;
         if !result.status.success() {
             return Err(anyhow!(
@@ -943,7 +944,7 @@ impl VM {
         let result = Command::new("qemu-img")
             .arg("resize")
             .arg(self.disk_path())
-            .arg(self.disk_size_gb.to_string() + "G")
+            .arg(self.disk_size_mib.to_string() + "M")
             .output()?;
         if !result.status.success() {
             return Err(anyhow!(
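
Note on the scaling in send_node_resources: the following is a hypothetical, self-contained Rust sketch (the volume sizes, vCPU and memory totals, and the advertised_* names are invented for illustration, not the daemon's API). It shows why the advertised vCPUs and memory are multiplied by avail_storage_mib / total_storage_available: a single VM can only live on one volume, so the node advertises only the CPU/memory share that matches its largest free volume.

fn main() {
    // Two volumes with 102_400 MiB and 51_200 MiB free: one VM can use at
    // most 102_400 MiB of disk, while 153_600 MiB is free in total.
    let free_per_volume_mib = [102_400usize, 51_200usize];
    let total_storage_available: usize = free_per_volume_mib.iter().sum();
    let avail_storage_mib = *free_per_volume_mib.iter().max().unwrap();

    // Unreserved CPU and memory on the node (made-up numbers).
    let (avail_vcpus, avail_memory_mib) = (48usize, 196_608usize);

    // Scale the advertised CPU/memory down to the fraction of total storage
    // that a single VM can actually reach (2/3 in this example).
    let advertised_vcpus = avail_vcpus * avail_storage_mib / total_storage_available; // 32
    let advertised_memory_mib = avail_memory_mib * avail_storage_mib / total_storage_available; // 131_072

    println!("advertise: {advertised_vcpus} vCPUs, {advertised_memory_mib} MiB RAM, {avail_storage_mib} MiB disk");
}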
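Likewise for the slot-ratio admission check in handle_new_vm_req: a minimal standalone sketch of the same math, reusing within_10_percent from the diff above; the node totals and the requests are invented for illustration. A request passes only if its memory-per-vCPU and disk-per-vCPU ratios stay within 10% of the node's free-resource ratios.

fn within_10_percent(a: usize, b: usize) -> bool {
    let diff = a.abs_diff(b); // absolute difference
    let reference = a.max(b); // the larger of the two
    diff * 10 <= reference
}

fn main() {
    // Free resources on the node: 32 vCPUs, 65_536 MiB RAM, 262_144 MiB disk.
    let (avail_vcpus, avail_mem_mib, avail_disk_mib) = (32usize, 65_536usize, 262_144usize);
    let memory_per_cpu = avail_mem_mib / avail_vcpus; // 2048 MiB per vCPU
    let disk_per_cpu = avail_disk_mib / avail_vcpus; // 8192 MiB per vCPU

    // Balanced request: 4 vCPUs, 8_192 MiB RAM, 32_768 MiB disk -> accepted.
    let (vcpus, memory_mib, disk_size_mib) = (4usize, 8_192usize, 32_768usize);
    assert!(within_10_percent(memory_per_cpu, memory_mib / vcpus));
    assert!(within_10_percent(disk_per_cpu, disk_size_mib / vcpus));

    // Unbalanced request: same vCPUs and disk, but 32_768 MiB RAM gives
    // 8_192 MiB per vCPU, far outside the 10% band -> refused.
    assert!(!within_10_percent(memory_per_cpu, 32_768 / 4));
}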