diff --git a/scripts/start_qemu_vm.sh b/scripts/start_qemu_vm.sh
index 3716258..d8d2016 100644
--- a/scripts/start_qemu_vm.sh
+++ b/scripts/start_qemu_vm.sh
@@ -6,9 +6,8 @@
 }
 source "/etc/detee/daemon/vms/$VM_UUID"
 
-mandatory_vars=("IF_DEVICE" "IF_NAME" "IF_TYPE" "KERNEL" \
-    "INITRD" "PARAMS" "CPU_TYPE" "VCPUS" "MEMORY" \
-    "MAX_MEMORY" "DISK" "DISK_SIZE")
+mandatory_vars=("KERNEL" "INITRD" "PARAMS" "CPU_TYPE" \
+    "VCPUS" "MEMORY" "MAX_MEMORY" "DISK" "DISK_SIZE")
 for var in "${mandatory_vars[@]}"; do
     if [ -z "${!var}" ]; then
         echo "Environment variable $var is not set."
@@ -16,28 +15,47 @@ for var in "${mandatory_vars[@]}"; do
     fi
 done
 
-if [[ "$IF_TYPE" == "macvtap" || "$IF_TYPE" == "ipvtap" ]]; then
-    ip link add link $IF_DEVICE name $IF_NAME type $IF_TYPE mode bridge
-    ip link set $IF_NAME up
-    ip link set $IF_NAME promisc on
-    vtap_index="$(cat /sys/class/net/${IF_NAME}/ifindex)"
-    vtap_addr="$(cat /sys/class/net/${IF_NAME}/address)"
-    qemu_device_params="-netdev tap,id=hostnet1,fd=3 3<>/dev/tap${macvtap_index}"
-    qemu_device_params+=" -device virtio-net-pci,netdev=hostnet1,mac=${macvtap_addr},romfile="
-fi
+nat_configured="false"
+qemu_device_params=""
+# The daemon numbers interfaces contiguously from 0. Indirect expansion also
+# sees variables that were sourced without being exported, and keeps the order.
+for (( interface_index = 0; ; interface_index++ )); do
+    interface="NETWORK_INTERFACE_${interface_index}"
+    [[ -n "${!interface}" ]] || break
 
-if [[ "$IF_TYPE" == "NAT" ]]; then
-    ports=""
-    for port_pair in "$NAT_PORT_FW"; do
-        host_port="$( echo $port_pair | cut -d ':' -f1 )"
-        guest_port="$( echo $port_pair | cut -d ':' -f2 )"
-        ports+=",hostfwd=tcp::${host_port}-:${guest_port}"
-    done
-    qemu_device_params="-netdev user,id=vmnic${ports}"
-    qemu_device_params+=" -device virtio-net-pci,netdev=vmnic,romfile="
-fi
+    interface_type="$( echo "${!interface}" | cut -d '_' -f1 )"
 
-# TODO: also handle bridge device (when IPs are public, but the host is the gateway)
+    if [[ "$interface_type" == "macvtap" || "$interface_type" == "ipvtap" ]]; then
+        interface_device="$( echo "${!interface}" | cut -d '_' -f2 )"
+        interface_name="$( echo "${!interface}" | cut -d '_' -f3 )"
+        ip link add link "$interface_device" name "$interface_name" type "$interface_type" mode bridge
+        ip link set "$interface_name" up
+        ip link set "$interface_name" promisc on
+        vtap_index="$(cat "/sys/class/net/${interface_name}/ifindex")"
+        vtap_addr="$(cat "/sys/class/net/${interface_name}/address")"
+        # A redirection stored inside a variable is never parsed as a redirection,
+        # so open the tap device here; bash picks a free descriptor for tap_fd.
+        exec {tap_fd}<>"/dev/tap${vtap_index}"
+        qemu_device_params+=" -netdev tap,id=hostnet${vtap_index},fd=${tap_fd}"
+        qemu_device_params+=" -device virtio-net-pci,netdev=hostnet${vtap_index},mac=${vtap_addr},romfile="
+    fi
+
+    if [[ "$interface_type" == "NAT" && "$nat_configured" == "false" ]]; then
+        ports=""
+        nat_configured="true"
+        # unquoted on purpose: split the list into one host:guest pair per word
+        for port_pair in $NAT_PORT_FW; do
+            host_port="$( echo $port_pair | cut -d ':' -f1 )"
+            guest_port="$( echo $port_pair | cut -d ':' -f2 )"
+            ports+=",hostfwd=tcp::${host_port}-:${guest_port}"
+        done
+        qemu_device_params+=" -netdev user,id=vmnic${ports}"
+        qemu_device_params+=" -device virtio-net-pci,netdev=vmnic,romfile="
+    fi
+
+    # TODO: also handle bridge device (when IPs are public, but the host is the gateway)
+
+done
 
 vm_disk="/root/dtrfs/arch-1-ghe0.qcow2"
 
@@ -46,7 +64,7 @@ vm_disk="/root/dtrfs/arch-1-ghe0.qcow2"
 qemu-system-x86_64 $qemu_device_params \
     -enable-kvm -cpu $CPU_TYPE -vga none \
     -machine q35,confidential-guest-support=sev0,memory-backend=ram1 \
-    -smp $VCPUS,maxcpus=255 -m $MEMORY,slots=5,maxmem=$MAX_MEMORY \
+    -smp $VCPUS,maxcpus=$VCPUS -m $MEMORY,slots=5,maxmem=$MAX_MEMORY \
     -no-reboot -bios /usr/share/edk2/ovmf/OVMF.amdsev.fd \
     -drive file=${DISK},if=none,id=disk0,format=qcow2 \
     -device virtio-blk-pci,drive=disk0 \
diff --git a/src/config.rs b/src/config.rs
index 6b994e0..2954b2d 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -4,24 +4,25 @@ use cidr::Ipv6Cidr;
 use core::net::Ipv4Addr;
 use core::net::Ipv6Addr;
 
+use std::ops::Range;
+
 struct Volume {
     path: String,
-    // maximum allowed storage in MB
-    max_storage: u64,
+    // maximum allowed storage in GB
+    max_reservation: u64,
 }
 
 struct Interface {
-    r#type: InterfaceType,
+    driver: InterfaceType,
     name: String,
     ipv4_ranges: Vec<Ipv4Cidr>,
     reserved_v4_addrs: Vec<Ipv4Addr>,
     ipv6_ranges: Vec<Ipv6Cidr>,
     reserved_v6_addrs: Vec<Ipv6Addr>,
-    // TODO: add bandwidth
 }
 
+// TODO: create mechanic to autodetect interface type
 enum InterfaceType {
-    NAT,
     MACVTAP,
     IPVTAP,
     Bridge,
@@ -29,8 +30,9 @@ enum InterfaceType {
 
 struct Config {
     max_cores_per_vm: u64,
-    max_cpu_reservation: u64,
+    max_vcpu_reservation: u64,
     max_mem_reservation: u64,
     network_interfaces: Vec<Interface>,
     volumes: Vec<Volume>,
+    public_port_range: Range<u16>,
 }
diff --git a/src/constants.rs b/src/constants.rs
index ee60b2c..6fced57 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -1,7 +1,11 @@
 #![allow(dead_code)]
 
 pub(crate) const DEFAULT_OVMF: &str = "/usr/share/edk2/ovmf/OVMF.amdsev.fd";
-pub(crate) const BOOT_DIR: &str = "/var/lib/libvirt/detee/";
+pub(crate) const VM_BOOT_DIR: &str = "/var/lib/detee/boot/";
+// NOTE(review): identical to VM_BOOT_DIR - confirm disks are meant to share the boot directory
+pub(crate) const VM_DISK_DIR: &str = "/var/lib/detee/boot/";
 pub(crate) const VM_CONFIG_DIR: &str = "/etc/detee/daemon/vms/";
-pub(crate) const CONFIG_PATH: &str = "/etc/detee/daemon/config.json";
-pub(crate) const START_VM_SH: &str = "/usr/local/bin/detee/start_qemu_vm.sh";
+pub(crate) const DAEMON_CONFIG_PATH: &str = "/etc/detee/daemon/config.json";
+pub(crate) const START_VM_SCRIPT: &str = "/usr/local/bin/detee/start_qemu_vm.sh";
+// TODO: research if other CPU types provide better performance
+pub(crate) const QEMU_VM_CPU_TYPE: &str = "EPYC-v4";
diff --git a/src/main.rs b/src/main.rs
index 07488b2..04751c3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 mod config;
 mod state;
 mod constants;
+mod tcontract;
 
 fn main() {
     println!("Hello, world!");
diff --git a/src/state.rs b/src/state.rs
index 4a649eb..2de7ed2 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,11 +1,20 @@
 #![allow(dead_code)]
-use std::fs::remove_file;
 use crate::constants::*;
+use anyhow::anyhow;
 use anyhow::Result;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::fs::remove_file;
 use std::fs::File;
 use std::io::Write;
+use std::ops::Range;
+use std::process::Command;
 
-enum NIC {
+type VMUUID = String;
+
+pub enum NIC {
+    // TODO: instead of QEMU userspace NAT, use iptables kernelspace NAT
+    // in case of QEMU-based NAT, device name is not needed
     NAT { device: String },
     // TODO: figure how to calculate IF_NAME based on index
     MACVTAP { name: String, device: String },
@@ -13,6 +22,58 @@ enum NIC {
     Bridge { device: String },
 }
 
+#[derive(PartialEq)]
+enum IPStatus {
+    Available,
+    Reserved(VMUUID),
+    Blacklisted,
+}
+
+struct IPData {
+    interface: NIC,
+    status: IPStatus,
+}
+
+pub struct StoragePool {
+    path: String,
+    max_reservation: u64,
+    current_reservation: u64,
+    // add mechanic to detect storage tier
+    // tier: StorageTier,
+}
+
+pub struct PortPool {
+    port_range: Range<u16>,
+    used_ports: HashSet<u16>,
+}
+
+pub struct Resources {
+    // QEMU does not support MHz limitation
+    max_vcpus_reservation: u64,
+    available_vcpus_reservation: u64,
+    total_memory_reservation: u64,
+    available_memory_reservation: u64,
+    // will be only one StoragePool for now and multiple later
+    storage_pools: Vec<StoragePool>,
+    ipv4_pool: HashMap<String, IPData>,
+    ipv6_pool: HashMap<String, IPData>,
+}
+
+impl Resources {
+    fn get_available_ipv4(&self) -> usize {
+        self.ipv4_pool
+            .values()
+            .filter(|ip_data| ip_data.status == IPStatus::Available)
+            .count()
+    }
+    fn get_available_ipv6(&self) -> usize {
+        self.ipv6_pool
+            .values()
+            .filter(|ip_data| ip_data.status == IPStatus::Available)
+            .count()
+    }
+}
+
 impl NIC {
     fn if_type(&self) -> String {
         match self {
@@ -48,22 +109,25 @@ impl NIC {
     }
 }
 
-struct VM {
-    uuid: String,
-    hostname: String,
-    ip: String,
+struct IPConfig {
+    address: String,
     // requires short format (example: 24)
     subnet: String,
     gateway: String,
     nameserver: String,
-    admin_key: String,
-    // TODO: add support for multiple NICs
     nic: NIC,
-    cpu_type: String,
+}
+
+pub struct VM {
+    uuid: VMUUID,
+    hostname: String,
+    admin_key: String,
+    ips: Vec<IPConfig>,
+    // currently hardcoded to EPYC-v4
+    // cpu_type: String,
     vcpus: u32,
     // memory in MB
     memory: u32,
-    disk_absolute_path: String,
     // disk size in GB
     disk_size: u32,
     kernel_path: String,
@@ -72,11 +136,27 @@ struct VM {
 }
 
 impl VM {
+    // For the MVP, the T-Contract offers VM+IP+Disk as a bundle.
+    // This means we can enforce the path to the disk.
+    // This may change in the future as the VM is allowed to have multiple disks.
+    pub fn disk_path(&self) -> String {
+        VM_DISK_DIR.to_string() + "/" + &self.uuid + ".qcow2"
+    }
     pub fn kernel_params(&self) -> String {
-        let ip_string = format!(
-            "detee_net={}_{}_{}_{}",
-            self.ip, self.subnet, self.gateway, self.nameserver
-        );
+        // One `detee_net_ethN=...` entry per IP. Kernel command-line parameters
+        // are space-separated, so the entries must not be glued together.
+        let ip_string = self
+            .ips
+            .iter()
+            .enumerate()
+            .map(|(i, ip)| {
+                format!(
+                    "detee_net_eth{}={}_{}_{}_{}",
+                    i, ip.address, ip.subnet, ip.gateway, ip.nameserver
+                )
+            })
+            .collect::<Vec<_>>()
+            .join(" ");
         let admin_key = format!("detee_admin={}", self.admin_key);
         let hostname = format!("detee_name={}", self.hostname);
         format!("{} {} {}", ip_string, admin_key, hostname)
@@ -85,24 +165,50 @@ impl VM {
     pub fn export_vm_env(&self) -> String {
         let mut vars = String::new();
 
-        vars += &format!("IF_DEVICE={}\n", self.nic.device_name());
-        if let Some(vtap_name) = self.nic.vtap_name() {
-            vars += &format!("IF_NAME={}\n", vtap_name);
+        for (i, ip) in self.ips.iter().enumerate() {
+            let mut interface = format!("NETWORK_INTERFACE_{}={}", i, ip.nic.if_type());
+            // device is currently ignored in case of NAT cause we assume QEMU userspace NAT
+            if let Some(vtap_name) = ip.nic.vtap_name() {
+                interface += &format!("_{}_{}", ip.nic.device_name(), vtap_name);
+            }
+            vars += &format!("{}\n", interface);
         }
-        vars += &format!("IF_TYPE={}\n", self.nic.if_type());
+
         vars += &format!("KERNEL={}\n", self.kernel_path);
         vars += &format!("INITRD={}\n", self.initrd_path);
         vars += &format!("PARAMS={}\n", self.kernel_params());
-        vars += &format!("CPU_TYPE={}\n", self.cpu_type);
+        vars += &format!("CPU_TYPE={}\n", QEMU_VM_CPU_TYPE);
         vars += &format!("VCPUS={}\n", self.vcpus);
         vars += &format!("MEMORY={}MB\n", self.memory);
         vars += &format!("MAX_MEMORY={}MB\n", self.memory + 256);
-        vars += &format!("DISK={}\n", self.disk_absolute_path);
+        vars += &format!("DISK={}\n", self.disk_path());
         vars += &format!("DISK_SIZE={}GB\n", self.disk_size);
 
         todo!();
     }
 
+    // Runs `ip link del <name>` for every IP whose NIC reports a vtap name.
+    pub fn delete_vtap_interfaces(&self) -> Result<()> {
+        for ip in self.ips.iter() {
+            if let Some(name) = ip.nic.vtap_name() {
+                let result = Command::new("ip")
+                    .arg("link")
+                    .arg("del")
+                    .arg(&name)
+                    .output()?;
+                if !result.status.success() {
+                    return Err(anyhow!(
+                        "Could not delete vtap interface {:?}:\n{}\n{}",
+                        name,
+                        String::from_utf8_lossy(&result.stdout),
+                        String::from_utf8_lossy(&result.stderr)
+                    ));
+                }
+            }
+        }
+        Ok(())
+    }
+
     pub fn write_systemd_unit_file(&self) -> Result<()> {
         let mut contents = String::new();
         contents += &format!("[Unit]");
@@ -112,7 +218,7 @@ impl VM {
         contents += &format!("[Service]");
         contents += &format!("Type=simple");
         contents += &format!("Environment=VM_UUID={}", self.uuid);
-        contents += &format!("ExecStart={}", START_VM_SH);
+        contents += &format!("ExecStart={}", START_VM_SCRIPT);
         contents += &format!("ExecStop=/bin/kill -s SIGINT $MAINPID");
         contents += &format!("Restart=always");
         contents += &format!("");
@@ -128,4 +234,30 @@ impl VM {
         remove_file(VM_CONFIG_DIR.to_string() + "/" + &self.uuid)?;
         Ok(())
     }
+
+    // Creates the qcow2 image at `disk_path()` with a size of `disk_size` G
+    // by running `qemu-img create`.
+    pub fn create_disk(&self) -> Result<()> {
+        let result = Command::new("qemu-img")
+            .arg("create")
+            // `-f` takes the image format, not the file name
+            .arg("-f")
+            .arg("qcow2")
+            .arg(self.disk_path())
+            .arg(self.disk_size.to_string() + "G")
+            .output()?;
+        if !result.status.success() {
+            return Err(anyhow!(
+                "Could not create VM Disk:\n{}\n{}",
+                String::from_utf8_lossy(&result.stdout),
+                String::from_utf8_lossy(&result.stderr)
+            ));
+        }
+        Ok(())
+    }
+
+    pub fn delete_disk(&self) -> Result<()> {
+        remove_file(self.disk_path())?;
+        Ok(())
+    }
 }
diff --git a/src/tcontract.rs b/src/tcontract.rs
new file mode 100644
index 0000000..0424677
--- /dev/null
+++ b/src/tcontract.rs
@@ -0,0 +1,20 @@
+#![allow(dead_code)]
+// this is defined in the engine but we will mock it here for now
+
+pub struct FinalizedTContract {
+    pub owner: String,
+    pub user: String,
+    pub alloc: ResourceAllocation,
+}
+
+#[derive(Default)]
+pub struct ResourceAllocation {
+    pub vcpus: usize,
+    pub memory: usize,
+    pub storage: usize,
+    pub published_ports: Vec<u16>,
+    // storage tier: not part of MVP
+    // pub storage_tier: usize,
+    pub public_ipv4: Option<String>,
+    pub public_ipv6: Option<String>,
+}