fixed connection management

All nodes now properly maintain three outbound connections.
This commit is contained in:
ghe0 2024-08-20 03:36:19 +03:00
parent efa36737a0
commit 4d00b116f4
Signed by: ghe0
GPG Key ID: 451028EE56A0FBB4
5 changed files with 121 additions and 102 deletions

@ -7,7 +7,7 @@ message NodeUpdate {
string ip = 1;
string keypair = 2;
google.protobuf.Timestamp updated_at = 3;
bool online = 4;
bool public = 4;
}
service Update {

@ -1,8 +1,7 @@
#![allow(dead_code)]
use crate::grpc::challenge::NodeUpdate;
use ed25519_dalek::{Signer, SigningKey, VerifyingKey};
use rand::rngs::OsRng;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::time::Duration;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
@ -13,12 +12,13 @@ use tokio::sync::Mutex;
pub struct NodeInfo {
pub pubkey: VerifyingKey,
pub updated_at: SystemTime,
pub online: bool,
pub public: bool,
}
/// Needs to be surrounded in an Arc.
pub struct Store {
nodes: Mutex<HashMap<IP, NodeInfo>>,
conns: Mutex<HashSet<IP>>,
keys: Mutex<HashMap<VerifyingKey, SigningKey>>,
}
pub enum SigningError {
@ -57,20 +57,32 @@ impl std::fmt::Display for SigningError {
}
impl Store {
/// Creates a `Store` with empty node, connection, and key tables.
/// Nothing here can fail, so construction is infallible.
pub fn init() -> Self {
    Self {
        nodes: Mutex::default(),
        conns: Mutex::default(),
        keys: Mutex::default(),
    }
}
/// Records `ip` as currently having an active outbound connection,
/// so it is skipped by `get_random_node()`.
pub async fn add_conn(&self, ip: &str) {
    self.conns.lock().await.insert(ip.to_string());
}
/// Clears the active-connection mark for `ip`, making it eligible
/// for selection by `get_random_node()` again.
pub async fn delete_conn(&self, ip: &str) {
    self.conns.lock().await.remove(ip);
}
pub async fn tabled_node_list(&self) -> String {
#[derive(Tabled)]
struct OutputRow {
ip: String,
pubkey: String,
age: u64,
online: bool,
public: bool,
}
let mut output = vec![];
for (ip, node_info) in self.nodes.lock().await.iter() {
@ -80,12 +92,12 @@ impl Store {
.duration_since(node_info.updated_at)
.unwrap_or(Duration::ZERO)
.as_secs();
let online = node_info.online;
let public = node_info.public;
output.push(OutputRow {
ip,
pubkey,
age,
online,
public,
});
}
Table::new(output).to_string()
@ -99,8 +111,7 @@ impl Store {
let key_bytes = hex::decode(pubkey)?;
let pubkey = VerifyingKey::from_bytes(&key_bytes.as_slice().try_into()?)?;
let key_store = self.keys.lock().await;
let signing_key = match { key_store.get(&pubkey) } {
let signing_key = match self.get_privkey(&pubkey).await {
Some(k) => k,
None => return Err(SigningError::KeyNotFound),
};
@ -154,11 +165,11 @@ impl Store {
let node_info = NodeInfo {
pubkey,
updated_at: updated_at_std,
online: node.online,
public: node.public,
};
if let Some(mut old_node_info) = self.update_node(node.ip, node_info.clone()).await {
if !node_info.online {
old_node_info.online = false;
if !node_info.public {
old_node_info.public = false;
}
match old_node_info.ne(&node_info) {
true => {
@ -178,15 +189,10 @@ impl Store {
nodes.insert(ip, info.clone())
}
pub async fn remove_node(&self, ip: &str) {
let mut nodes = self.nodes.lock().await;
nodes.remove(ip);
}
/// Returns a clone of the stored entry for `ip`, or `None` if unknown.
/// NOTE(review): despite the name, this returns the whole `NodeInfo`,
/// not just the public key — confirm whether a rename is wanted.
pub async fn get_pubkey(&self, ip: &str) -> Option<NodeInfo> {
    self.nodes.lock().await.get(ip).cloned()
}
// pub async fn remove_node(&self, ip: &str) {
// let mut nodes = self.nodes.lock().await;
// nodes.remove(ip);
// }
pub async fn get_localhost(&self) -> NodeUpdate {
// these unwrap never fail
@ -200,7 +206,7 @@ impl Store {
ip: "localhost".to_string(),
keypair: hex::encode(keypair.as_bytes()),
updated_at: Some(prost_types::Timestamp::from(node_info.updated_at)),
online: false,
public: false,
}
}
@ -212,13 +218,13 @@ impl Store {
let pubkey = keypair_raw.verifying_key();
let ip = "localhost".to_string();
let updated_at = std::time::SystemTime::now();
let online = false;
let public = false;
self.update_node(
ip.clone(),
NodeInfo {
pubkey,
updated_at,
online,
public,
},
)
.await;
@ -228,7 +234,7 @@ impl Store {
ip,
keypair,
updated_at,
online,
public,
}
}
@ -242,42 +248,29 @@ impl Store {
ip: ip.to_string(),
keypair: hex::encode(signing_key.as_bytes()),
updated_at: Some(prost_types::Timestamp::from(node_info.updated_at)),
online: node_info.online,
public: node_info.public,
})
})
.collect()
}
/// you can specify the online argument to get only nodes that are online
pub async fn get_random_nodes(&self, online: bool) -> Vec<String> {
// returns a random node that does not have an active connection
pub async fn get_random_node(&self) -> Option<String> {
use rand::rngs::OsRng;
use rand::RngCore;
let nodes = self.nodes.lock().await;
let conns = self.conns.lock().await;
let len = nodes.len();
if len == 0 {
return Vec::new();
return None;
}
let skip = OsRng.next_u64().try_into().unwrap_or(0) % len;
let mut iter = nodes.iter().cycle().skip(skip);
let mut random_nodes = vec![];
let mut count = 0;
let mut iterations = 0;
while count < 3 && iterations < len {
if let Some((ip, info)) = iter.next() {
if online || info.online {
random_nodes.push(ip.clone());
count -= 1;
}
iterations += 1;
}
}
random_nodes
}
pub async fn set_online(&self, ip: &str, online: bool) {
let mut nodes = self.nodes.lock().await;
if let Some(node) = nodes.get_mut(ip) {
node.online = online;
}
nodes
.keys()
.cycle()
.skip(skip)
.filter(|k| !conns.contains(*k))
.next()
.cloned()
}
}

@ -1,52 +1,65 @@
#![allow(dead_code)]
use super::challenge::NodeUpdate;
use crate::datastore::Store;
use crate::grpc::challenge::update_client::UpdateClient;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::sync::Arc;
use tokio::sync::broadcast::Sender;
use tokio::task::JoinSet;
use tokio::time::{sleep, Duration};
use tokio_stream::wrappers::BroadcastStream;
use tokio_stream::StreamExt;
struct Connection {
ds: Arc<Store>,
tx: Sender<NodeUpdate>,
}
#[derive(Clone)]
struct ConnManager {
pub struct ConnManager {
ds: Arc<Store>,
tx: Sender<NodeUpdate>,
}
impl ConnManager {
fn init(ds: Arc<Store>, tx: Sender<NodeUpdate>) -> Self {
pub fn init(ds: Arc<Store>, tx: Sender<NodeUpdate>) -> Self {
Self { ds, tx }
}
async fn connect(self, node_ip: String) {
pub async fn start_with_node(self, node_ip: String) {
self.connect_wrapper(node_ip).await;
}
/// Repeatedly dials random nodes that have no active connection.
///
/// Every 3 seconds a candidate is requested from the datastore; the
/// "localhost" placeholder entry is never dialed. Each dial blocks this
/// task until the connection ends, so a long-lived connection pauses
/// further dialing from this manager.
pub async fn start(self) {
    loop {
        if let Some(node) = self.ds.get_random_node().await {
            if node != "localhost" {
                // `node` is not used after this point, so move it
                // directly instead of the redundant `node.clone()`
                self.connect_wrapper(node).await;
            }
        }
        sleep(Duration::from_secs(3)).await;
    }
}
/// Dials `node_ip`, bracketing the attempt with `add_conn`/`delete_conn`
/// so the datastore always knows which IPs have an outbound connection
/// in flight.
async fn connect_wrapper(&self, node_ip: String) {
    // Mark the IP busy before dialing so get_random_node() skips it.
    // Using self.ds directly — the previous `let ds = self.ds.clone();`
    // was a redundant Arc refcount bump (self outlives both calls).
    self.ds.add_conn(&node_ip).await;
    if let Err(e) = self.connect(node_ip.clone()).await {
        println!("Client connection for {node_ip} failed: {e:?}");
    }
    self.ds.delete_conn(&node_ip).await;
}
async fn connect(&self, node_ip: String) -> Result<(), Box<dyn std::error::Error>> {
println!("Connecting to {node_ip}...");
let mut client = UpdateClient::connect(format!("http://{node_ip}:31373"))
.await
.unwrap();
let mut client = UpdateClient::connect(format!("http://{node_ip}:31373")).await?;
let rx = self.tx.subscribe();
let rx_stream = BroadcastStream::new(rx).filter_map(|n| n.ok());
let response = client.get_updates(rx_stream).await.unwrap();
let response = client.get_updates(rx_stream).await?;
let mut resp_stream = response.into_inner();
let _ = self.tx.send(self.ds.get_localhost().await);
while let Some(mut update) = resp_stream.message().await.unwrap() {
println!("Received message");
while let Some(mut update) = resp_stream.message().await? {
// "localhost" IPs need to be changed to the real IP of the counterpart
if update.ip == "localhost" {
update.ip = node_ip.clone();
// since we are connecting TO this server, we have a guarantee that this
// server is not behind NAT, so we can set it online
update.online = true;
// server is not behind NAT, so we can set it public
update.public = true;
}
// update the entire network in case the information is new
@ -56,32 +69,23 @@ impl ConnManager {
}
};
}
Ok(())
}
}
// this must panic on failure; app can't start without init nodes
fn load_init_nodes(path: &str) -> Vec<String> {
    let file = File::open(path).unwrap();
    BufReader::new(file)
        .lines()
        .map(|line| line.unwrap())
        .collect()
}
pub async fn init_connections(ds: Arc<Store>, tx: Sender<NodeUpdate>) {
let mut nodes = load_init_nodes("detee_challenge_nodes");
// we rotate online and offline nodes, to constantly check new nodes
let mut only_online_nodes = true;
loop {
let mut set = JoinSet::new();
for node in nodes {
let conn = ConnManager::init(ds.clone(), tx.clone());
set.spawn(conn.connect(node));
}
while let Some(_) = set.join_next().await {}
nodes = ds.get_random_nodes(only_online_nodes).await;
only_online_nodes = !only_online_nodes;
}
}
// pub async fn init_connections(ds: Arc<Store>, tx: Sender<NodeUpdate>) {
// let mut nodes = load_init_nodes("detee_challenge_nodes");
// // we rotate online and offline nodes, to constantly check new nodes
// let mut only_online_nodes = true;
// loop {
// let mut set = JoinSet::new();
// for node in nodes {
// let conn = ConnManager::init(ds.clone(), tx.clone());
// set.spawn(conn.connect_wrapper(node));
// }
// while let Some(_) = set.join_next().await {}
// nodes = ds.get_random_nodes(only_online_nodes).await;
// only_online_nodes = !only_online_nodes;
// }
// }

@ -75,7 +75,6 @@ impl Update for MyServer {
}
}
Ok(update) = rx.recv() => {
println!("Sending message.");
yield Ok(update);
// disconnect client if too many connections are active
if tx.receiver_count() > 9 {

@ -3,6 +3,8 @@ use tokio::task::JoinSet;
mod grpc;
mod http_server;
use crate::datastore::Store;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::sync::Arc;
use tokio::sync::broadcast;
@ -13,13 +15,34 @@ async fn main() {
ds.reset_localhost_keys().await;
let mut join_set = JoinSet::new();
let mut long_term_tasks = JoinSet::new();
let mut init_tasks = JoinSet::new();
join_set.spawn(http_server::init(ds.clone()));
join_set.spawn(grpc::server::MyServer::init(ds.clone(), tx.clone()).start());
join_set.spawn(grpc::client::init_connections(ds.clone(), tx.clone()));
long_term_tasks.spawn(http_server::init(ds.clone()));
long_term_tasks.spawn(grpc::server::MyServer::init(ds.clone(), tx.clone()).start());
let input = File::open("detee_challenge_nodes").unwrap();
let buffered = BufReader::new(input);
for line in buffered.lines() {
init_tasks.spawn(
grpc::client::ConnManager::init(ds.clone(), tx.clone()).start_with_node(line.unwrap()),
);
}
let mut connection_count = 0;
while init_tasks.join_next().await.is_some() {
if connection_count < 3 {
long_term_tasks.spawn(grpc::client::ConnManager::init(ds.clone(), tx.clone()).start());
connection_count += 1;
}
}
while connection_count < 3 {
long_term_tasks.spawn(grpc::client::ConnManager::init(ds.clone(), tx.clone()).start());
connection_count += 1;
}
// exit no matter which task finished
join_set.join_next().await;
long_term_tasks.join_next().await;
println!("Shutting down...");
}