[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: ipropd slave



On Mon, Jun 16, 2003 at 05:26:41PM +0200, Johan Danielsson wrote:
> Ulrich Weber <uw@imos.net> writes:
> 
> > Wouldn't it be possible to recheck every minute or so if ipropd-master
> > is up again instead of just quitting?
> 
> Yes.

Here's a patch against 0.6 that tries to work around connection failures
and broken connections, but it hasn't had enough real use for me to say
that I trust it. I'm also not sure that it's portable, or that it isn't
just plain heinously broken in other ways.

(Sorry for not putting the patch up on a web page, but I don't have one
right now.)

take care, all

--buck

--- ipropd_slave.c~	Mon Oct 21 11:51:44 2002
+++ ipropd_slave.c	Mon Jun 16 15:00:54 2003
@@ -35,30 +35,78 @@
 
 RCSID("$Id: ipropd_slave.c,v 1.27 2002/10/21 15:51:44 joda Exp $");
 
+#define APP "ipropd-slave"
+
 static krb5_log_facility *log_facility;
 
 static int
 connect_to_master (krb5_context context, const char *master)
 {
-    int fd;
+    static time_t reconnect_min;
+    static time_t backoff;
+    static time_t reconnect_max;
+    time_t reconnect;
     struct sockaddr_in addr;
-    struct hostent *he;
 
-    fd = socket (AF_INET, SOCK_STREAM, 0);
-    if (fd < 0)
-	krb5_err (context, 1, errno, "socket AF_INET");
     memset (&addr, 0, sizeof(addr));
     addr.sin_family = AF_INET;
     addr.sin_port   = krb5_getportbyname (context,
 					  IPROP_SERVICE, "tcp", IPROP_PORT);
+    if (!reconnect_min) {
+	krb5_appdefault_time(context, APP, NULL, "reconnect",
+			     (time_t) 10, &reconnect_min);
+	krb5_appdefault_time(context, APP, NULL, "backoff",
+			     (time_t) 10, &backoff);
+	krb5_appdefault_time(context, APP, NULL, "max",
+			     (time_t) 300, &reconnect_max);
+    }
+    reconnect = 0;
+    for (;;) {
+	static time_t before = (time_t) 0;
+	time_t now = time(NULL);
+	time_t elapsed = now - before;
+	struct hostent *he;
+    	int fd;
+	if (elapsed < reconnect) {
+	    struct timeval timeout;
+	    timeout.tv_sec = reconnect - elapsed;
+	    timeout.tv_usec = 0;
+	    krb5_warnx (context, "sleeping %lu seconds", timeout.tv_sec);
+	    if (-1 == select(0, NULL, NULL, NULL, &timeout)) {
+		if (errno != EINTR) {
+		    krb5_err(context, 1, errno, "select");
+		}
+	    }
+	}
+	before = now;
+    	fd = socket (AF_INET, SOCK_STREAM, 0);
+	if (fd < 0)
+	    krb5_err (context, 1, errno, "socket AF_INET");
     he = roken_gethostbyname (master);
     if (he == NULL)
-	krb5_errx (context, 1, "gethostbyname: %s", hstrerror(h_errno));
+	    krb5_warnx (context, "gethostbyname: %s", hstrerror(h_errno));
+	else {
     memcpy (&addr.sin_addr, he->h_addr, sizeof(addr.sin_addr));
-    if(connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
-	krb5_err (context, 1, errno, "connect");
+	    krb5_warnx (context, "connecting to master %s", master);
+	    if(connect(fd, (struct sockaddr *)&addr, sizeof(addr)) >= 0) {
+		krb5_warnx (context, "connected");
     return fd;
 }
+	    krb5_warn (context, errno, "connect");
+	    close(fd);
+	}
+	if (reconnect == 0) {
+	    reconnect = reconnect_min;
+	}
+	else if (reconnect < reconnect_max) {
+	    reconnect += backoff;
+	    if (reconnect > reconnect_max)
+		reconnect = reconnect_max;
+	}
+    }
+    /* not reached */
+    return -1;
+}
 
 static void
 get_creds(krb5_context context, const char *keytab_str,
@@ -111,7 +159,7 @@
     if(ret) krb5_err(context, 1, ret, "krb5_cc_store_cred");
 }
 
-static void
+static int
 ihave (krb5_context context, krb5_auth_context auth_context,
        int fd, u_int32_t version)
 {
@@ -119,6 +167,7 @@
     u_char buf[8];
     krb5_storage *sp;
     krb5_data data, priv_data;
+    int disconnected = FALSE;
 
     sp = krb5_storage_from_mem (buf, 8);
     krb5_store_int32 (sp, I_HAVE);
@@ -132,10 +181,18 @@
 	krb5_err (context, 1, ret, "krb_mk_priv");
 
     ret = krb5_write_message (context, &fd, &priv_data);
-    if (ret)
+    if (ret) {
+	if (errno == EPIPE) {
+	    disconnected = TRUE;
+	    krb5_warn (context, ret, "krb5_write_message");
+	}
+	else {
 	krb5_err (context, 1, ret, "krb5_write_message");
+	}
+    }
 
     krb5_data_free (&priv_data);
+    return disconnected;
 }
 
 static void
@@ -218,7 +275,7 @@
 	krb5_err (context, 1, ret, "db->close");
 }
 
-static void
+static int
 receive_everything (krb5_context context, int fd,
 		    kadm5_server_context *server_context,
 		    krb5_auth_context auth_context)
@@ -228,6 +285,7 @@
     int32_t vno;
     int32_t opcode;
     unsigned long tmp;
+    int disconnected = FALSE;
 
     char *dbname;
     HDB *mydb;
@@ -254,9 +312,16 @@
 	krb5_storage *sp;
 
 	ret = krb5_read_priv_message(context, auth_context, &fd, &data);
-
-	if (ret)
+	if (ret) {
+	    if (ret == HEIM_ERR_EOF) {
+		krb5_warn (context, ret, "krb5_read_priv_message");
+		disconnected = TRUE;
+		goto cleanup;
+	    }
+	    else {
 	    krb5_err (context, 1, ret, "krb5_read_priv_message");
+	    }
+	}
 
 	sp = krb5_storage_from_data (&data);
 	krb5_ret_int32 (sp, &opcode);
@@ -299,17 +364,21 @@
     if (ret)
 	krb5_err (context, 1, ret, "kadm5_log_nop");
 
+cleanup:
     krb5_data_free (&data);
 
     ret = mydb->close (context, mydb);
     if (ret)
 	krb5_err (context, 1, ret, "db->close");
+    if (!disconnected) {
     ret = mydb->rename (context, mydb, server_context->db->name);
     if (ret)
 	krb5_err (context, 1, ret, "db->rename");
+    }
     ret = mydb->destroy (context, mydb);
     if (ret)
 	krb5_err (context, 1, ret, "db->destroy");
+    return disconnected;
 }
 
 static char *realm;
@@ -397,25 +466,41 @@
 
     get_creds(context, keytab_str, &ccache, master);
 
-    master_fd = connect_to_master (context, master);
-
     ret = krb5_sname_to_principal (context, master, IPROP_NAME,
 				   KRB5_NT_SRV_HST, &server);
     if (ret)
 	krb5_err (context, 1, ret, "krb5_sname_to_principal");
 
     auth_context = NULL;
+    master_fd = -1;
+
+    for (;;) {
+
+	int disconnected = FALSE;
+
+	if (master_fd != -1) {
+	    shutdown(master_fd, SHUT_RDWR);
+	    close(master_fd);
+	}
+	master_fd = connect_to_master (context, master);
+
+	if (auth_context) {
+	    krb5_auth_con_free(context, auth_context);
+	    auth_context = NULL;
+	}
     ret = krb5_sendauth (context, &auth_context, &master_fd,
 			 IPROP_VERSION, NULL, server,
 			 AP_OPTS_MUTUAL_REQUIRED, NULL, NULL,
 			 ccache, NULL, NULL, NULL);
-    if (ret)
-	krb5_err (context, 1, ret, "krb5_sendauth");
+	if (ret) {
+	    krb5_warn (context, ret, "krb5_sendauth");
+	    continue;
+	}
 
-    ihave (context, auth_context, master_fd,
+	ret = ihave (context, auth_context, master_fd,
 	   server_context->log_context.version);
-
-    for (;;) {
+	if (!ret)
+	do {
 	int ret;
 	krb5_data out;
 	krb5_storage *sp;
@@ -423,19 +508,28 @@
 
 	ret = krb5_read_priv_message(context, auth_context, &master_fd, &out);
 
-	if (ret)
+	    if (ret) {
+		if (ret == HEIM_ERR_EOF) {
+			krb5_warn (context, ret, "krb5_read_priv_message");
+			disconnected = TRUE;
+			break;
+		}
+		else {
 	    krb5_err (context, 1, ret, "krb5_read_priv_message");
+		}
+	    }
 
 	sp = krb5_storage_from_mem (out.data, out.length);
 	krb5_ret_int32 (sp, &tmp);
 	switch (tmp) {
 	case FOR_YOU :
 	    receive (context, sp, server_context);
-	    ihave (context, auth_context, master_fd,
+		disconnected = ihave (context, auth_context, master_fd,
 		   server_context->log_context.version);
 	    break;
 	case TELL_YOU_EVERYTHING :
-	    receive_everything (context, master_fd, server_context,
+		disconnected = receive_everything (context, master_fd,
+						   server_context,
 				auth_context);
 	    break;
 	case NOW_YOU_HAVE :
@@ -447,6 +541,9 @@
 	}
 	krb5_storage_free (sp);
 	krb5_data_free (&out);
+	} while (!disconnected);
+
+	krb5_warnx (context, "disconnected");
     }
     
     return 0;