[mvapich-discuss] alternative RSH_CMD for supporting condor

Imre Szeberenyi szebi at iit.bme.hu
Thu Sep 6 04:40:38 EDT 2012


Hi,

I am using condor scheduler on a dedicated HPC cluster at Budapest
University of Technology and Economics.

To start MPI jobs on the workers I must use an alternative ssh command.
I did not find any configuration option to change the default ssh command.
To support this, I suggest introducing an MV2_RSH_CMD environment variable
to mpirun_rsh.

I modified the source. Please find the patch attached. I hope you find
it useful.


Btw: I have found a minor bug in the configuration file processing:

If neither MV2_USER_CONFIG nor HOME is set, a segmentation fault occurs in
the function read_user_config (the variable user_config is NULL and is
dereferenced at line 224 of file
src/mpid/ch3/channels/common/src/util/mv2_config.c)


Thank you,

Imre Szeberenyi
-------------- next part --------------
diff -rupN mpirun.org/mpirun_params.c mpirun/mpirun_params.c
--- mpirun.org/mpirun_params.c	2012-09-06 00:13:53.932464881 +0200
+++ mpirun/mpirun_params.c	2012-09-06 09:02:38.850417992 +0200
@@ -70,6 +70,7 @@ int USE_LINEAR_SSH = 1;         /* By de
                                    -fastssh for tree based ssh */
 
 char hostfile[HOSTFILE_LEN + 1];
+char rshcmd[RSHCMD_LEN + 1];
 
 /*
   The group active for mpispawn. NULL if no group change is required.
@@ -208,7 +209,17 @@ void commandLine(int argc, char *argv[],
                 show_on = 1;
                 break;
             case 5:
-                use_rsh = 1;
+                {
+                    char *rsh_env;
+                    rsh_env = getenv("MV2_RSH_CMD");
+                    if (rsh_env != NULL) { 
+                        strncpy(rshcmd, rsh_env, RSHCMD_LEN);
+                        if (strlen(rsh_env) >= RSHCMD_LEN) 
+                            rshcmd[RSHCMD_LEN] = '\0';
+                    } else 
+                        strcpy(rshcmd, RSH_CMD);
+                    use_rsh = 1;
+                }
                 break;
             case 6:
                 use_rsh = 0;
diff -rupN mpirun.org/mpirun_params.h mpirun/mpirun_params.h
--- mpirun.org/mpirun_params.h	2012-09-06 00:13:53.932464881 +0200
+++ mpirun/mpirun_params.h	2012-09-06 01:01:48.361420969 +0200
@@ -44,7 +44,9 @@ extern int use_rsh;
 */
 
 #define HOSTFILE_LEN 256
+#define RSHCMD_LEN 256
 extern char hostfile[HOSTFILE_LEN + 1];
+extern char rshcmd[RSHCMD_LEN + 1];
 
 extern int xterm_on;
 extern int show_on;
diff -rupN mpirun.org/mpirun_params_comp.c mpirun/mpirun_params_comp.c
--- mpirun.org/mpirun_params_comp.c	2012-09-06 00:13:53.933456850 +0200
+++ mpirun/mpirun_params_comp.c	2012-09-06 08:57:33.029419861 +0200
@@ -78,6 +78,7 @@ int USE_LINEAR_SSH = 1;         /* By de
                                    -fastssh for tree based ssh */
 
 char hostfile[HOSTFILE_LEN + 1];
+char rshcmd[RSHCMD_LEN + 1];
 
 /*
   The group active for mpispawn. NULL if no group change is required.
@@ -210,7 +211,17 @@ static void check_option(int argc, char 
         show_on = 1;
         break;
     case 5:
-        use_rsh = 1;
+        {
+            char *rsh_env;
+            rsh_env = getenv("MV2_RSH_CMD"); 
+            if (rsh_env != NULL) {
+                strncpy(rshcmd, rsh_env, RSHCMD_LEN);
+            if (strlen(rsh_env) >= RSHCMD_LEN)
+                rshcmd[RSHCMD_LEN] = '\0';
+            } else
+                strcpy(rshcmd, RSH_CMD);
+            use_rsh = 1;
+        }
         break;
     case 6:
         use_rsh = 0;
diff -rupN mpirun.org/mpirun_rsh.c mpirun/mpirun_rsh.c
--- mpirun.org/mpirun_rsh.c	2012-09-06 00:13:53.930450704 +0200
+++ mpirun/mpirun_rsh.c	2012-09-06 00:50:01.410431063 +0200
@@ -975,7 +975,7 @@ void remote_signal(char const host[256],
     pid_t const * next_pid = pid;
 
     if (use_rsh) {
-        sprintf(remote_command, "%s %s kill -s %s", RSH_CMD, host, signal);
+        sprintf(remote_command, "%s %s kill -s %s", rshcmd, host, signal);
     } else {
         sprintf(remote_command, "%s %s -x %s kill -s %s", SSH_CMD, SSH_ARG,
                 host, signal);
@@ -1696,7 +1696,7 @@ void spawn_fast(int argc, char *argv[], 
                 }
 
                 if (use_rsh) {
-                    nargv[arg_offset++] = RSH_CMD;
+                    nargv[arg_offset++] = rshcmd;
                 } else {
                     nargv[arg_offset++] = SSH_CMD;
                     nargv[arg_offset++] = SSH_ARG;
@@ -2134,7 +2134,7 @@ void spawn_one(int argc, char *argv[], c
             }
 
             if (use_rsh) {
-                nargv[arg_offset++] = RSH_CMD;
+                nargv[arg_offset++] = rshcmd;
             }
 
             else {


More information about the mvapich-discuss mailing list