[mvapich-discuss] alternative RSH_CMD for supporting condor
Imre Szeberenyi
szebi at iit.bme.hu
Thu Sep 6 04:40:38 EDT 2012
Hi,
I am using the Condor scheduler on a dedicated HPC cluster at the Budapest
University of Technology and Economics.
To start mpi jobs on the workers I must use an alternative ssh cmd.
I did not find any configuration option to change the default ssh command.
To support this, I suggest introducing an MV2_RSH_CMD environment variable
to mpirun_rsh.
I modified the source. Please find the patch attached. I hope you find
it useful.
By the way, I have found a minor bug in the configuration file processing:
if neither MV2_USER_CONFIG nor HOME is set, a segmentation fault occurs in
the function
read_user_config (the variable user_config is NULL and is used at line 224 of the file
src/mpid/ch3/channels/common/src/util/mv2_config.c).
Thank you,
Imre Szeberenyi
-------------- next part --------------
diff -rupN mpirun.org/mpirun_params.c mpirun/mpirun_params.c
--- mpirun.org/mpirun_params.c 2012-09-06 00:13:53.932464881 +0200
+++ mpirun/mpirun_params.c 2012-09-06 09:02:38.850417992 +0200
@@ -70,6 +70,7 @@ int USE_LINEAR_SSH = 1; /* By de
-fastssh for tree based ssh */
char hostfile[HOSTFILE_LEN + 1];
+char rshcmd[RSHCMD_LEN + 1];
/*
The group active for mpispawn. NULL if no group change is required.
@@ -208,7 +209,17 @@ void commandLine(int argc, char *argv[],
show_on = 1;
break;
case 5:
- use_rsh = 1;
+ {
+ char *rsh_env;
+ rsh_env = getenv("MV2_RSH_CMD");
+ if (rsh_env != NULL) {
+ strncpy(rshcmd, rsh_env, RSHCMD_LEN);
+ if (strlen(rsh_env) >= RSHCMD_LEN)
+ rshcmd[RSHCMD_LEN] = '\0';
+ } else
+ strcpy(rshcmd, RSH_CMD);
+ use_rsh = 1;
+ }
break;
case 6:
use_rsh = 0;
diff -rupN mpirun.org/mpirun_params.h mpirun/mpirun_params.h
--- mpirun.org/mpirun_params.h 2012-09-06 00:13:53.932464881 +0200
+++ mpirun/mpirun_params.h 2012-09-06 01:01:48.361420969 +0200
@@ -44,7 +44,9 @@ extern int use_rsh;
*/
#define HOSTFILE_LEN 256
+#define RSHCMD_LEN 256
extern char hostfile[HOSTFILE_LEN + 1];
+extern char rshcmd[RSHCMD_LEN + 1];
extern int xterm_on;
extern int show_on;
diff -rupN mpirun.org/mpirun_params_comp.c mpirun/mpirun_params_comp.c
--- mpirun.org/mpirun_params_comp.c 2012-09-06 00:13:53.933456850 +0200
+++ mpirun/mpirun_params_comp.c 2012-09-06 08:57:33.029419861 +0200
@@ -78,6 +78,7 @@ int USE_LINEAR_SSH = 1; /* By de
-fastssh for tree based ssh */
char hostfile[HOSTFILE_LEN + 1];
+char rshcmd[RSHCMD_LEN + 1];
/*
The group active for mpispawn. NULL if no group change is required.
@@ -210,7 +211,17 @@ static void check_option(int argc, char
show_on = 1;
break;
case 5:
- use_rsh = 1;
+ {
+ char *rsh_env;
+ rsh_env = getenv("MV2_RSH_CMD");
+ if (rsh_env != NULL) {
+ strncpy(rshcmd, rsh_env, RSHCMD_LEN);
+ if (strlen(rsh_env) >= RSHCMD_LEN)
+ rshcmd[RSHCMD_LEN] = '\0';
+ } else
+ strcpy(rshcmd, RSH_CMD);
+ use_rsh = 1;
+ }
break;
case 6:
use_rsh = 0;
diff -rupN mpirun.org/mpirun_rsh.c mpirun/mpirun_rsh.c
--- mpirun.org/mpirun_rsh.c 2012-09-06 00:13:53.930450704 +0200
+++ mpirun/mpirun_rsh.c 2012-09-06 00:50:01.410431063 +0200
@@ -975,7 +975,7 @@ void remote_signal(char const host[256],
pid_t const * next_pid = pid;
if (use_rsh) {
- sprintf(remote_command, "%s %s kill -s %s", RSH_CMD, host, signal);
+ sprintf(remote_command, "%s %s kill -s %s", rshcmd, host, signal);
} else {
sprintf(remote_command, "%s %s -x %s kill -s %s", SSH_CMD, SSH_ARG,
host, signal);
@@ -1696,7 +1696,7 @@ void spawn_fast(int argc, char *argv[],
}
if (use_rsh) {
- nargv[arg_offset++] = RSH_CMD;
+ nargv[arg_offset++] = rshcmd;
} else {
nargv[arg_offset++] = SSH_CMD;
nargv[arg_offset++] = SSH_ARG;
@@ -2134,7 +2134,7 @@ void spawn_one(int argc, char *argv[], c
}
if (use_rsh) {
- nargv[arg_offset++] = RSH_CMD;
+ nargv[arg_offset++] = rshcmd;
}
else {
More information about the mvapich-discuss
mailing list