[mvapich-discuss] alternative RSH_CMD for supporting condor
Jonathan Perkins
perkinjo at cse.ohio-state.edu
Thu Sep 6 08:02:53 EDT 2012
On Thu, Sep 06, 2012 at 10:40:38AM +0200, Imre Szeberenyi wrote:
> Hi,
>
> I am using condor scheduler on a dedicated HPC cluster at Budapest
> University of Technology and Economics.
>
> To start mpi jobs on the workers I must use an alternative ssh cmd.
> I did not find any configuration option to change the default ssh command.
> To support this, I suggest introducing an MV2_RSH_CMD environment
> variable to mpirun_rsh.
>
> I modified the source. Please find attached the patch. I hope you
> find it useful.
Thank you! This patch looks like it can be a benefit to other users as
well. We will review it, and you should expect to see it in a future
release of MVAPICH2.
>
>
> Btw: I have found a minor bug in configuration file processing:
>
> If MV2_USER_CONFIG and HOME are not set, a segmentation fault occurs
> in function
> read_user_config (variable user_config is NULL and is used at line 224 of file
> src/mpid/ch3/channels/common/src/util/mv2_config.c)
Looks like we didn't consider the case where HOME was not set. We'll
also take a look at this. Thanks again.
>
>
> Thank you,
>
> Imre Szeberenyi
> diff -rupN mpirun.org/mpirun_params.c mpirun/mpirun_params.c
> --- mpirun.org/mpirun_params.c 2012-09-06 00:13:53.932464881 +0200
> +++ mpirun/mpirun_params.c 2012-09-06 09:02:38.850417992 +0200
> @@ -70,6 +70,7 @@ int USE_LINEAR_SSH = 1; /* By de
> -fastssh for tree based ssh */
>
> char hostfile[HOSTFILE_LEN + 1];
> +char rshcmd[RSHCMD_LEN + 1];
>
> /*
> The group active for mpispawn. NULL if no group change is required.
> @@ -208,7 +209,17 @@ void commandLine(int argc, char *argv[],
> show_on = 1;
> break;
> case 5:
> - use_rsh = 1;
> + {
> + char *rsh_env;
> + rsh_env = getenv("MV2_RSH_CMD");
> + if (rsh_env != NULL) {
> + strncpy(rshcmd, rsh_env, RSHCMD_LEN);
> + if (strlen(rsh_env) >= RSHCMD_LEN)
> + rshcmd[RSHCMD_LEN] = '\0';
> + } else
> + strcpy(rshcmd, RSH_CMD);
> + use_rsh = 1;
> + }
> break;
> case 6:
> use_rsh = 0;
> diff -rupN mpirun.org/mpirun_params.h mpirun/mpirun_params.h
> --- mpirun.org/mpirun_params.h 2012-09-06 00:13:53.932464881 +0200
> +++ mpirun/mpirun_params.h 2012-09-06 01:01:48.361420969 +0200
> @@ -44,7 +44,9 @@ extern int use_rsh;
> */
>
> #define HOSTFILE_LEN 256
> +#define RSHCMD_LEN 256
> extern char hostfile[HOSTFILE_LEN + 1];
> +extern char rshcmd[RSHCMD_LEN + 1];
>
> extern int xterm_on;
> extern int show_on;
> diff -rupN mpirun.org/mpirun_params_comp.c mpirun/mpirun_params_comp.c
> --- mpirun.org/mpirun_params_comp.c 2012-09-06 00:13:53.933456850 +0200
> +++ mpirun/mpirun_params_comp.c 2012-09-06 08:57:33.029419861 +0200
> @@ -78,6 +78,7 @@ int USE_LINEAR_SSH = 1; /* By de
> -fastssh for tree based ssh */
>
> char hostfile[HOSTFILE_LEN + 1];
> +char rshcmd[RSHCMD_LEN + 1];
>
> /*
> The group active for mpispawn. NULL if no group change is required.
> @@ -210,7 +211,17 @@ static void check_option(int argc, char
> show_on = 1;
> break;
> case 5:
> - use_rsh = 1;
> + {
> + char *rsh_env;
> + rsh_env = getenv("MV2_RSH_CMD");
> + if (rsh_env != NULL) {
> + strncpy(rshcmd, rsh_env, RSHCMD_LEN);
> + if (strlen(rsh_env) >= RSHCMD_LEN)
> + rshcmd[RSHCMD_LEN] = '\0';
> + } else
> + strcpy(rshcmd, RSH_CMD);
> + use_rsh = 1;
> + }
> break;
> case 6:
> use_rsh = 0;
> diff -rupN mpirun.org/mpirun_rsh.c mpirun/mpirun_rsh.c
> --- mpirun.org/mpirun_rsh.c 2012-09-06 00:13:53.930450704 +0200
> +++ mpirun/mpirun_rsh.c 2012-09-06 00:50:01.410431063 +0200
> @@ -975,7 +975,7 @@ void remote_signal(char const host[256],
> pid_t const * next_pid = pid;
>
> if (use_rsh) {
> - sprintf(remote_command, "%s %s kill -s %s", RSH_CMD, host, signal);
> + sprintf(remote_command, "%s %s kill -s %s", rshcmd, host, signal);
> } else {
> sprintf(remote_command, "%s %s -x %s kill -s %s", SSH_CMD, SSH_ARG,
> host, signal);
> @@ -1696,7 +1696,7 @@ void spawn_fast(int argc, char *argv[],
> }
>
> if (use_rsh) {
> - nargv[arg_offset++] = RSH_CMD;
> + nargv[arg_offset++] = rshcmd;
> } else {
> nargv[arg_offset++] = SSH_CMD;
> nargv[arg_offset++] = SSH_ARG;
> @@ -2134,7 +2134,7 @@ void spawn_one(int argc, char *argv[], c
> }
>
> if (use_rsh) {
> - nargv[arg_offset++] = RSH_CMD;
> + nargv[arg_offset++] = rshcmd;
> }
>
> else {
> _______________________________________________
> mvapich-discuss mailing list
> mvapich-discuss at cse.ohio-state.edu
> http://mail.cse.ohio-state.edu/mailman/listinfo/mvapich-discuss
--
Jonathan Perkins
http://www.cse.ohio-state.edu/~perkinjo
More information about the mvapich-discuss
mailing list