[mvapich-discuss] alternative RSH_CMD for supporting condor

Jonathan Perkins perkinjo at cse.ohio-state.edu
Thu Sep 6 08:02:53 EDT 2012


On Thu, Sep 06, 2012 at 10:40:38AM +0200, Imre Szeberenyi wrote:
> Hi,
> 
> I am using condor scheduler on a dedicated HPC cluster at Budapest
> University of Technology and Economics.
> 
> To start mpi jobs on the workers I must use an alternative ssh cmd.
> I did not find any configuration option to change the default ssh command.
> To support this, I suggest introducing an MV2_RSH_CMD environment
> variable to mpirun_rsh.
> 
> I modified the source. Please find attached the patch. I hope you
> find it useful.

Thank you!  This patch looks like it could benefit other users as
well.  We will review it, and you should expect to see it in a future
release of MVAPICH2.

> 
> 
> Btw: I have found a minor bug in configuration file processing:
> 
> If MV2_USER_CONFIG and HOME are not set, a segmentation fault occurs
> in the function
> read_user_config (the variable user_config is NULL and is used at line 224 of file
> src/mpid/ch3/channels/common/src/util/mv2_config.c).

Looks like we didn't consider the case where HOME was not set.  We'll
also take a look at this.  Thanks again.

> 
> 
> Thank you,
> 
> Imre Szeberenyi

> diff -rupN mpirun.org/mpirun_params.c mpirun/mpirun_params.c
> --- mpirun.org/mpirun_params.c	2012-09-06 00:13:53.932464881 +0200
> +++ mpirun/mpirun_params.c	2012-09-06 09:02:38.850417992 +0200
> @@ -70,6 +70,7 @@ int USE_LINEAR_SSH = 1;         /* By de
>                                     -fastssh for tree based ssh */
>  
>  char hostfile[HOSTFILE_LEN + 1];
> +char rshcmd[RSHCMD_LEN + 1];
>  
>  /*
>    The group active for mpispawn. NULL if no group change is required.
> @@ -208,7 +209,17 @@ void commandLine(int argc, char *argv[],
>                  show_on = 1;
>                  break;
>              case 5:
> -                use_rsh = 1;
> +                {
> +                    char *rsh_env;
> +                    rsh_env = getenv("MV2_RSH_CMD");
> +                    if (rsh_env != NULL) { 
> +                        strncpy(rshcmd, rsh_env, RSHCMD_LEN);
> +                        if (strlen(rsh_env) >= RSHCMD_LEN) 
> +                            rshcmd[RSHCMD_LEN] = '\0';
> +                    } else 
> +                        strcpy(rshcmd, RSH_CMD);
> +                    use_rsh = 1;
> +                }
>                  break;
>              case 6:
>                  use_rsh = 0;
> diff -rupN mpirun.org/mpirun_params.h mpirun/mpirun_params.h
> --- mpirun.org/mpirun_params.h	2012-09-06 00:13:53.932464881 +0200
> +++ mpirun/mpirun_params.h	2012-09-06 01:01:48.361420969 +0200
> @@ -44,7 +44,9 @@ extern int use_rsh;
>  */
>  
>  #define HOSTFILE_LEN 256
> +#define RSHCMD_LEN 256
>  extern char hostfile[HOSTFILE_LEN + 1];
> +extern char rshcmd[RSHCMD_LEN + 1];
>  
>  extern int xterm_on;
>  extern int show_on;
> diff -rupN mpirun.org/mpirun_params_comp.c mpirun/mpirun_params_comp.c
> --- mpirun.org/mpirun_params_comp.c	2012-09-06 00:13:53.933456850 +0200
> +++ mpirun/mpirun_params_comp.c	2012-09-06 08:57:33.029419861 +0200
> @@ -78,6 +78,7 @@ int USE_LINEAR_SSH = 1;         /* By de
>                                     -fastssh for tree based ssh */
>  
>  char hostfile[HOSTFILE_LEN + 1];
> +char rshcmd[RSHCMD_LEN + 1];
>  
>  /*
>    The group active for mpispawn. NULL if no group change is required.
> @@ -210,7 +211,17 @@ static void check_option(int argc, char 
>          show_on = 1;
>          break;
>      case 5:
> -        use_rsh = 1;
> +        {
> +            char *rsh_env;
> +            rsh_env = getenv("MV2_RSH_CMD"); 
> +            if (rsh_env != NULL) {
> +                strncpy(rshcmd, rsh_env, RSHCMD_LEN);
> +            if (strlen(rsh_env) >= RSHCMD_LEN)
> +                rshcmd[RSHCMD_LEN] = '\0';
> +            } else
> +                strcpy(rshcmd, RSH_CMD);
> +            use_rsh = 1;
> +        }
>          break;
>      case 6:
>          use_rsh = 0;
> diff -rupN mpirun.org/mpirun_rsh.c mpirun/mpirun_rsh.c
> --- mpirun.org/mpirun_rsh.c	2012-09-06 00:13:53.930450704 +0200
> +++ mpirun/mpirun_rsh.c	2012-09-06 00:50:01.410431063 +0200
> @@ -975,7 +975,7 @@ void remote_signal(char const host[256],
>      pid_t const * next_pid = pid;
>  
>      if (use_rsh) {
> -        sprintf(remote_command, "%s %s kill -s %s", RSH_CMD, host, signal);
> +        sprintf(remote_command, "%s %s kill -s %s", rshcmd, host, signal);
>      } else {
>          sprintf(remote_command, "%s %s -x %s kill -s %s", SSH_CMD, SSH_ARG,
>                  host, signal);
> @@ -1696,7 +1696,7 @@ void spawn_fast(int argc, char *argv[], 
>                  }
>  
>                  if (use_rsh) {
> -                    nargv[arg_offset++] = RSH_CMD;
> +                    nargv[arg_offset++] = rshcmd;
>                  } else {
>                      nargv[arg_offset++] = SSH_CMD;
>                      nargv[arg_offset++] = SSH_ARG;
> @@ -2134,7 +2134,7 @@ void spawn_one(int argc, char *argv[], c
>              }
>  
>              if (use_rsh) {
> -                nargv[arg_offset++] = RSH_CMD;
> +                nargv[arg_offset++] = rshcmd;
>              }
>  
>              else {

> _______________________________________________
> mvapich-discuss mailing list
> mvapich-discuss at cse.ohio-state.edu
> http://mail.cse.ohio-state.edu/mailman/listinfo/mvapich-discuss


-- 
Jonathan Perkins
http://www.cse.ohio-state.edu/~perkinjo


More information about the mvapich-discuss mailing list