[mvapich-discuss] Re: [openfabrics-ewg] Announcing the release of MVAPICH2 0.9.8 with Checkpoint/Restart, iWARP, RDMA CM-based connection management

Sundeep Narravula narravul at cse.ohio-state.edu
Wed Nov 15 12:47:05 EST 2006


Hi David,
  Can you please perform mpdallexit and mpdcleanup before retrying?
Thanks,
  --Sundeep.

On Tue, 14 Nov 2006, david elsen wrote:

> Hi Sundeep,
>
> I see the following error messages:
>
> [root at ammasso1 ~]# mpdboot
> /usr/local/mvapich2/bin/mpdroot: error while loading shared libraries: librdmacm.so: cannot open shared object file: No such file or directory
> mpdboot_ammasso1.qlogic.org (handle_mpd_output 359): failed to ping mpd on ammasso1.qlogic.org; recvd output={}
>
> [root at ammasso1 ~]# /usr/local/mvapich2/bin/mpdroot: error while loading shared libraries: librdmacm.so: cannot open shared object file: No such file or directory
>
>
>
>
> Please see the environment variable settings below. I highlighted
> LD_LIBRARY_PATH there.
>
> [root at ammasso1 lib]# env
> SSH_AGENT_PID=2616
> HOSTNAME=ammasso1
> DESKTOP_STARTUP_ID=
> SHELL=/bin/bash
> TERM=xterm
> HISTSIZE=1000
> GTK_RC_FILES=/etc/gtk/gtkrc:/root/.gtkrc-1.2-gnome2
> WINDOWID=48240070
> OLDPWD=/root
> QTDIR=/usr/lib/qt-3.3
> QTINC=/usr/lib/qt-3.3/include
> USER=root
> LD_LIBRARY_PATH=/usr/local/lib
> LS_COLORS=no=00:fi=00:di=00;34:ln=00;36:pi=40;33:so=00;35:bd=40;33;01:cd=40;33;01:or=01;05;37;41:mi=01;05;37;41:ex=00;32:*.cmd=00;32:*.exe=00;32:*.com=00;32:*.btm=00;32:*.bat=00;32:*.sh=00;32:*.csh=00;32:*.tar=00;31:*.tgz=00;31:*.arj=00;31:*.taz=00;31:*.lzh=00;31:*.zip=00;31:*.z=00;31:*.Z=00;31:*.gz=00;31:*.bz2=00;31:*.bz=00;31:*.tz=00;31:*.rpm=00;31:*.cpio=00;31:*.jpg=00;35:*.gif=00;35:*.bmp=00;35:*.xbm=00;35:*.xpm=00;35:*.png=00;35:*.tif=00;35:
> GNOME_KEYRING_SOCKET=/tmp/keyring-4A96MN/socket
> SSH_AUTH_SOCK=/tmp/ssh-jCRBDJ2561/agent.2561
> SESSION_MANAGER=local/ammasso1.qlogic.org:/tmp/.ICE-unix/2561
> MAIL=/var/spool/mail/root
> DESKTOP_SESSION=default
> PATH=/usr/local/mvapich2/bin:/usr/lib/qt-3.3/bin:/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/X11R6/bin:/root/bin
> QT_IM_MODULE=scim
> GDM_XSERVER_LOCATION=local
> INPUTRC=/etc/inputrc
> PWD=/usr/local/lib
> XMODIFIERS=@im=SCIM
> LANG=en_US.UTF-8
> GDMSESSION=default
> SSH_ASKPASS=/usr/libexec/openssh/gnome-ssh-askpass
> HOME=/root
> SHLVL=2
> MPD_BIN=/usr/local/mvapich2/bin
> GNOME_DESKTOP_SESSION_ID=Default
> LOGNAME=root
> QTLIB=/usr/lib/qt-3.3/lib
> DBUS_SESSION_BUS_ADDRESS=unix:abstract=/tmp/dbus-x3qJbgqCQn,guid=2d255a45b9c7569d80f09bbf76c29d00
> LESSOPEN=|/usr/bin/lesspipe.sh %s
> DISPLAY=:0.0
> GTK_IM_MODULE=scim
> G_BROKEN_FILENAMES=1
> COLORTERM=gnome-terminal
> XAUTHORITY=/tmp/.gdmYI4NIT
> _=/bin/env
> [root at ammasso1 lib]#
>
>
>
>
>
> Please see the following for the files in the /usr/local/lib directory:
>
> [root at ammasso1 lib]# pwd
> /usr/local/lib
> [root at ammasso1 lib]#
> [root at ammasso1 lib]# ls -la
> total 1800
> drwxr-xr-x  3 root root   4096 Nov  9 12:38 .
> drwxr-xr-x 13 root root   4096 Nov 13 19:51 ..
> drwxr-xr-x  2 root root   4096 Nov  8 18:38 infiniband
> -rwxr-xr-x  1 root root    773 Nov  8 18:38 libibat.la
> -rwxr-xr-x  1 root root  28419 Nov  8 18:38 libibat.so
> -rw-r--r--  1 root root  38662 Nov  8 18:35 libibcommon.a
> -rwxr-xr-x  1 root root    820 Nov  8 18:35 libibcommon.la
> lrwxrwxrwx  1 root root     20 Nov  8 18:35 libibcommon.so -> libibcommon.so.1.0.0
> lrwxrwxrwx  1 root root     20 Nov  8 18:35 libibcommon.so.1 -> libibcommon.so.1.0.0
> -rwxr-xr-x  1 root root  29138 Nov  8 18:35 libibcommon.so.1.0.0
> -rw-r--r--  1 root root 172056 Nov  8 18:36 libibmad.a
> -rwxr-xr-x  1 root root    857 Nov  8 18:36 libibmad.la
> lrwxrwxrwx  1 root root     17 Nov  8 18:36 libibmad.so -> libibmad.so.1.0.0
> lrwxrwxrwx  1 root root     17 Nov  8 18:36 libibmad.so.1 -> libibmad.so.1.0.0
> -rwxr-xr-x  1 root root 125987 Nov  8 18:36 libibmad.so.1.0.0
> -rw-r--r--  1 root root  45358 Nov  8 18:35 libibumad.a
> -rwxr-xr-x  1 root root    836 Nov  8 18:35 libibumad.la
> lrwxrwxrwx  1 root root     18 Nov  8 18:35 libibumad.so -> libibumad.so.1.0.0
> lrwxrwxrwx  1 root root     18 Nov  8 18:35 libibumad.so.1 -> libibumad.so.1.0.0
> -rwxr-xr-x  1 root root  44419 Nov  8 18:35 libibumad.so.1.0.0
> -rw-r--r--  1 root root 180672 Nov  9 12:38 libibverbs.a
> -rwxr-xr-x  1 root root    828 Nov  9 12:38 libibverbs.la
> lrwxrwxrwx  1 root root     19 Nov  9 12:38 libibverbs.so -> libibverbs.so.2.0.0
> lrwxrwxrwx  1 root root     19 Nov  9 12:38 libibverbs.so.2 -> libibverbs.so.2.0.0
> -rwxr-xr-x  1 root root 123655 Nov  9 12:38 libibverbs.so.2.0.0
> lrwxrwxrwx  1 root root     18 Nov  8 18:37 libopensm-1.2.0-rc6.so -> libopensm.so.1.0.0
> -rw-r--r--  1 root root 130594 Nov  8 18:37 libopensm.a
> -rwxr-xr-x  1 root root    806 Nov  8 18:37 libopensm.la
> lrwxrwxrwx  1 root root     18 Nov  8 18:37 libopensm.so -> libopensm.so.1.0.0
> lrwxrwxrwx  1 root root     18 Nov  8 18:37 libopensm.so.1 -> libopensm.so.1.0.0
> -rwxr-xr-x  1 root root 121937 Nov  8 18:37 libopensm.so.1.0.0
> lrwxrwxrwx  1 root root     19 Nov  8 18:37 libosmcomp-1.2.0-rc6.so -> libosmcomp.so.1.0.1
> -rw-r--r--  1 root root 242594 Nov  8 18:37 libosmcomp.a
> -rwxr-xr-x  1 root root    823 Nov  8 18:37 libosmcomp.la
> lrwxrwxrwx  1 root root     19 Nov  8 18:37 libosmcomp.so -> libosmcomp.so.1.0.1
> lrwxrwxrwx  1 root root     19 Nov  8 18:37 libosmcomp.so.1 -> libosmcomp.so.1.0.1
> -rwxr-xr-x  1 root root 194469 Nov  8 18:37 libosmcomp.so.1.0.1
> lrwxrwxrwx  1 root root     21 Nov  8 18:37 libosmvendor-1.2.0-rc6.so -> libosmvendor.so.1.0.0
> -rw-r--r--  1 root root  86786 Nov  8 18:37 libosmvendor.a
> -rwxr-xr-x  1 root root    885 Nov  8 18:37 libosmvendor.la
> lrwxrwxrwx  1 root root     21 Nov  8 18:04 libosmvendor_openib.so -> libosmvendor.so.1.0.0
> lrwxrwxrwx  1 root root     21 Nov  8 18:37 libosmvendor.so -> libosmvendor.so.1.0.0
> lrwxrwxrwx  1 root root     21 Nov  8 18:37 libosmvendor.so.1 -> libosmvendor.so.1.0.0
> -rwxr-xr-x  1 root root  83296 Nov  8 18:37 libosmvendor.so.1.0.0
> -rwxr-xr-x  1 root root    837 Nov  8 19:04 librdmacm.la
> -rwxr-xr-x  1 root root  54472 Nov  8 19:04 librdmacm.so
> [root at ammasso1 lib]#
>
> [root at ammasso1 ~]# mpdtrace
> /usr/local/mvapich2/bin/mpdroot: error while loading shared libraries: librdmacm.so: cannot open shared object file: No such file or directory
> mpdtrace: mpd_uncaught_except_tb handling:
>   exceptions.TypeError: not all arguments converted during string formatting
>     /usr/local/mvapich2/bin/mpdlib.py  899  __init__
>         mpd_print(1,'forked process failed; status=' % status)
>     /usr/local/mvapich2/bin/mpdtrace  46  mpdtrace
>         conSock = MPDConClientSock(mpdroot=mpdroot,secretword=parmdb['MPD_SECRETWORD'])
>     /usr/local/mvapich2/bin/mpdtrace  83  ?
>         mpdtrace()
> [root at ammasso1 ~]#
>
>
> Thanks in advance for your help,
> David
>
>
> Sundeep Narravula <narravul at cse.ohio-state.edu> wrote:
>
> Hi David,
>
> The error you are seeing is due to the fact that the librdmacm.so
> library is not found by default in your environment. You need to have the
> following variable set for this.
>
> export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
>
> Regards,
>   --Sundeep.
>
> On Tue, 14 Nov 2006, david elsen wrote:
>
> > Sundeep, Yes, I updated my system to get the g77 compiler. I did not
> > need it, but I updated it anyway. Now I can build MVAPICH2, but it
> > gives me some errors when I try to run it.
> >
> > Please see the following:
> > [root at ammasso1 bin]# ./mpdtrace
> > /root/0.9.8-RELEASE/bin/mpdroot: error while loading shared libraries: librdmacm.so: cannot open shared object file: No such file or directory
> > mpdtrace: mpd_uncaught_except_tb handling:
> >   exceptions.TypeError: not all arguments converted during string formatting
> >     /root/0.9.8-RELEASE/bin/mpdlib.py  899  __init__
> >         mpd_print(1,'forked process failed; status=' % status)
> >     ./mpdtrace  46  mpdtrace
> >         conSock = MPDConClientSock(mpdroot=mpdroot,secretword=parmdb['MPD_SECRETWORD'])
> >     ./mpdtrace  83  ?
> >         mpdtrace()
> > [root at ammasso1 bin]#
> > [root at ammasso1 bin]#
> > [root at ammasso1 bin]# ./mpiexec -n 1 ./cpi
> > /root/0.9.8-RELEASE/bin/mpdroot: error while loading shared libraries: librdmacm.so: cannot open shared object file: No such file or directory
> > mpiexec_ammasso1: mpd_uncaught_except_tb handling:
> >   exceptions.TypeError: not all arguments converted during string formatting
> >     /root/0.9.8-RELEASE/bin/mpdlib.py  899  __init__
> >         mpd_print(1,'forked process failed; status=' % status)
> >     ./mpiexec  231  mpiexec
> >         conSock = MPDConClientSock(mpdroot=mpdroot,secretword=parmdb['MPD_SECRETWORD'])
> >     ./mpiexec  1509  ?
> >         mpiexec()
> > [root at ammasso1 bin]#
> >
> >
> >
> > Sundeep Narravula wrote:
> >
> > Hi David,
> >
> >   Your environment does not have a g77 compiler. You need to either make
> > sure you have that on your system or you can disable that option through
> > the --disable-f77 and --disable-f90 options in the configure command.
> > Please refer to the output of ./configure --help for details on these
> > options.
> >
> > Regards,
> >   --Sundeep.
> >
>
>
>
>
>
> ---------------------------------
> Sponsored Link
>
> For just $24.99/mo., Vonage offers unlimited local and long-distance calling. Sign up now.



More information about the mvapich-discuss mailing list