[mvapich-discuss] problem w/MVAPICH in the frames of Gen1

Mikhail Kuzminsky kus at free.net
Thu Aug 3 11:10:04 EDT 2006


We worked very well with MVAPICH-0.9.4 from the Gen1 IBGD-1.6.1 Mellanox 
toolset. Then, about 1 year ago, we upgraded to IBGD-1.8.0 
with MVAPICH-0.9.5, but since then we ran only applications using the 
TCP/IP over IB stack, and I didn't verify MPI (the common IBGD 
installation process said that all is OK) :-(

But now I have found that MVAPICH-0.9.5 in 1.8.0 doesn't work for me :-( !
Any invocation of mpirun_rsh (for any MPI application - from our own 
program to the standard tests) gives messages like the following:

/home/local/ibgd/mpi/osu/gcc/mvapich-0.9.5/bin/mpirun_rsh -rsh -np 2 
-hostfile mf /home/local/ibgd/mpi/osu/gcc/tests/osu-tests/bw 1000 16 > 
testmpi 2>&1

[1] Abort: Cannot allocate PD (Invalid Virtual Address) at line 688 in 
file viainit.c
[0] Abort: Cannot allocate PD (Invalid Virtual Address)mpirun: 
executable version 1 does not match our version 3.
  at line 688 in file viainit.c

I also installed mvapich-0.9.5 from IBGD-1.8.0 "manually", i.e. not as 
part of the standard IBGD installation. The reason was that after the IBGD 
installation I found wrong paths in the mpif77/mpif90/mpirun etc. scripts 
(they don't include the leading "prefixes" of the full paths). But after 
the manual installation of mvapich-0.9.5 for the Intel and Pathscale 
compilers I found the same wrong paths, and the same problem as described 
above :-( 

Could you please help me solve this problem? 

Some (maybe stupid) ideas - things that look strange to me, based on the 
strace output (I'm also attaching the strace output below): 

1) I have no LD_LIBRARY_PATH set for the user; it appears to be unnecessary 
according to the README, but ld.so.cache was searched

2) There is an attempt to access //home/local/.../mvapich.conf - 
i.e. with 2 slashes instead of one at the beginning?

We use old versions of the software because we run the (now ancient) SuSE 
Prof. 9.0 for x86-64 with the 2.4.21 SMP kernel; this environment is
necessary because of restrictions of some binary applications we use.

Yours
Mikhail Kuzminsky
Zelinsky Institute of Organic Chemistry
Moscow

strace /home/local/ibgd/mpi/osu/gcc/mvapich-0.9.5/bin/mpirun_rsh -rsh 
-np 2 -hostfile mf /home/local/ibgd/mpi/osu/gcc/tests/osu-tests/bw 
1000 16

===================================== strace output =================
execve("/home/local/ibgd/mpi/osu/gcc/mvapich-0.9.5/bin/mpirun_rsh", 
["/home/local/ibgd/mpi/osu/gcc/mvapich-0.9.5/bin/mpirun_rsh", "-rsh", 
"-np", "2", "-hostfile", "mf", 
"/home/local/ibgd/mpi/osu/gcc/tests/osu-tests/bw", "1000", "16"], [/* 
69 vars */]) = 0
uname({sys="Linux", node="c5ws1", ...}) = 0
brk(0) = 0x505b80
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 
0) = 0x2a9556b000
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or 
directory)
open("/home/SGE/lib/lx24-amd64/tls/x86_64/libc.so.6", O_RDONLY) = -1 
ENOENT (No such file or directory)
<a set of similar strings was skipped>
stat("/home/SGE/lib/lx24-amd64", {st_mode=S_IFDIR|0755, st_size=4096, 
...}) = 0
open("/usr/local/ifort/lib/tls/x86_64/libc.so.6", O_RDONLY) = -1 
ENOENT (No such file or directory)
stat("/usr/local/ifort/lib/tls/x86_64", 0x7fbfffe640) = -1 ENOENT (No 
such file or directory)
<a set of like messages was skipped>
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=117044, ...}) = 0
mmap(NULL, 117044, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a9556c000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\335\1"..., 
640) = 640
fstat(3, {st_mode=S_IFREG|0755, st_size=1534814, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 
0) = 0x2a95589000
mmap(NULL, 2365888, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 
0x2a9566d000
mprotect(0x2a95791000, 1169856, PROT_NONE) = 0
mmap(0x2a9586d000, 253952, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED, 3, 0x100000) = 0x2a9586d000
mmap(0x2a958ab000, 14784, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2a958ab000
close(3) = 0
munmap(0x2a9556c000, 117044) = 0
access("//home/local/ibgd/mpi/osu/gcc/mvapich-0.9.5/etc/mvapich.conf", 
R_OK) = -1 ENOENT (No such file or directory)
brk(0) = 0x505b80
brk(0x526b80) = 0x526b80
brk(0) = 0x526b80
brk(0x527000) = 0x527000
open("mf", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=34, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 
0) = 0x2a9556c000
read(3, "c5ws1.chem.ac.ru\nc5ws1.chem.ac.r"..., 4096) = 34
close(3) = 0
munmap(0x2a9556c000, 4096) = 0
getcwd("/home/kus/an/examples", 256) = 22
uname({sys="Linux", node="c5ws1", ...}) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_TCP) = 3
bind(3, {sa_family=0xeee0 /* AF_??? */, 
sa_data="\0\0\0\0\0\0000\223X\225*\0\0\0"}, 16) = 0
getsockname(3, {sa_family=AF_INET, sin_port=htons(39601), 
sin_addr=inet_addr("0.0.0.0")}, [12884901904]) = 0
listen(3, 2) = 0
rt_sigaction(SIGHUP, {0x4033e0, [HUP], SA_RESTART|0x4000000}, 
{SIG_DFL}, 8) = 0
rt_sigaction(SIGINT, {0x4033e0, [INT], SA_RESTART|0x4000000}, 
{SIG_DFL}, 8) = 0
rt_sigaction(SIGTSTP, {0x4035d0, [TSTP], SA_RESTART|0x4000000}, 
{SIG_DFL}, 8) = 0
rt_sigaction(SIGCHLD, {0x403640, [CHLD], SA_RESTART|0x4000000}, 
{SIG_DFL}, 8) = 0
rt_sigaction(SIGALRM, {0x4035f0, [ALRM], SA_RESTART|0x4000000}, 
{SIG_DFL}, 8) = 0
alarm(1000) = 0
getpid() = 19113
fork() = 19114
brk(0) = 0x527000
brk(0) = 0x527000
brk(0x526000) = 0x526000
brk(0) = 0x526000
getpid() = 19113
fork() = 19115
accept(3, [0] Abort: Cannot allocate PD (Invalid Virtual Address) at 
line 688 in file viainit.c
{sa_family=AF_INET, sin_port=htons(39604), 
sin_addr=inet_addr("192.168.0.21")}, [12884901904]) = 4
--- SIGCHLD (Child exited) @ 0 (0) ---
alarm(10) = 1000
wait4(-1, [WIFEXITED(s) && WEXITSTATUS(s) == 0], 0, NULL) = 19114
wait4(-1, [1] Abort: Cannot allocate PD (Invalid Virtual Address) at 
line 688 in file viainit.c
[WIFEXITED(s) && WEXITSTATUS(s) == 0], 0, NULL) = 19115
alarm(0) = 10
exit_group(0) = ?


More information about the mvapich-discuss mailing list