diff -c -r --exclude '*.Po' osu-micro-benchmarks-5.6.3-orig/mpi/pt2pt/osu_latency.c osu-micro-benchmarks-5.6.3/mpi/pt2pt/osu_latency.c
*** osu-micro-benchmarks-5.6.3-orig/mpi/pt2pt/osu_latency.c	Fri May 21 17:34:08 2021
--- osu-micro-benchmarks-5.6.3/mpi/pt2pt/osu_latency.c	Mon Jun 14 11:28:32 2021
***************
*** 86,92 ****
--- 86,96 ----
          exit(EXIT_FAILURE);
      }
  
+ #if 1 /* patched path: allocate s_buf/r_buf with allocate_memory_pt2pt_lat; the original call is kept under #else */
+     if (allocate_memory_pt2pt_lat(&s_buf, &r_buf, myid)) {
+ #else /* original allocation call, compiled out while the condition above is 1 */
      if (allocate_memory_pt2pt(&s_buf, &r_buf, myid)) {
+ #endif
          /* Error allocating memory */
          MPI_CHECK(MPI_Finalize());
          exit(EXIT_FAILURE);
***************
*** 97,104 ****
--- 101,113 ----
  
      /* Latency test */
      for(size = options.min_message_size; size <= options.max_message_size; size = (size ? size * 2 : 1)) {
+ #if 1 /* patched path: the buffer kind is passed explicitly (options.src for s_buf, options.dst for r_buf) instead of myid */
+         set_buffer_pt2pt_lat(s_buf, options.src, options.accel, 'a', size);
+         set_buffer_pt2pt_lat(r_buf, options.dst, options.accel, 'b', size);
+ #else /* original rank-based calls, compiled out while the condition above is 1 */
          set_buffer_pt2pt(s_buf, myid, options.accel, 'a', size);
          set_buffer_pt2pt(r_buf, myid, options.accel, 'b', size);
+ #endif
  
          if(size > LARGE_MESSAGE_SIZE) {
              options.iterations = options.iterations_large;
***************
*** 113,118 ****
--- 122,128 ----
                      t_start = MPI_Wtime();
                  }
  
+                 //MPI_CHECK(MPI_Ssend(s_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD));
                  MPI_CHECK(MPI_Send(s_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD));
                  MPI_CHECK(MPI_Recv(r_buf, size, MPI_CHAR, 1, 1, MPI_COMM_WORLD, &reqstat));
              }
***************
*** 123,128 ****
--- 133,139 ----
          else if(myid == 1) {
              for(i = 0; i < options.iterations + options.skip; i++) {
                  MPI_CHECK(MPI_Recv(r_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD, &reqstat));
+                 //MPI_CHECK(MPI_Ssend(s_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD));
                  MPI_CHECK(MPI_Send(s_buf, size, MPI_CHAR, 0, 1, MPI_COMM_WORLD));
              }
          }
***************
*** 136,142 ****
--- 147,157 ----
          }
      }
  
+ #if 1 /* patched path: release the buffers with free_memory_lat; the original free_memory call is kept under #else */
+     free_memory_lat(s_buf, r_buf, myid);
+ #else /* original release call, compiled out while the condition above is 1 */
      free_memory(s_buf, r_buf, myid);
+ #endif
      MPI_CHECK(MPI_Finalize());
  
      if (NONE != options.accel) {
diff -c -r --exclude '*.Po' osu-micro-benchmarks-5.6.3-orig/util/osu_util_mpi.c osu-micro-benchmarks-5.6.3/util/osu_util_mpi.c
*** osu-micro-benchmarks-5.6.3-orig/util/osu_util_mpi.c	Fri May 21 17:33:27 2021
--- osu-micro-benchmarks-5.6.3/util/osu_util_mpi.c	Fri May 21 17:44:39 2021
***************
*** 749,754 ****
--- 749,784 ----
      }
  }
  
+ #if 1 /* always-on guard around the function added by this patch */
+ void set_buffer_pt2pt_lat (void * buffer, char buf_type, enum accel_type type, int data, size_t size) /* Fill size bytes of buffer with data; buf_type ('H', 'D' or 'M') selects how the fill is done */
+ {
+     switch (buf_type) {
+         case 'H': /* filled with memset */
+             memset(buffer, data, size);
+             break;
+         case 'D': /* 'D' and 'M' share the accelerator fill path below */
+         case 'M':
+ #ifdef _ENABLE_OPENACC_
+             if (type == OPENACC) { /* OpenACC: write each byte in a parallel loop over the device pointer */
+                 size_t i;
+                 char * p = (char *)buffer;
+                 #pragma acc parallel loop deviceptr(p)
+                 for (i = 0; i < size; i++) {
+                     p[i] = data;
+                 }
+                 break;
+             } else /* the else body is the CUDA block below when _ENABLE_CUDA_ is defined, otherwise the final break */
+ #endif
+ #ifdef _ENABLE_CUDA_
+             {
+                 CUDA_CHECK(cudaMemset(buffer, data, size));
+             }
+ #endif
+             break;
+     } /* no default case: any other buf_type leaves the buffer untouched */
+ }
+ #endif
+ 
  void set_buffer (void * buffer, enum accel_type type, int data, size_t size)
  {
  #ifdef _ENABLE_OPENACC_
***************
*** 1032,1037 ****
--- 1062,1109 ----
      return 0;
  }
  
+ #if 1 /* always-on guard around the function added by this patch */
+ int allocate_memory_pt2pt_lat (char ** sbuf, char ** rbuf, int rank) /* Allocate *sbuf according to options.src and *rbuf according to options.dst; returns 0 on success, 1 on failure; rank is not used */
+ {
+     unsigned long align_size = sysconf(_SC_PAGESIZE); /* page size, used as the alignment for posix_memalign below */
+ 
+     if ('D' == options.src) { /* send buffer: kind chosen by options.src */
+         if (allocate_device_buffer(sbuf)) {
+             fprintf(stderr, "Error allocating cuda memory\n");
+             return 1;
+         }
+     } else if ('M' == options.src) {
+         if (allocate_managed_buffer(sbuf)) {
+             fprintf(stderr, "Error allocating cuda unified memory\n");
+             return 1;
+         }
+     } else { /* neither 'D' nor 'M': page-aligned block of options.max_message_size bytes */
+         if (posix_memalign((void**)sbuf, align_size, options.max_message_size)) {
+             fprintf(stderr, "Error allocating host memory\n");
+             return 1;
+         }
+     }
+     if ('D' == options.dst) { /* receive buffer: kind chosen by options.dst. NOTE(review): the failure returns below do not release *sbuf allocated above */
+         if (allocate_device_buffer(rbuf)) {
+             fprintf(stderr, "Error allocating cuda memory\n");
+             return 1;
+         }
+     } else if ('M' == options.dst) {
+         if (allocate_managed_buffer(rbuf)) {
+             fprintf(stderr, "Error allocating cuda unified memory\n");
+             return 1;
+         }
+     } else { /* neither 'D' nor 'M': page-aligned block of options.max_message_size bytes */
+         if (posix_memalign((void**)rbuf, align_size, options.max_message_size)) {
+             fprintf(stderr, "Error allocating host memory\n");
+             return 1;
+         }
+     }
+ 
+     return 0;
+ }
+ #endif
+ 
  void allocate_memory_one_sided(int rank, char **sbuf, char **rbuf,
          char **win_base, size_t size, enum WINDOW type, MPI_Win *win)
  {
***************
*** 1293,1298 ****
--- 1365,1386 ----
      }
  }
  
+ #if 1 /* always-on guard around the function added by this patch */
+ void free_memory_lat (void * sbuf, void * rbuf, int rank) /* Release sbuf according to options.src and rbuf according to options.dst; rank is not used */
+ {
+     if ('D' == options.src || 'M' == options.src) { /* both 'D' and 'M' buffers go through free_device_buffer */
+         free_device_buffer(sbuf);
+     } else { /* any other kind is released with free */
+         free(sbuf);
+     }
+     if ('D' == options.dst || 'M' == options.dst) { /* same selection for the receive buffer, keyed on options.dst */
+         free_device_buffer(rbuf);
+     } else {
+         free(rbuf);
+     }
+ }
+ #endif
+ 
  void free_memory_pt2pt_mul (void * sbuf, void * rbuf, int rank, int pairs)
  {
      if (rank < pairs) {
diff -c -r --exclude '*.Po' osu-micro-benchmarks-5.6.3-orig/util/osu_util_mpi.h osu-micro-benchmarks-5.6.3/util/osu_util_mpi.h
*** osu-micro-benchmarks-5.6.3-orig/util/osu_util_mpi.h	Fri May 21 17:33:32 2021
--- osu-micro-benchmarks-5.6.3/util/osu_util_mpi.h	Fri May 21 17:44:05 2021
***************
*** 60,65 ****
--- 60,68 ----
  void free_buffer (void * buffer, enum accel_type type);
  void set_buffer (void * buffer, enum accel_type type, int data, size_t size);
  void set_buffer_pt2pt (void * buffer, int rank, enum accel_type type, int data, size_t size);
+ #if 1
+ void set_buffer_pt2pt_lat (void * buffer, char buf_type, enum accel_type type, int data, size_t size);
+ #endif
  
  /*
   * CUDA Context Management
***************
*** 74,82 ****
--- 77,91 ----
  
  void usage_mbw_mr();
  int allocate_memory_pt2pt (char **sbuf, char **rbuf, int rank);
+ #if 1
+ int allocate_memory_pt2pt_lat (char **sbuf, char **rbuf, int rank);
+ #endif
  int allocate_memory_pt2pt_mul (char **sbuf, char **rbuf, int rank, int pairs);
  void print_header_pt2pt (int rank, int type);
  void free_memory (void *sbuf, void *rbuf, int rank);
+ #if 1
+ void free_memory_lat (void *sbuf, void *rbuf, int rank);
+ #endif
  void free_memory_pt2pt_mul (void *sbuf, void *rbuf, int rank, int pairs);
  void print_header(int rank, int full);
  void usage_one_sided (char const *);