<!-- BaNnErBlUrFlE-BoDy-start -->
<!-- Preheader Text : BEGIN -->
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;height:0px;max-height:0px;opacity:0;overflow:hidden;">
Hello, When trying to do “osu_bw H D” from a node without a GPU to a node with a GPU I got cuda device failures on the “H” node. “no CUDA-capable device is detected” This appears to be caused by the benchmarks</div>
<!-- Preheader Text : END -->

<!-- Email Banner : BEGIN -->
<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;max-height:0px;opacity:0;overflow:hidden;">ZjQcmQRYFpfptBannerStart</div>

<!--[if ((ie)|(mso))]>
  <table border="0" cellspacing="0" cellpadding="0" width="100%" style="padding: 16px 0px 16px 0px; direction: ltr" lang="en"><tr><td>
    <table border="0" cellspacing="0" cellpadding="0" style="padding: 0px 10px 5px 6px; width: 100%; border-radius:4px; border-top:4px solid #8c8e91;background-color:#CFD3D7;"><tr><td valign="top">
      <table align="left" border="0" cellspacing="0" cellpadding="0" style="padding: 4px 8px 4px 8px">
        <tr><td style="color:#000000; font-family: 'Arial', sans-serif; font-weight:bold; font-size:14px; direction: ltr">
          This Message Is From an External Sender
        </td></tr>
        <tr><td style="color:#000000; font-weight:normal; font-family: 'Arial', sans-serif; font-size:12px; direction: ltr">
          This message came from outside your organization.
        </td></tr>

      </table>
      <![if ie]><br clear="all"><![endif]>
      <table align="right" border="0" cellspacing="0" cellpadding="0" style="padding: 4px 0px 4px 0px"><tr>
        <td style="direction: ltr">  <a target="_blank" href="https://us-phishalarm-ewt.proofpoint.com/EWT/v1/KGKeukY!vwQd0oYtD6YgRRdwH04lVq3dwf4Co07c8t_a1HZ4SLN9JCCeSBgmlAFYxCvT0ZZ45LtAcnKgnCJu3FkxFCfzuvUKSa_0Wvn68LnfDi5NadwqRqeIo2xZs6EKUL_iIeF4cWpNfveSiQeXP1fOG6daDg$" style="mso-padding-alt: 7.5px; padding: 7.5px; border-radius: 2px; border: 1.5px solid #666666; "><strong style="font-weight: normal; color: #000000; text-decoration: none; font-family: 'Arial', sans-serif; font-size:14px; line-height: 40px; ">  Report Suspicious  </strong></a>  ‌ </td>
      </tr></table>
    </td></tr></table>
  </td></tr></table>
<![endif]-->

<![if !((ie)|(mso))]>
  <div dir="ltr" lang="en" id="pfptBanner3sc8sjl" style="all: revert !important; display:block !important; text-align: left !important; margin:16px 0px 16px 0px !important; padding:8px 16px 8px 16px !important; border-radius: 4px !important; min-width: 200px !important; background-color: #CFD3D7 !important; background-color: #CFD3D7; border-top: 4px solid #8c8e91 !important; border-top: 4px solid #8c8e91;">
    <div id="pfptBanner3sc8sjl" style="all: unset !important; float:left !important; display:block !important; margin: 0px 0px 1px 0px !important; max-width: 600px !important;">
      <div id="pfptBanner3sc8sjl" style="all: unset !important; display:block !important; visibility: visible !important; background-color: #CFD3D7 !important; color:#000000 !important; color:#000000; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-weight:bold !important; font-weight:bold; font-size:14px !important; line-height:18px !important; line-height:18px">
        This Message Is From an External Sender
      </div>
      <div id="pfptBanner3sc8sjl" style="all: unset !important; display:block !important; visibility: visible !important; background-color: #CFD3D7 !important; color:#000000 !important; color:#000000; font-weight:normal; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-size:12px !important; line-height:18px !important; line-height:18px; margin-top:2px !important;">
This message came from outside your organization.
      </div>

    </div>
    <div id="pfptBanner3sc8sjl" style="all: unset !important; float: right !important; display: block !important; display: block; margin: 0px 0px 0px 16px !important; text-align: right !important; width: fit-content !important;">
<a id="pfptBanner3sc8sjl" href="https://us-phishalarm-ewt.proofpoint.com/EWT/v1/KGKeukY!vwQd0oYtD6YgRRdwH04lVq3dwf4Co07c8t_a1HZ4SLN9JCCeSBgmlAFYxCvT0ZZ45LtAcnKgnCJu3FkxFCfzuvUKSa_0Wvn68LnfDi5NadwqRqeIo2xZs6EKUL_iIeF4cWpNfveSiQeXP1fOG6daDg$"
    style="all: unset !important; display: inline-block !important; text-decoration: none">
    <div class="pfptPrimaryButton3sc8sjl" style="display: inline-block !important; display: inline-block; visibility: visible !important; opacity: 1 !important; color: #000000 !important; color: #000000; font-family: 'Arial', sans-serif !important; font-family: 'Arial', sans-serif; font-size: 14px !important; font-weight: normal !important; text-decoration: none !important; border-radius: 2px !important; padding: 7.5px 16px !important; margin: 3px 0 3px 16px !important; white-space: nowrap !important; width: fit-content !important;
        border: 1px solid #666666">
        Report Suspicious
    </div>
</a>
    </div>
    <div style="clear: both !important; display: block !important; visibility: hidden !important; line-height: 0 !important; font-size: 0.01px !important; height: 0px"> </div>
  </div>
<![endif]>

<div style="display:none !important;display:none;visibility:hidden;mso-hide:all;font-size:1px;color:#ffffff;line-height:1px;max-height:0px;opacity:0;overflow:hidden;">ZjQcmQRYFpfptBannerEnd</div>
<!-- Email Banner : END -->

<!-- BaNnErBlUrFlE-BoDy-end -->
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head><!-- BaNnErBlUrFlE-HeAdEr-start -->
<style>
  #pfptBanner3sc8sjl { all: revert !important; display: block !important; 
    visibility: visible !important; opacity: 1 !important; 
    background-color: #CFD3D7 !important; 
    max-width: none !important; max-height: none !important }
  .pfptPrimaryButton3sc8sjl:hover, .pfptPrimaryButton3sc8sjl:focus {
    background-color: #adb0b4 !important; }
  .pfptPrimaryButton3sc8sjl:active {
    background-color: #8c8e91 !important; }
</style>

<!-- BaNnErBlUrFlE-HeAdEr-end -->

<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Aptos;}
@font-face
        {font-family:Consolas;
        panose-1:2 11 6 9 2 2 4 3 2 4;}
@font-face
        {font-family:"Lucida Calligraphy";
        panose-1:3 1 1 1 1 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Aptos",sans-serif;
        mso-ligatures:standardcontextual;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#467886;
        text-decoration:underline;}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0in;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:.5in;
        font-size:11.0pt;
        font-family:"Aptos",sans-serif;
        mso-ligatures:standardcontextual;}
span.EmailStyle20
        {mso-style-type:personal-compose;
        font-family:"Aptos",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:11.0pt;
        mso-ligatures:none;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:593635398;
        mso-list-type:hybrid;
        mso-list-template-ids:-1971267944 67698703 67698713 67698715 67698703 67698713 67698715 67698703 67698713 67698715;}
@list l0:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1
        {mso-list-id:1023941759;
        mso-list-type:hybrid;
        mso-list-template-ids:-181343110 67698703 67698713 67698715 67698703 67698713 67698715 67698703 67698713 67698715;}
@list l1:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="#467886" vlink="#96607D" style="word-wrap:break-word">
<div class="WordSection1">
<p class="MsoNormal">Hello,<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">When trying to do “osu_bw H D” from a node without a GPU to a node with a GPU I got cuda device failures on the “H” node.
<o:p></o:p></p>
<p class="MsoNormal" style="text-indent:.5in">“no CUDA-capable device is detected”<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">This appears to be caused by the benchmarks calling init_accel()/cleanup_accel() even when the local rank will not be using an accelerator/gpu.<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">I hacked the code a bit to fix this in an older version (not sure how to do this on newer versions)<o:p></o:p></p>
<ol style="margin-top:0in" start="1" type="1">
<li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level1 lfo2">Move init_accel() after rank is valid - after call to “MPI_Comm_rank(MPI_COMM_WORLD, &myid)”<o:p></o:p></li><li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level1 lfo2">Update init_accel()/cleanup_accel() call to take a rank as argument .<o:p></o:p></li><ol style="margin-top:0in" start="1" type="a">
<li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level2 lfo2">int init_accel (int rank) {}<o:p></o:p></li></ol>
<li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level1 lfo2">Add checks to *_accel(int rank) funcs to check if rank is not using a GPU.<o:p></o:p></li><ol style="margin-top:0in" start="1" type="a">
<li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level2 lfo2">if (rank == 0 && 'H' == options.src) // skip accel init<o:p></o:p></li><li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level2 lfo2">if (rank == 1 && 'H' == options.dst) // skip accel init<o:p></o:p></li><li class="MsoListParagraph" style="margin-left:0in;mso-list:l0 level2 lfo2">Not sure how to handle collectives<o:p></o:p></li></ol>
</ol>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">Regards,<o:p></o:p></p>
<p class="MsoNormal">Adam<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal"><span style="font-family:"Lucida Calligraphy"">Adam Goldman<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt;font-family:"Arial",sans-serif">High Performance Networking<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt;font-family:"Arial",sans-serif">Intel Corporation<o:p></o:p></span></p>
<p class="MsoNormal"><a href="mailto:adam.goldman@intel.com"><span style="font-size:9.0pt">adam.goldman@intel.com</span></a><span style="font-size:9.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt"><o:p> </o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
</body>
</html>