<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:11:01 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14584] LNet: 2 CPTs on a single NUMA node instead of one</title>
                <link>https://jira.whamcloud.com/browse/LU-14584</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;By default, Lustre 2.14 LNet routers detect 2 CPTs instead of 1 CPT on a single NUMA node server. If not discovered, this could lead to very unbalanced routers:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh02-oak01 ~]# cat /sys/kernel/debug/lnet/nis
nid                      status alive refs peer  rtr   max    tx   min
0@lo                         up     0    2    0    0     0     0     0
0@lo                         up     0    0    0    0     0     0     0
10.50.0.131@o2ib2            up     0 122544    8    0   128   127    40
10.50.0.131@o2ib2            up     0    1    8    0   128   127    57
10.0.2.214@o2ib5             up     0    2    8    0   128   128    75
10.0.2.214@o2ib5             up     0    2    8    0   128   128    70
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The expected behavior is that LNet would instantiate only a single CPT if there is a single NUMA node available. More details about such a single NUMA node LNet router are available below.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh02-oak01 ~]# lscpu 
Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                8
On-line CPU(s) list:   0-7
Thread(s) per core:    2
Core(s) per socket:    4
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 79
Model name:            Intel(R) Xeon(R) CPU E5-2637 v4 @ 3.50GHz
Stepping:              1
CPU MHz:               1387.481
CPU max MHz:           3700.0000
CPU min MHz:           1200.0000
BogoMIPS:              6999.30
Virtualization:        VT-x
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              15360K
NUMA node0 CPU(s):     0-7
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb cat_l3 cdp_l3 intel_pt ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdt_a rdseed adx smap xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts md_clear spec_ctrl intel_stibp flush_l1d


[root@sh02-oak01 ~]# ls -ald  /sys/devices/system/node/node*
drwxr-xr-x 4 root root 0 Apr  5 14:10 /sys/devices/system/node/node0


[root@sh02-oak01 ~]# numactl --hardware
available: 1 nodes (0)
node 0 cpus: 0 1 2 3 4 5 6 7
node 0 size: 65314 MB
node 0 free: 42722 MB
node distances:
node   0 
  0:  10 


[root@sh02-oak01 ~]# lctl get_param cpu_partition_table
cpu_partition_table=0	: 0 1 4 5
1	: 2 3 6 7
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This is happening with a default libcfs configuration (no libcfs module tuning):&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh02-oak01 ~]# cat /sys/module/libcfs/parameters/cpu_npartitions 
0
[root@sh02-oak01 ~]# cat /sys/module/libcfs/parameters/cpu_pattern 
N
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>CentOS 7</environment>
        <key id="63654">LU-14584</key>
            <summary>LNet: 2 CPTs on a single NUMA node instead of one</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="ashehata">Amir Shehata</assignee>
                                    <reporter username="sthiell">Stephane Thiell</reporter>
                        <labels>
                    </labels>
                <created>Mon, 5 Apr 2021 21:31:07 +0000</created>
                <updated>Wed, 7 Apr 2021 18:59:03 +0000</updated>
                                            <version>Lustre 2.13.0</version>
                    <version>Lustre 2.14.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="298111" author="ashehata" created="Wed, 7 Apr 2021 16:51:35 +0000"  >&lt;p&gt;What&apos;s the output of &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
lnetctl net show -v 4?
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;on the routers.&lt;/p&gt;

&lt;p&gt;Also, can you share how you configure your routers? Do you configure them as services? How do you load the configuration? From modprobe.d or from /etc/lnet.conf?&lt;/p&gt;

&lt;p&gt;Technically, LNet doesn&apos;t create the NUMA binding it queries it from libcfs.&lt;/p&gt;</comment>
                            <comment id="298125" author="sthiell" created="Wed, 7 Apr 2021 17:31:11 +0000"  >&lt;p&gt;Hi Amir!&lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;attaching the output of &lt;tt&gt;lnetctl net show -v 4&lt;/tt&gt; on a router with 1 NUMA node and the default libcfs config, which shows CPT=2 see  &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/38243/38243_sh01-oak01-numa1cpt2.txt&quot; title=&quot;sh01-oak01-numa1cpt2.txt attached to LU-14584&quot;&gt;sh01-oak01-numa1cpt2.txt&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/li&gt;
	&lt;li&gt;attaching the output of &lt;tt&gt;lnetctl net show -v 4&lt;/tt&gt; on a router with 1 NUMA node and the forced CPT=1 config, so CPT=1 see  &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/38244/38244_sh01-oak01-numa1cpt1_forced.txt&quot; title=&quot;sh01-oak01-numa1cpt1_forced.txt attached to LU-14584&quot;&gt;sh01-oak01-numa1cpt1_forced.txt&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;We use &lt;tt&gt;lnet.conf&lt;/tt&gt; and the &lt;tt&gt;lnet.service&lt;/tt&gt;:&lt;/p&gt;

&lt;p&gt;/etc/lnet.conf:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;global:
    - health_sensitivity: 0
net:
    - net type: o2ib1
      local NI(s):
        - nid:
          interfaces:
            0: ib0
    - net type: o2ib5
      local NI(s):
        - nid:
          interfaces:
            0: ib1
routing:
    - enable: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh01-oak01 ~]# systemctl status lnet.service
&#9679; lnet.service - lnet management
   Loaded: loaded (/usr/lib/systemd/system/lnet.service; enabled; vendor preset: disabled)
  Drop-In: /etc/systemd/system/lnet.service.d
           &#9492;&#9472;deps.conf, ibdev.conf
   Active: active (exited) since Fri 2021-04-02 13:58:26 PDT; 4 days ago
  Process: 79596 ExecStart=/usr/sbin/lnetctl import /etc/lnet.conf (code=exited, status=0/SUCCESS)
  Process: 79592 ExecStart=/usr/sbin/lnetctl lnet configure (code=exited, status=0/SUCCESS)
  Process: 79585 ExecStart=/sbin/modprobe lnet (code=exited, status=0/SUCCESS)
  Process: 79443 ExecStartPre=/bin/sh -c sleep 5 (code=exited, status=0/SUCCESS)
  Process: 78830 ExecStartPre=/usr/bin/systemctl restart openibd (code=exited, status=0/SUCCESS)
 Main PID: 79596 (code=exited, status=0/SUCCESS)
   CGroup: /system.slice/lnet.service

Apr 02 13:57:58 sh01-oak01.int systemd[1]: Starting lnet management...
Apr 02 13:58:26 sh01-oak01.int systemd[1]: Started lnet management.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;lnet service overrides:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh01-oak01 ~]# cat /etc/systemd/system/lnet.service.d/deps.conf 
[Unit]
After=dkms.service
[root@sh01-oak01 ~]# cat /etc/systemd/system/lnet.service.d/ibdev.conf 
[Service]
ExecStartPre=/usr/bin/systemctl restart openibd
ExecStartPre=/bin/sh -c &apos;sleep 5&apos;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We have the default &lt;tt&gt;/etc/modprobe.d/ko2iblnd.conf&lt;/tt&gt; from lustre-client RPM, untouched.&lt;/p&gt;

&lt;p&gt;Finally, to force the use of 1 CPT, we use the following configuration in &lt;tt&gt;/etc/modprobe.d/lnet.conf&lt;/tt&gt;:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;options libcfs cpu_pattern=&quot;0[0-7]&quot;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="298127" author="sthiell" created="Wed, 7 Apr 2021 17:34:52 +0000"  >&lt;p&gt;BTW, the high number of refs that I mentioned in this ticket is probably not related to this CPT issue at all; for that I opened &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14589&quot; title=&quot;LNet routers don&amp;#39;t reset peers after they reboot&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14589&quot;&gt;LU-14589&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="298133" author="ashehata" created="Wed, 7 Apr 2021 17:53:43 +0000"  >&lt;p&gt;When you have cpu_pattern=N (or undefined) can you show me the output of&lt;/p&gt;

&lt;p&gt;cat /sys/kernel/debug/lnet/cpu_partition_distance&lt;br/&gt;
cat /sys/kernel/debug/lnet/cpu_partition_table&lt;/p&gt;</comment>
                            <comment id="298139" author="sthiell" created="Wed, 7 Apr 2021 18:24:51 +0000"  >&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh01-oak02 ~]# cat /sys/module/libcfs/parameters/cpu_pattern 
N
[root@sh01-oak02 ~]# cat /sys/kernel/debug/lnet/cpu_partition_distance
0	: 0:10 1:10
1	: 0:10 1:10
[root@sh01-oak02 ~]# cat /sys/kernel/debug/lnet/cpu_partition_table
0	: 0 1 4 5
1	: 2 3 6 7
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And just to make sure, this one has a single NUMA node:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@sh01-oak02 ~]# ls -ald  /sys/devices/system/node/node*
drwxr-xr-x 4 root root 0 Apr  5 15:54 /sys/devices/system/node/node0
[root@sh01-oak02 ~]# 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="298141" author="ashehata" created="Wed, 7 Apr 2021 18:47:42 +0000"  >&lt;p&gt;Yes. So that&apos;s the issue (guess you added that at the top, but I missed it). For some reason libcfs is creating two CPTs even though there is only 1 NUMA node. I couldn&apos;t reproduce this issue on my VM. We will need to look at the code around this area and see if there have been some recent changes.&lt;/p&gt;</comment>
                            <comment id="298142" author="sthiell" created="Wed, 7 Apr 2021 18:58:00 +0000"  >&lt;p&gt;OK. We looked at some old Splunk logs, as LNet prints a message when loading, and it looks like it&apos;s not a new issue! Even with previous versions of Lustre (prior to 2.13), these routers were initializing 2 CPTs. From 2019:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;kernel: LNet: HW NUMA nodes: 1, HW CPU cores: 8, npartitions: 2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;From the &lt;a href=&quot;https://build.whamcloud.com/job/lustre-manual/lastSuccessfulBuild/artifact/lustre_manual.xhtml#dbdoclet.libcfstuning&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;Lustre manual&lt;/a&gt;:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Introduced in Lustre 2.9&lt;br/&gt;
In Lustre 2.9 and later the default is to use one CPT per NUMA node. In earlier versions of Lustre, by default there was a single CPT if the online CPU core count was four or fewer, and additional CPTs would be created depending on the number of CPU cores, typically with 4-8 cores per CPT.&lt;/p&gt;&lt;/blockquote&gt;


&lt;p&gt;Maybe it is a remnant of pre-Lustre 2.9, e.g. if HW NUMA = 1 and core count &amp;gt; 4, then 1 CPT is used for every four cores?&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="38244" name="sh01-oak01-numa1cpt1_forced.txt" size="4041" author="sthiell" created="Wed, 7 Apr 2021 17:26:23 +0000"/>
                            <attachment id="38243" name="sh01-oak01-numa1cpt2.txt" size="4027" author="sthiell" created="Wed, 7 Apr 2021 17:26:08 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01rcv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>