<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:26:12 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9439] Introduce an lnet systemd service</title>
                <link>https://jira.whamcloud.com/browse/LU-9439</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This is effectively porting the initd version of the lnet service to systemd. This ticket should:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;Create an lnet systemd unit file&lt;/li&gt;
	&lt;li&gt;Correctly determine if systemd is on the target system and setup the rpm to install the unit file and enable the service&lt;/li&gt;
&lt;/ol&gt;
</description>
                <environment></environment>
        <key id="45827">LU-9439</key>
            <summary>Introduce an lnet systemd service</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="dmiter">Dmitry Eremin</assignee>
                                    <reporter username="dinatale2">Giuseppe Di Natale</reporter>
                        <labels>
                    </labels>
                <created>Tue, 2 May 2017 23:04:23 +0000</created>
                <updated>Wed, 7 Nov 2018 07:39:35 +0000</updated>
                            <resolved>Sat, 3 Jun 2017 04:36:55 +0000</resolved>
                                                    <fixVersion>Lustre 2.10.0</fixVersion>
                    <fixVersion>Lustre 2.11.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>15</watches>
                                                                            <comments>
                            <comment id="194214" author="gerrit" created="Tue, 2 May 2017 23:05:51 +0000"  >&lt;p&gt;Giuseppe Di Natale (dinatale2@llnl.gov) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/26925&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26925&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: lnet systemd service&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 064f80b504d6eb83c9ddca376bc3eb8aa0845e95&lt;/p&gt;</comment>
                            <comment id="194216" author="dinatale2" created="Tue, 2 May 2017 23:10:49 +0000"  >&lt;p&gt;I&apos;d also like to provide a sample lnet.conf yaml file for lnetctl as part of this ticket. Could someone point me to a suitable example file?&lt;/p&gt;</comment>
                            <comment id="194313" author="pjones" created="Wed, 3 May 2017 16:47:59 +0000"  >&lt;p&gt;Amir&lt;/p&gt;

&lt;p&gt;Do you have a suitable example to share?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="194330" author="ashehata" created="Wed, 3 May 2017 17:29:02 +0000"  >&lt;p&gt;I can provide an example, but the YAML file format has changed in the latest master. We still support the older format, but since this ticket is on master, below is a sample of the latest YAML config file.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;net:
    - net type: o2ib1
      local NI(s):
        - nid: 172.16.1.4@o2ib1
          status: up
          interfaces:
              0: ib0
          statistics:
              send_count: 7
              recv_count: 7
              drop_count: 0
          tunables:
              peer_timeout: 180
              peer_credits: 128
              peer_buffer_credits: 0
              credits: 1024
          lnd tunables:
              peercredits_hiw: 64
              map_on_demand: 32
              concurrent_sends: 256
              fmr_pool_size: 2048
              fmr_flush_trigger: 512
              fmr_cache: 1
          tcp bonding: 0
          dev cpt: 0
          CPT: &lt;span class=&quot;code-quote&quot;&gt;&quot;[0,1]&quot;&lt;/span&gt;
        - nid: 172.16.2.4@o2ib1
          status: up
          interfaces:
              0: ib1
          statistics:
              send_count: 0
              recv_count: 0
              drop_count: 0
          tunables:
              peer_timeout: 180
              peer_credits: 128
              peer_buffer_credits: 0
              credits: 1024
          lnd tunables:
              peercredits_hiw: 64
              map_on_demand: 32
              concurrent_sends: 256
              fmr_pool_size: 2048
              fmr_flush_trigger: 512
              fmr_cache: 1
          tcp bonding: 0
          dev cpt: 1
          CPT: &lt;span class=&quot;code-quote&quot;&gt;&quot;[0,1]&quot;&lt;/span&gt;
route:
    - net: o2ib
      gateway: 172.16.1.1@o2ib1
      hop: -1
      priority: 0
      state: down
peer:
    - primary nid: 192.168.1.2@o2ib
      Multi-Rail: True
      peer ni:
        - nid: 192.168.1.2@o2ib
          state: NA
          max_ni_tx_credits: 0
          available_tx_credits: 0
          min_tx_credits: 0
          tx_q_num_of_buf: 0
          available_rtr_credits: 0
          min_rtr_credits: 0
          send_count: 0
          recv_count: 0
          drop_count: 0
          refcount: 2
        - nid: 192.168.2.2@o2ib
          state: NA
          max_ni_tx_credits: 0
          available_tx_credits: 0
          min_tx_credits: 0
          tx_q_num_of_buf: 0
          available_rtr_credits: 0
          min_rtr_credits: 0
          send_count: 0
          recv_count: 0
          drop_count: 0
          refcount: 2
    - primary nid: 172.16.1.1@o2ib1
      Multi-Rail: True
      peer ni:
        - nid: 172.16.1.1@o2ib1
          state: up
          max_ni_tx_credits: 128
          available_tx_credits: 128
          min_tx_credits: 127
          tx_q_num_of_buf: 0
          available_rtr_credits: 128
          min_rtr_credits: 128
          send_count: 7
          recv_count: 7
          drop_count: 0
          refcount: 4
        - nid: 172.16.2.1@o2ib1
          state: NA
          max_ni_tx_credits: 128
          available_tx_credits: 128
          min_tx_credits: 127
          tx_q_num_of_buf: 0
          available_rtr_credits: 128
          min_rtr_credits: 128
          send_count: 0
          recv_count: 0
          drop_count: 0
          refcount: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="194361" author="morrone" created="Wed, 3 May 2017 18:55:16 +0000"  >&lt;p&gt;Is that really an input file, or was that output?  For instance, &quot;status&quot; doesn&apos;t seem like something that would appear in input.&lt;/p&gt;</comment>
                            <comment id="194392" author="ashehata" created="Thu, 4 May 2017 01:35:51 +0000"  >&lt;p&gt;This is an output. But the way it&apos;s designed you can feed the output YAML config, into the input. The code will only look at relevant parameters. Here is a cleaned input file, removing the unnecessary parameters:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;net:
    - net type: o2ib1
      local NI(s):
        - nid: 172.16.1.4@o2ib1
          interfaces:
              0: ib0
          tunables:
              peer_timeout: 180
              peer_credits: 128
              peer_buffer_credits: 0
              credits: 1024
          lnd tunables:
              peercredits_hiw: 64
              map_on_demand: 32
              concurrent_sends: 256
              fmr_pool_size: 2048
              fmr_flush_trigger: 512
              fmr_cache: 1
          CPT: &lt;span class=&quot;code-quote&quot;&gt;&quot;[0,1]&quot;&lt;/span&gt;
        - nid: 172.16.2.4@o2ib1
          interfaces:
              0: ib1
          tunables:
              peer_timeout: 180
              peer_credits: 128
              peer_buffer_credits: 0
              credits: 1024
          lnd tunables:
              peercredits_hiw: 64
              map_on_demand: 32
              concurrent_sends: 256
              fmr_pool_size: 2048
              fmr_flush_trigger: 512
              fmr_cache: 1
          CPT: &lt;span class=&quot;code-quote&quot;&gt;&quot;[0,1]&quot;&lt;/span&gt;
route:
    - net: o2ib
      gateway: 172.16.1.1@o2ib1
      hop: -1
      priority: 0
peer:
    - primary nid: 192.168.1.2@o2ib
      Multi-Rail: True
      peer ni:
        - nid: 192.168.1.2@o2ib
        - nid: 192.168.2.2@o2ib
    - primary nid: 172.16.1.1@o2ib1
      Multi-Rail: True
      peer ni:
        - nid: 172.16.1.1@o2ib1
        - nid: 172.16.2.1@o2ib1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="194593" author="gerrit" created="Fri, 5 May 2017 00:26:58 +0000"  >&lt;p&gt;Giuseppe Di Natale (dinatale2@llnl.gov) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/26959&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26959&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 18e36250bd06605e20f3c42ae4802ee428c27f57&lt;/p&gt;</comment>
                            <comment id="194720" author="dinatale2" created="Fri, 5 May 2017 17:53:39 +0000"  >&lt;p&gt;Thank you for the sample lnet.conf file. I&apos;m going to be generating a patch to provide a sample lnet.conf and I&apos;m noticing that it&apos;s going to require changes to init.d/lnet. The lnet init.d script relies on the existence of lnet.conf to determine if lnetctl should be used. I really don&apos;t want to hold this ticket up for that change... Should I go ahead and break that change out into its own ticket?&lt;/p&gt;</comment>
                            <comment id="194744" author="gerrit" created="Fri, 5 May 2017 22:26:58 +0000"  >&lt;p&gt;Giuseppe Di Natale (dinatale2@llnl.gov) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/26971&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26971&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Provide a sample lnet.conf file&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: bb5954645a672b261a4e73edd76a29e79ce3542d&lt;/p&gt;</comment>
                            <comment id="194988" author="adilger" created="Tue, 9 May 2017 00:34:26 +0000"  >&lt;p&gt;One option to handle this difference in the presence of lnet.conf would be to skip it if &lt;tt&gt;egrep -c -v &quot;&amp;#94;#|&amp;#94;$&quot; /etc/lnet.conf&lt;/tt&gt; returns zero lines of real input. Not perfect, but should handle the case of the example lnet.conf. &lt;/p&gt;</comment>
                            <comment id="195622" author="gerrit" created="Fri, 12 May 2017 05:06:47 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/26959/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26959/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 645153be3eb1fd8c634717507f73d85625d1b84a&lt;/p&gt;</comment>
                            <comment id="195913" author="bogl" created="Mon, 15 May 2017 23:15:59 +0000"  >&lt;p&gt;Since landing in master of &lt;a href=&quot;https://review.whamcloud.com/26959&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26959&lt;/a&gt; a few days ago lustre_rmmod called with no arguments no longer works as expected.  It&apos;s supposed to remove all lustre modules in that case.  it doesn&apos;t.  example:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lustre_rmmod
ERROR: Module ksocklnd is in use
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;It refuses to unload lnet modules due to ptlrpc still being loaded.  The following shows what modules stay loaded after lustre_rmmod is run, and shows that after an explicit remove of ptlrpc lustre_rmmod then operates as expected:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lsmod | more
Module                  Size  Used by
ksocklnd              187377  1 
ptlrpc               2278586  0 
obdclass             1785957  1 ptlrpc
lnet                  486107  3 ksocklnd,ptlrpc,obdclass
libcfs                393722  4 ksocklnd,ptlrpc,obdclass,lnet
sunrpc                261975  0 
crc32c                 12759  0 
ppdev                  17750  0 
parport_pc             45587  0 
  .  
  .
# rmmod ptlrpc
# lustre_rmmod
# lsmod | more
Module                  Size  Used by
sunrpc                261975  0 
crc32c                 12759  0 
ppdev                  17750  0 
parport_pc             45587  0 
  .
  .
  .
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="196035" author="dinatale2" created="Tue, 16 May 2017 16:25:31 +0000"  >&lt;p&gt;Bob, can you point me to some logs or provide more details? I can&apos;t reproduce the lustre_rmmod issue locally.&lt;/p&gt;</comment>
                            <comment id="196047" author="bogl" created="Tue, 16 May 2017 17:24:40 +0000"  >&lt;p&gt;reproduces 100% on sles11sp4 client.  another example:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;sles11sp4gm:/home/bogl/lustre-release # mount -t lustre -o flock,user_xattr centos2:/lustre /mnt/lustre
sles11sp4gm:/home/bogl/lustre-release # umount /mnt/lustre
sles11sp4gm:/home/bogl/lustre-release # lustre_rmmod
ERROR: Module ksocklnd is in use
sles11sp4gm:/home/bogl/lustre-release # rmmod ptlrpc
sles11sp4gm:/home/bogl/lustre-release # lustre_rmmod
sles11sp4gm:/home/bogl/lustre-release # 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="196129" author="gerrit" created="Wed, 17 May 2017 07:40:21 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/26925/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26925/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: lnet systemd service&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 32d1a1c5d610d054ad4609c1cf332172e8310805&lt;/p&gt;</comment>
                            <comment id="196200" author="bogl" created="Wed, 17 May 2017 17:06:33 +0000"  >&lt;p&gt;lustre_rmmod problem reproduces on el6 client too.  another example:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@centos69 x86_64]# mount -t lustre -o flock,user_xattr centos2:/lustre /mnt/lustre
[root@centos69 x86_64]# umount /mnt/lustre
[root@centos69 x86_64]# lustre_rmmod
ERROR: Module ksocklnd is in use
[root@centos69 x86_64]# rmmod ptlrpc
[root@centos69 x86_64]# lustre_rmmod
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Can&apos;t make it happen on el7 or sles12sp2.&lt;/p&gt;
</comment>
                            <comment id="196243" author="cliffw" created="Wed, 17 May 2017 21:00:16 +0000"  >&lt;p&gt;DDN-410 also appears to be related to this patch. &lt;br/&gt;
We also see the issue on soak/spirit clusters. &lt;br/&gt;
We reproduce the issue in the same way as Bob above.&lt;br/&gt;
I do not like this change, I don&apos;t see how you can unload all modules without multiple invocations of lustre_rmmod.  The original intent of lustre_rmmod was to have a script that scrubbed everything, always. Because it&#8217;s simple and stupid and works. Smart admins who only want to remove one module can use the lnetctl and rmmod commands without this script. &lt;/p&gt;</comment>
                            <comment id="196265" author="dinatale2" created="Wed, 17 May 2017 23:18:14 +0000"  >&lt;p&gt;I may have already asked this, but is there a reason why we have a custom module removal script? Why aren&apos;t we just using `modprobe -r`? The tool along with the removal option exists in el6 and sles11. In the case of the init scripts, we call `modprobe -r ptlrpc`, then lctl/lnetctl, then `modprobe -r` the top module in the stack.&lt;/p&gt;</comment>
                            <comment id="196269" author="morrone" created="Wed, 17 May 2017 23:41:31 +0000"  >&lt;p&gt;Here is what the man page for modprobe -r says:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; -r, --remove
 This option causes modprobe to remove rather than insert a module. If the modules it
 depends on are also unused, modprobe will try to remove them too. Unlike insertion,
 more than one module can be specified on the command line (it does not make sense to
 specify module parameters when removing modules).
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Note that it says &quot;If the modules &lt;em&gt;it depends on&lt;/em&gt; are also unused&quot;.  It does &lt;em&gt;not&lt;/em&gt; say &quot;If there are modules that depend upon it, but they are unused, it removes those first&quot;.  That is an important distinction.  If there are modules using ptlrpc, then ptlrpc will not walk &lt;em&gt;up&lt;/em&gt; (meaning in the direction of things that depend on ptlrpc) the dependency tree searching out a point where it finds a module that can be removed.&lt;/p&gt;

&lt;p&gt;lustre_rmmod walks the tree of modules that &lt;em&gt;depend upon&lt;/em&gt; the specified module, removing those first (if possible) so that it will then become possible to remove the specified module.  lustre_rmmod can potentially use modprobe -r at the various removal steps, but modprobe -r as described in the man page does not do what lustre_rmmod does.&lt;/p&gt;

&lt;p&gt;In addition, lustre_rmmod was supposed to be smart enough to know that it may need to issue a command to stop networking before the lnet module can be removed.  It sounds to me (from comments from Bob and Cliff) that somewhere along the way lustre_rmmod was broken.&lt;/p&gt;</comment>
                            <comment id="196271" author="dinatale2" created="Wed, 17 May 2017 23:53:30 +0000"  >&lt;p&gt;I still can&apos;t reproduce this on an el6 based machine. Haven&apos;t tried sles11 yet.&lt;/p&gt;

&lt;p&gt;Also, I need more info. Are you doing these tests with ldiskfs? Are you bringing lnet up before any of this?&lt;/p&gt;

&lt;p&gt;Can you also change the unload_dep_modules_inclusive function in lustre_rmmod to be the following:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# Unload all modules dependent on $1 (include removal of $1)
unload_dep_modules_inclusive() {
&#160;&#160; &#160;local MODULE=$1

&#160;&#160; &#160;# if $MODULE not loaded, return 0
&#160;&#160; &#160;lsmod | egrep -q &quot;^\&amp;lt;$MODULE\&amp;gt;&quot; || return 0
&#160;&#160; &#160;unload_dep_modules_exclusive $MODULE || return 1
    echo &quot;Removing $MODULE&quot;
&#160;&#160; &#160;rmmod $MODULE || return 1
&#160;&#160; &#160;return 0
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;That will give me a good idea on what order the modules are being removed in so we can eliminate a potential ordering problem.&lt;/p&gt;</comment>
                            <comment id="196272" author="ashehata" created="Thu, 18 May 2017 00:08:00 +0000"  >&lt;p&gt;the issue here is that ptlrpc is not being removed. ptlrpc takes a reference on lnet. If it doesn&apos;t release that reference, lnet can not be unloaded.&lt;/p&gt;

&lt;p&gt;lsmod | grep lnet  &lt;br/&gt;
483919  3 ko2iblnd,obdclass,ptlrpc&lt;/p&gt;

&lt;p&gt;the current lustre_rmmod after the patch in this ticket grabs the list of modules which depend on lnet and tries to remove them first. However, it goes through them in the order listed above. So it tries to remove ko2iblnd first, but it can&apos;t, because networks are still loaded. When ptlrpc is removed first then it calls LNetNIFini() which decrements the reference counter. This brings the reference counter on LNet to 0. This triggers the cleanup code to cleanup the networks, routes, etc. allowing ko2iblnd to be unloaded and lustre_rmmod to succeed.&lt;/p&gt;

&lt;p&gt;The previous incarnation of lustre_rmmod took that into account and explicitly removed ptlrpc.&lt;/p&gt;

&lt;p&gt;Simply removing the network issuing &quot;lnetctl lnet unconfigure&quot; is not going to work either, because of the reference count taken by ptlrpc.&lt;/p&gt;

&lt;p&gt;In this case what you&apos;d need to do is:&lt;br/&gt;
1. Bring down all networks manually using &quot;lnetctl net del&quot;&lt;br/&gt;
-&amp;gt; This step essentially removes dependency between lnet module and ko2iblnd (or other lnds)&lt;br/&gt;
2. lustre_rmmod&lt;br/&gt;
-&amp;gt; This will succeed because there is nothing hindering ko2iblnd from being unloaded, and then ptlrpc will be unloaded as well, releasing the final reference hold on lnet, allowing lnet to be unloaded.&lt;/p&gt;

&lt;p&gt;I don&apos;t think this is a reasonable process to expect people to go through to unload lustre. That&apos;s why lustre_rmmod was created (I believe, although that predates me)&lt;/p&gt;

&lt;p&gt;What lustre_rmmod ought to do, is to know that ptlrpc needs to be unloaded to allow lnet and the lnds to be unloaded.&lt;/p&gt;</comment>
                            <comment id="196273" author="ashehata" created="Thu, 18 May 2017 00:15:19 +0000"  >&lt;p&gt;Giuseppe,&lt;br/&gt;
I think the function of interest is:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;  9 unload_dep_modules_exclusive() {
 10 &#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;local MODULE=$1
 11 &#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;local DEPS=&lt;span class=&quot;code-quote&quot;&gt;&quot;$(lsmod | awk &lt;span class=&quot;code-quote&quot;&gt;&apos;($1 == &quot;&lt;/span&gt;&apos;&lt;/span&gt;$MODULE&lt;span class=&quot;code-quote&quot;&gt;&apos;&lt;span class=&quot;code-quote&quot;&gt;&quot;) { print $4 }&apos;&lt;/span&gt;)&quot;&lt;/span&gt;
 12 &#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;&lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; SUBMOD in $(echo $DEPS | tr &lt;span class=&quot;code-quote&quot;&gt;&apos;,&apos;&lt;/span&gt; &lt;span class=&quot;code-quote&quot;&gt;&apos; &apos;&lt;/span&gt;); &lt;span class=&quot;code-keyword&quot;&gt;do&lt;/span&gt;
 13 &#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;&#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;unload_dep_modules_inclusive $SUBMOD || &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; 1
 14 &#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;done
 15 &#187;&#183;&#183;&#183;&#183;&#183;&#183;&#183;&lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; 0
 16 }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This just grabs the output from lsmod, as I indicated above:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;lsmod | grep lnet 
483919 3 ko2iblnd,obdclass,ptlrpc
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Is that different in el6 or sles11?&lt;/p&gt;

&lt;p&gt;more detail on the order of removal&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;removing module:  libcfs
fid,fld,lmv,mdc,lov,lnet,ko2iblnd,lustre,obdclass,ptlrpc

removing module:  lnet
ko2iblnd,obdclass,ptlrpc
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="196286" author="bogl" created="Thu, 18 May 2017 03:01:24 +0000"  >&lt;blockquote&gt;
&lt;p&gt;I still can&apos;t reproduce this on an el6 based machine. Haven&apos;t tried sles11 yet.&lt;br/&gt;
Also, I need more info. Are you doing these tests with ldiskfs? Are you bringing lnet up before any of this?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I said it reproduces on a &lt;b&gt;client&lt;/b&gt;.   There are no ldiskfs modules loaded, those are only on servers.&lt;/p&gt;

&lt;p&gt;I am not bringing up or installing any modules before the &apos;mount&apos; command shown in the examples.  All the client lustre modules involved are loaded only by the mount.  No modules are preloaded.  There is no manual load or startup of LNET.  No script based startup either, in init.d scripts for example.&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;It sounds to me (from comments from Bob and Cliff) that somewhere along the way lustre_rmmod was broken.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;As I already said I&apos;m pretty sure lustre_rmmod was broken by the recent landing of &lt;a href=&quot;https://review.whamcloud.com/26959&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26959&lt;/a&gt;, &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&quot;.  Before that change it worked correctly.&lt;/p&gt;</comment>
                            <comment id="196301" author="gerrit" created="Thu, 18 May 2017 05:59:14 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/27181&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27181&lt;/a&gt;&lt;br/&gt;
Subject: Revert &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&quot;&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 5e47c0de060f71f809dab69adafa1c814b4ad253&lt;/p&gt;</comment>
                            <comment id="196302" author="gerrit" created="Thu, 18 May 2017 05:59:39 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/27181/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27181/&lt;/a&gt;&lt;br/&gt;
Subject: Revert &quot;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&quot;&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 0bc19643b94f0adf28db365a07bcefeff4ebc51d&lt;/p&gt;</comment>
                            <comment id="196306" author="adilger" created="Thu, 18 May 2017 06:35:34 +0000"  >&lt;p&gt;I suspect that all that was needed here was to include &lt;tt&gt;ptlrpc&lt;/tt&gt; into the list of modules being unloaded if no argument was given. That would ensure it is unloaded before LNet stop, and the rest of the unload could continue. &lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; [[ -z &lt;span class=&quot;code-quote&quot;&gt;&quot;$modules&quot;&lt;/span&gt; || &lt;span class=&quot;code-quote&quot;&gt;&quot;$modules&quot;&lt;/span&gt; == &lt;span class=&quot;code-quote&quot;&gt;&quot;ldiskfs&quot;&lt;/span&gt; ]]; then
                 modules=&lt;span class=&quot;code-quote&quot;&gt;&quot;ptlrpc ldiskfs libcfs&quot;&lt;/span&gt;
        fi
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Bob, Cliff,&lt;br/&gt;
Could you please give this a try on the un-reverted script to see if it solves your problem?  If yes, please fix up and resubmit the patch for landing. &lt;/p&gt;</comment>
                            <comment id="196351" author="dinatale2" created="Thu, 18 May 2017 14:57:51 +0000"  >&lt;p&gt;Ok, after seeing the further details above, I agree with Andreas. It looks like a simple ordering problem that appears to be causing the problem. I agree with adding ptlrpc to the front of the modules list.&lt;/p&gt;

&lt;p&gt;Looking at the output of lsmod from my el6 machine, it appears ptlrpc always appears first in dependency listings.&lt;/p&gt;

&lt;p&gt;Please let me know if that fixes the issue and I&apos;ll fix up the patch.&lt;/p&gt;</comment>
                            <comment id="196352" author="bogl" created="Thu, 18 May 2017 15:01:02 +0000"  >&lt;p&gt;I tried out Andreas&apos; suggestion on el6 and sles11.  It does fix the problem there.  Haven&apos;t exhaustively tested elsewhere to make sure it doesn&apos;t break anything else.&lt;/p&gt;</comment>
                            <comment id="196380" author="dinatale2" created="Thu, 18 May 2017 18:40:40 +0000"  >&lt;p&gt;Quick question, should I be submitting fixes for the reverted patches as new patches to gerrit?&lt;/p&gt;</comment>
                            <comment id="196410" author="pjones" created="Thu, 18 May 2017 23:16:46 +0000"  >&lt;p&gt;Yes I think so&lt;/p&gt;</comment>
                            <comment id="196489" author="ashehata" created="Fri, 19 May 2017 17:25:18 +0000"  >&lt;p&gt;Please note the same issue exists with lnet_selftest module. lnet_selftest depends on lnet, but the same ordering issue impacts its removal.&lt;/p&gt;

&lt;p&gt;It&apos;s not as critical as ptlrpc, but would be nice to get lustre_rmmod to handle it as well.&lt;/p&gt;</comment>
                            <comment id="196494" author="dinatale2" created="Fri, 19 May 2017 17:55:49 +0000"  >&lt;p&gt;I&apos;ll add &quot;lnet_selftest&quot; to the list of modules.&lt;/p&gt;</comment>
                            <comment id="196504" author="gerrit" created="Fri, 19 May 2017 18:23:24 +0000"  >&lt;p&gt;Giuseppe Di Natale (dinatale2@llnl.gov) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/27213&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27213&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d71489e1d8f6da11e52f965e8cd7b6f87a17252d&lt;/p&gt;</comment>
                            <comment id="196505" author="gerrit" created="Fri, 19 May 2017 18:23:24 +0000"  >&lt;p&gt;Giuseppe Di Natale (dinatale2@llnl.gov) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/27214&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27214&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: lnet systemd service&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 1656727e8ab2fa2b5d29d7f356f2c45131db6bae&lt;/p&gt;</comment>
                            <comment id="198018" author="gerrit" created="Sat, 3 Jun 2017 03:58:32 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/27213/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27213/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Change behavior of lustre_rmmod&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: c6e5f4069edaecb8461df2d03566bd5e333b8a5c&lt;/p&gt;</comment>
                            <comment id="198019" author="gerrit" created="Sat, 3 Jun 2017 03:58:47 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/26971/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/26971/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: Provide a sample lnet.conf file&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 25ee73e7733214f7a46f81b2540b7fca67b0eef1&lt;/p&gt;</comment>
                            <comment id="198047" author="pjones" created="Sat, 3 Jun 2017 04:36:55 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                            <comment id="198775" author="nathan.crawford@uci.edu" created="Fri, 9 Jun 2017 18:50:17 +0000"  >&lt;p&gt;I believe the redirect on line 16 of the systemd lnet.service.in file causes failure on startup. Switching &quot;lnetctl import &amp;lt; /etc/lnet.conf&quot; to &quot;lnetctl import /etc/lnet.conf&quot; on the installed lnet.service file seems to work fine.&lt;/p&gt;

&lt;p&gt;If redirection is necessary for systemd service files, I&apos;ve seen people do things like:&lt;br/&gt;
ExecStart=/bin/sh -c &apos;/usr/sbin/lnetctl import &amp;lt; /etc/lnet.conf&apos;&lt;/p&gt;

&lt;p&gt;-Nate&lt;/p&gt;</comment>
                            <comment id="198778" author="ashehata" created="Fri, 9 Jun 2017 19:34:31 +0000"  >&lt;p&gt;lnetctl handles both redirection and being given a file name directly. So the proposed change should work.&lt;/p&gt;</comment>
                            <comment id="198916" author="dinatale2" created="Mon, 12 Jun 2017 15:10:50 +0000"  >&lt;p&gt;I&apos;ll go ahead and submit a patch to correct that today.&lt;/p&gt;</comment>
                            <comment id="198927" author="dinatale2" created="Mon, 12 Jun 2017 15:53:41 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9655&quot; title=&quot;Remove file redirection in lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9655&quot;&gt;&lt;del&gt;LU-9655&lt;/del&gt;&lt;/a&gt; for the file redirection fix.&lt;/p&gt;</comment>
                            <comment id="202743" author="gerrit" created="Wed, 19 Jul 2017 14:01:25 +0000"  >&lt;p&gt;Dmitry Eremin (dmitry.eremin@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/28106&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28106&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: add lnet script in .gitignore&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 67305d237d75b608f49a7e264b6cef971e8c7494&lt;/p&gt;</comment>
                            <comment id="203869" author="gerrit" created="Sat, 29 Jul 2017 00:03:22 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/28106/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/28106/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9439&quot; title=&quot;Introduce an lnet systemd service&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9439&quot;&gt;&lt;del&gt;LU-9439&lt;/del&gt;&lt;/a&gt; scripts: add lnet script in .gitignore&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 6ca43d539b2856d68d330023f04643f9e09a8cfa&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="34457">LU-7736</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="46655">LU-9655</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="32588">LU-8384</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="28242">LU-6132</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzbqf:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>