<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:32:07 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17040] LFSCK hang with resending request on EINPROGRESS </title>
                <link>https://jira.whamcloud.com/browse/LU-17040</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;LFSCK hung with no progress after 10+ hours. There was an attempt to stop it:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;UID          PID    PPID  C STIME TTY          TIME CMD
root     1241254 1241253  0 00:29 ?        00:00:00 lctl lfsck_stop --device snx11922-MDT0001 --all
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The stop did not help, and the node was crashed 10 hours after the stop. Here are the ps -m output and the stacks.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; ps -m | grep &quot;lfsck\|scrub\|lctl&quot;
[0 00:00:01.186] [RU]  PID: 164187   TASK: ffff90055862af80  CPU: 6    COMMAND: &quot;OI_scrub&quot;
[0 10:36:24.817] [ID]  PID: 1241254  TASK: ffff9002ab60c740  CPU: 19   COMMAND: &quot;lctl&quot;
[0 23:45:12.588] [ID]  PID: 164188   TASK: ffff90055862c740  CPU: 8    COMMAND: &quot;lfsck_layout&quot;
[0 23:45:12.623] [UN]  PID: 164186   TASK: ffff90055862df00  CPU: 9    COMMAND: &quot;lfsck&quot;
[0 23:45:16.187] [ID]  PID: 164189   TASK: ffff900558628000  CPU: 17   COMMAND: &quot;lfsck_namespace&quot;

PID: 164186   TASK: ffff90055862df00  CPU: 9    COMMAND: &quot;lfsck&quot;
 #0 [ffffa414250f7d08] __schedule at ffffffffa374e1d4
 #1 [ffffa414250f7d68] schedule at ffffffffa374e648
 #2 [ffffa414250f7d78] osd_otable_it_next at ffffffffc1db3983 [osd_ldiskfs]
 #3 [ffffa414250f7e08] lfsck_master_oit_engine at ffffffffc187f17e [lfsck]
 #4 [ffffa414250f7e78] lfsck_master_engine at ffffffffc188073e [lfsck]
 #5 [ffffa414250f7f10] kthread at ffffffffa2f043a6
 #6 [ffffa414250f7f50] ret_from_fork at ffffffffa380023f

PID: 164187   TASK: ffff90055862af80  CPU: 6    COMMAND: &quot;OI_scrub&quot;
 #0 [fffffe000013be48] crash_nmi_callback at ffffffffa2e54863
 #1 [fffffe000013be50] nmi_handle at ffffffffa2e24c83
 #2 [fffffe000013bea8] default_do_nmi at ffffffffa3741f89
 #3 [fffffe000013bec8] do_nmi at ffffffffa2e2518e
 #4 [fffffe000013bef0] end_repeat_nmi at ffffffffa38015c4
    [exception RIP: kthread_should_stop+33]
    RIP: ffffffffa2f04fc1  RSP: ffffa414250ffce0  RFLAGS: 00000246
    RAX: 0000000000000000  RBX: ffff900576b28000  RCX: ffffa414250ffdc8
    RDX: ffff900576b2d880  RSI: ffff900576b28000  RDI: ffff900513717000
    RBP: ffffa414250ffdc8   R8: 0000000000000001   R9: 0000000000000000
    R10: ffffa414250ffc68  R11: 0000000000000000  R12: 0000000000000001
    R13: ffff900576b2d880  R14: ffff900513717000  R15: ffff900576b2d820
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- &amp;lt;NMI exception stack&amp;gt; ---
 #5 [ffffa414250ffce0] kthread_should_stop at ffffffffa2f04fc1
 #6 [ffffa414250ffce0] osd_scrub_next at ffffffffc1dadeb4 [osd_ldiskfs]
 #7 [ffffa414250ffd78] osd_inode_iteration at ffffffffc1db27fb [osd_ldiskfs]
 #8 [ffffa414250ffe40] osd_scrub_main at ffffffffc1db3319 [osd_ldiskfs]
 #9 [ffffa414250fff10] kthread at ffffffffa2f043a6
#10 [ffffa414250fff50] ret_from_fork at ffffffffa380023f

PID: 164188   TASK: ffff90055862c740  CPU: 8    COMMAND: &quot;lfsck_layout&quot;
 #0 [ffffa41425127da8] __schedule at ffffffffa374e1d4
 #1 [ffffa41425127e08] schedule at ffffffffa374e648
 #2 [ffffa41425127e18] lfsck_assistant_engine at ffffffffc18816e2 [lfsck]
 #3 [ffffa41425127f10] kthread at ffffffffa2f043a6
 #4 [ffffa41425127f50] ret_from_fork at ffffffffa380023f

PID: 164189   TASK: ffff900558628000  CPU: 17   COMMAND: &quot;lfsck_namespace&quot;
 #0 [ffffa41425137da8] __schedule at ffffffffa374e1d4
 #1 [ffffa41425137e08] schedule at ffffffffa374e648
 #2 [ffffa41425137e18] lfsck_assistant_engine at ffffffffc18816e2 [lfsck]
 #3 [ffffa41425137f10] kthread at ffffffffa2f043a6
 #4 [ffffa41425137f50] ret_from_fork at ffffffffa380023f


PID: 1241254  TASK: ffff9002ab60c740  CPU: 19   COMMAND: &quot;lctl&quot;
 #0 [ffffa4142502fc28] __schedule at ffffffffa374e1d4
 #1 [ffffa4142502fc88] schedule at ffffffffa374e648
 #2 [ffffa4142502fc98] lfsck_stop at ffffffffc1876fb5 [lfsck]
 #3 [ffffa4142502fd08] mdd_iocontrol at ffffffffc1a2e265 [mdd]
 #4 [ffffa4142502fd58] mdt_iocontrol at ffffffffc1bc5c2f [mdt]
 #5 [ffffa4142502fdf0] class_handle_ioctl at ffffffffc12a86ff [obdclass]
 #6 [ffffa4142502fe68] obd_class_ioctl at ffffffffc12a9047 [obdclass]
 #7 [ffffa4142502fe80] do_vfs_ioctl at ffffffffa312e0c4
 #8 [ffffa4142502fef8] ksys_ioctl at ffffffffa312e700
 #9 [ffffa4142502ff30] __x64_sys_ioctl at ffffffffa312e746
#10 [ffffa4142502ff38] do_syscall_64 at ffffffffa2e0420b
 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I have found that the scrub inconsistent list has an entry with a zero inode, and it looks like scrub and LFSCK looped on it. When LFSCK gets EINPROGRESS it resends the request. After scrub processes the oii, it deletes the oii from the list, and then LFSCK tries osp_attr_get again and re-adds it to scrub. So the inconsistent list is never empty, and scrub cannot finish the inode iteration loop.&lt;br/&gt;
There is also an lfsck stop bug: I guess the LFSCK stop code doesn&apos;t have logic to stop scrub, but that requires an additional ticket.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; osd_inconsistent_item ffff9001fb355340 -x
struct osd_inconsistent_item {
  oii_list = {
    next = 0xffff9001fb355900, 
    prev = 0xffff900576b2d488
  }, 
  oii_cache = {
    oic_fid = {
      f_seq = 0x240019e29, 
      f_oid = 0x1bac, 
      f_ver = 0x0
    }, 
    oic_lid = {
      oii_ino = 0x0, 
      oii_gen = 0x0
    }, 
    oic_dev = 0xffff900576b28000, 
    oic_remote = 0x0
  }, 
  oii_insert = 0x1
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>2MDTs cluster FOFB/IO tests</environment>
        <key id="77510">LU-17040</key>
            <summary>LFSCK hang with resending request on EINPROGRESS </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="aboyko">Alexander Boyko</assignee>
                                    <reporter username="aboyko">Alexander Boyko</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Fri, 18 Aug 2023 13:57:38 +0000</created>
                <updated>Mon, 13 Nov 2023 02:44:16 +0000</updated>
                            <resolved>Wed, 25 Oct 2023 19:48:00 +0000</resolved>
                                    <version>Upstream</version>
                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="382947" author="gerrit" created="Fri, 18 Aug 2023 14:02:25 +0000"  >&lt;p&gt;&quot;Alexander Boyko &amp;lt;alexander.boyko@hpe.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51997&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51997&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17040&quot; title=&quot;LFSCK hang with resending request on EINPROGRESS &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17040&quot;&gt;&lt;del&gt;LU-17040&lt;/del&gt;&lt;/a&gt; scrub: inconsistent item&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6b91a8f591f24aed3ef7cc0c8b730bc578dab3e0&lt;/p&gt;</comment>
                            <comment id="390570" author="gerrit" created="Wed, 25 Oct 2023 18:03:32 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51997/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51997/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17040&quot; title=&quot;LFSCK hang with resending request on EINPROGRESS &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17040&quot;&gt;&lt;del&gt;LU-17040&lt;/del&gt;&lt;/a&gt; scrub: inconsistent item&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 461e3867ea11240c77ccd1bb71a3758506cf882e&lt;/p&gt;</comment>
                            <comment id="390607" author="pjones" created="Wed, 25 Oct 2023 19:48:00 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="77882">LU-17114</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03tc7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>