<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:31:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16966] ofd_object_fallocate dead lock?</title>
                <link>https://jira.whamcloud.com/browse/LU-16966</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We have had multiple servers get dead lock with this stack trace.&lt;/p&gt;

&lt;p&gt;(attached longer console output)&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Jul 15 05:46:28 nbp11-srv3 kernel: INFO: task ll_ost07_000:9230 blocked &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more than 120 seconds.
Jul 15 05:46:28 nbp11-srv3 kernel:      Tainted: G           OE    --------- -  - 4.18.0-425.3.1.el8_lustre.x86_64 #1
Jul 15 05:46:28 nbp11-srv3 kernel: &lt;span class=&quot;code-quote&quot;&gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot;&lt;/span&gt; disables &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; message.
Jul 15 05:46:28 nbp11-srv3 kernel: task:ll_ost07_000    state:D stack:    0 pid: 9230 ppid:     2 flags:0x80004080
Jul 15 05:46:28 nbp11-srv3 kernel: Call Trace:
Jul 15 05:46:28 nbp11-srv3 kernel: __schedule+0x2d1/0x860
Jul 15 05:46:28 nbp11-srv3 kernel: schedule+0x35/0xa0
Jul 15 05:46:28 nbp11-srv3 kernel: wait_transaction_locked+0x89/0xd0 [jbd2]
Jul 15 05:46:28 nbp11-srv3 kernel: ? finish_wait+0x80/0x80
Jul 15 05:46:28 nbp11-srv3 kernel: add_transaction_credits+0xd4/0x290 [jbd2]
Jul 15 05:46:28 nbp11-srv3 kernel: ? ldiskfs_do_update_inode+0x604/0x800 [ldiskfs]
Jul 15 05:46:28 nbp11-srv3 kernel: start_this_handle+0x10a/0x520 [jbd2]
Jul 15 05:46:28 nbp11-srv3 kernel: ? osd_fallocate_preallocate.isra.38+0x275/0x760 [osd_ldiskfs]
Jul 15 05:46:28 nbp11-srv3 kernel: ? ldiskfs_mark_iloc_dirty+0x32/0x90 [ldiskfs]
Jul 15 05:46:28 nbp11-srv3 kernel: jbd2__journal_restart+0xb4/0x160 [jbd2]
Jul 15 05:46:28 nbp11-srv3 kernel: osd_fallocate_preallocate.isra.38+0x5a6/0x760 [osd_ldiskfs]
Jul 15 05:46:28 nbp11-srv3 kernel: osd_fallocate+0xfd/0x370 [osd_ldiskfs]
Jul 15 05:46:28 nbp11-srv3 kernel: ofd_object_fallocate+0x5dd/0xa30 [ofd]
Jul 15 05:46:28 nbp11-srv3 kernel: ofd_fallocate_hdl+0x467/0x730 [ofd]
Jul 15 05:46:28 nbp11-srv3 kernel: tgt_request_handle+0xc97/0x1a40 [ptlrpc]
Jul 15 05:46:28 nbp11-srv3 kernel: ? ptlrpc_nrs_req_get_nolock0+0xff/0x1f0 [ptlrpc]
Jul 15 05:46:28 nbp11-srv3 kernel: ptlrpc_server_handle_request+0x323/0xbe0 [ptlrpc]
Jul 15 05:46:28 nbp11-srv3 kernel: ptlrpc_main+0xc0f/0x1570 [ptlrpc]
Jul 15 05:46:28 nbp11-srv3 kernel: ? ptlrpc_wait_event+0x590/0x590 [ptlrpc]
Jul 15 05:46:28 nbp11-srv3 kernel: kthread+0x10a/0x120
Jul 15 05:46:28 nbp11-srv3 kernel: ? set_kthread_struct+0x50/0x50
Jul 15 05:46:28 nbp11-srv3 kernel: ret_from_fork+0x1f/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Lustre version: &lt;br/&gt;
lustre-iokit-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
kmod-lustre-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
pcp-lustre-0.4.16-2.noarch&lt;br/&gt;
lustre-devel-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
lustre-osd-ldiskfs-mount-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
lustre-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
lustre-tests-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
kmod-lustre-osd-ldiskfs-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
kmod-lustre-tests-2.15.2-1nas_mofed496el8_lustre_20230111v1.x86_64&lt;br/&gt;
&lt;br/&gt;
kernel:  4.18.0-425.3.1.el8_lustre.x86_64&lt;br/&gt;
mofed: mlnx-ofa_kernel-4.9-mofed496.x86_64</environment>
        <key id="77015">LU-16966</key>
            <summary>ofd_object_fallocate dead lock?</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="mhanafi">Mahmoud Hanafi</reporter>
                        <labels>
                    </labels>
                <created>Sat, 15 Jul 2023 16:37:29 +0000</created>
                <updated>Fri, 17 Nov 2023 00:47:08 +0000</updated>
                            <resolved>Mon, 16 Oct 2023 12:47:11 +0000</resolved>
                                    <version>Lustre 2.15.2</version>
                                    <fixVersion>Lustre 2.16.0</fixVersion>
                    <fixVersion>Lustre 2.15.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="378859" author="bzzz" created="Mon, 17 Jul 2023 06:07:43 +0000"  >&lt;p&gt;looks like a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15800&quot; title=&quot;Fallocate causes transaction deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15800&quot;&gt;&lt;del&gt;LU-15800&lt;/del&gt;&lt;/a&gt; &lt;/p&gt;</comment>
                            <comment id="378888" author="pjones" created="Mon, 17 Jul 2023 12:48:23 +0000"  >&lt;p&gt;Thanks Alex!&#160;&lt;/p&gt;

&lt;p&gt;Xing&lt;/p&gt;

&lt;p&gt;Can you please port the fix from&#160;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15800&quot; title=&quot;Fallocate causes transaction deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15800&quot;&gt;&lt;del&gt;LU-15800&lt;/del&gt;&lt;/a&gt; to b2_15?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="379040" author="JIRAUSER17900" created="Tue, 18 Jul 2023 01:27:49 +0000"  >&lt;p&gt;Yes, Peter, I&apos;ll do it today.&lt;/p&gt;</comment>
                            <comment id="381252" author="mhanafi" created="Thu, 3 Aug 2023 18:19:41 +0000"  >&lt;p&gt;We applied &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15800&quot; title=&quot;Fallocate causes transaction deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15800&quot;&gt;&lt;del&gt;LU-15800&lt;/del&gt;&lt;/a&gt; but we saw a filesystem hang. Longer console logs attached to the case. &lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/49928/49928_nbp15.hang&quot; title=&quot;nbp15.hang attached to LU-16966&quot;&gt;nbp15.hang&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Aug &#160;2 23:09:33 nbp15-srv1 kernel: INFO: task ll_ost02_011:14622 blocked &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more than 120 seconds.
Aug &#160;2 23:09:33 nbp15-srv1 kernel: &#160; &#160; &#160;Tainted: G &#160; &#160; &#160; &#160; &#160; OE &#160; &#160;--------- - &#160;- 4.18.0-477.10.1.el8_lustre.x86_64 #1
Aug &#160;2 23:09:33 nbp15-srv1 kernel: &lt;span class=&quot;code-quote&quot;&gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot;&lt;/span&gt; disables &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; message.
Aug &#160;2 23:09:33 nbp15-srv1 kernel: task:ll_ost02_011 &#160; &#160;state:D stack: &#160; &#160;0 pid:14622 ppid: &#160; &#160; 2 flags:0x80004080
Aug &#160;2 23:09:33 nbp15-srv1 kernel: Call Trace:
Aug &#160;2 23:09:33 nbp15-srv1 kernel: __schedule+0x2d1/0x870
Aug &#160;2 23:09:33 nbp15-srv1 kernel: schedule+0x55/0xf0
Aug &#160;2 23:09:33 nbp15-srv1 kernel: wait_transaction_locked+0x89/0xd0 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? finish_wait+0x80/0x80
Aug &#160;2 23:09:33 nbp15-srv1 kernel: add_transaction_credits+0xd4/0x290 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? osd_declare_qid+0x398/0x4c0 [osd_ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: start_this_handle+0x10a/0x520 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? jbd2__journal_start+0x8f/0x1f0 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? kmem_cache_alloc+0x13f/0x280
Aug &#160;2 23:09:33 nbp15-srv1 kernel: jbd2__journal_start+0xee/0x1f0 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? osd_trans_start+0x13b/0x500 [osd_ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: __ldiskfs_journal_start_sb+0x6e/0x140 [ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: osd_trans_start+0x13b/0x500 [osd_ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ofd_attr_set+0x546/0x1090 [ofd]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ofd_setattr_hdl+0x458/0x8e0 [ofd]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: tgt_request_handle+0xccd/0x1b10 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? ptlrpc_nrs_req_get_nolock0+0xff/0x1f0 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ptlrpc_server_handle_request+0x323/0xbe0 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ptlrpc_main+0xc0f/0x1570 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? ptlrpc_wait_event+0x590/0x590 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: kthread+0x134/0x150
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? set_kthread_struct+0x50/0x50
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ret_from_fork+0x1f/0x40
Aug &#160;2 23:09:33 nbp15-srv1 kernel: INFO: task ll_ost03_006:14631 blocked &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more than 120 seconds.
Aug &#160;2 23:09:33 nbp15-srv1 kernel: &#160; &#160; &#160;Tainted: G &#160; &#160; &#160; &#160; &#160; OE &#160; &#160;--------- - &#160;- 4.18.0-477.10.1.el8_lustre.x86_64 #1
Aug &#160;2 23:09:33 nbp15-srv1 kernel: &lt;span class=&quot;code-quote&quot;&gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot;&lt;/span&gt; disables &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; message.
Aug &#160;2 23:09:33 nbp15-srv1 kernel: task:ll_ost03_006 &#160; &#160;state:D stack: &#160; &#160;0 pid:14631 ppid: &#160; &#160; 2 flags:0x80004080
Aug &#160;2 23:09:33 nbp15-srv1 kernel: Call Trace:
Aug &#160;2 23:09:33 nbp15-srv1 kernel: __schedule+0x2d1/0x870
Aug &#160;2 23:09:33 nbp15-srv1 kernel: schedule+0x55/0xf0
Aug &#160;2 23:09:33 nbp15-srv1 kernel: wait_transaction_locked+0x89/0xd0 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? finish_wait+0x80/0x80
Aug &#160;2 23:09:33 nbp15-srv1 kernel: add_transaction_credits+0xd4/0x290 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? ldiskfs_do_update_inode+0x604/0x800 [ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: start_this_handle+0x10a/0x520 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? osd_fallocate_preallocate.isra.37+0x275/0x760 [osd_ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? ldiskfs_mark_iloc_dirty+0x32/0x90 [ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: jbd2__journal_restart+0xb4/0x160 [jbd2]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: osd_fallocate_preallocate.isra.37+0x5a6/0x760 [osd_ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: osd_fallocate+0xfd/0x370 [osd_ldiskfs]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ofd_object_fallocate+0x5dc/0xa30 [ofd]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ofd_fallocate_hdl+0x467/0x730 [ofd]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: tgt_request_handle+0xccd/0x1b10 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? ptlrpc_nrs_req_get_nolock0+0xff/0x1f0 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ptlrpc_server_handle_request+0x323/0xbe0 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ptlrpc_main+0xc0f/0x1570 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? ptlrpc_wait_event+0x590/0x590 [ptlrpc]
Aug &#160;2 23:09:33 nbp15-srv1 kernel: kthread+0x134/0x150
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ? set_kthread_struct+0x50/0x50
Aug &#160;2 23:09:33 nbp15-srv1 kernel: ret_from_fork+0x1f/0x40 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="381284" author="mhanafi" created="Fri, 4 Aug 2023 02:40:23 +0000"  >&lt;p&gt;This looks more like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16691&quot; title=&quot;optimize ldiskfs prealloc (PA) under random read workloads&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16691&quot;&gt;&lt;del&gt;LU-16691&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="381326" author="mhanafi" created="Fri, 4 Aug 2023 10:33:28 +0000"  >&lt;p&gt;Filesystem hung again. attaching stack output.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/49934/49934_stack.out&quot; title=&quot;stack.out attached to LU-16966&quot;&gt;stack.out&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="381347" author="bzzz" created="Fri, 4 Aug 2023 12:38:07 +0000"  >&lt;blockquote&gt;&lt;p&gt;Filesystem hung again. attaching stack output.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;thanks, need some time to analyze the traces.&lt;/p&gt;</comment>
                            <comment id="381545" author="bzzz" created="Mon, 7 Aug 2023 07:19:03 +0000"  >&lt;blockquote&gt;&lt;p&gt;We applied &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15800&quot; title=&quot;Fallocate causes transaction deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15800&quot;&gt;&lt;del&gt;LU-15800&lt;/del&gt;&lt;/a&gt; but we say a filesystem hang. Longer console logs attached to the case.nbp15.hang&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;I noticed OOM:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Aug  2 20:55:33 nbp15-srv1 kernel: mdt_rdpg02_001: page allocation failure: order:0, mode:0x40(__GFP_IO), nodemask=(&lt;span class=&quot;code-keyword&quot;&gt;null&lt;/span&gt;),cpuset=/,mems_allowed=0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="381548" author="bzzz" created="Mon, 7 Aug 2023 07:39:45 +0000"  >&lt;p&gt;can you please show output for lctl get_param osd*.&lt;b&gt;OST&lt;/b&gt;.brw_stats from that OST?&lt;/p&gt;</comment>
                            <comment id="381587" author="bzzz" created="Mon, 7 Aug 2023 14:55:45 +0000"  >&lt;p&gt;could you please clarify what exact version you&apos;re running? the ticket says 2.15.2, but you also said &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15800&quot; title=&quot;Fallocate causes transaction deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15800&quot;&gt;&lt;del&gt;LU-15800&lt;/del&gt;&lt;/a&gt; was added? anything else? I need to know the exact state of the source.&lt;/p&gt;</comment>
                            <comment id="381591" author="pjones" created="Mon, 7 Aug 2023 15:00:21 +0000"  >&lt;p&gt;Alex&lt;/p&gt;

&lt;p&gt;I would expect that the answer will be the b2_15 branch from &lt;a href=&quot;https://github.com/champios/lustre-nas&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/champios/lustre-nas&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="381592" author="bzzz" created="Mon, 7 Aug 2023 15:03:59 +0000"  >&lt;p&gt;thanks Peter&lt;/p&gt;</comment>
                            <comment id="381594" author="bzzz" created="Mon, 7 Aug 2023 15:05:46 +0000"  >&lt;p&gt;b2_15-nas is the only &quot;15&quot; one, right?&lt;/p&gt;</comment>
                            <comment id="381602" author="mhanafi" created="Mon, 7 Aug 2023 15:28:05 +0000"  >&lt;p&gt;Initially we were running wc 2.15.2. After we got the patch we were running 2.15.3 + &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15800&quot; title=&quot;Fallocate causes transaction deadlock&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15800&quot;&gt;&lt;del&gt;LU-15800&lt;/del&gt;&lt;/a&gt; (2.15.3-2nas)&#160;&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://github.com/champios/lustre-nas&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://github.com/champios/lustre-nas&lt;/a&gt;)&lt;/p&gt;

&lt;p&gt;brw_stats attached.&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/49955/49955_brw_stats&quot; title=&quot;brw_stats attached to LU-16966&quot;&gt;brw_stats&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;</comment>
                            <comment id="381604" author="bzzz" created="Mon, 7 Aug 2023 15:32:08 +0000"  >&lt;p&gt;looking at the code in that tree I noticed a couple of things:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15117&quot; title=&quot;ofd_read_lock vs transaction deadlock while allocating buffers	&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15117&quot;&gt;&lt;del&gt;LU-15117&lt;/del&gt;&lt;/a&gt; is missing (not sure it&apos;s really related, but I saw few RPC timeouts and that could cause similar symptoms)&lt;/li&gt;
	&lt;li&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15894&quot; title=&quot;Range locking in ofd is no longer needed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15894&quot;&gt;&lt;del&gt;LU-15894&lt;/del&gt;&lt;/a&gt; is not reverted (and I see one trace stuck at the range lock)&lt;/li&gt;
&lt;/ul&gt;
</comment>
                            <comment id="381606" author="bzzz" created="Mon, 7 Aug 2023 15:34:53 +0000"  >&lt;blockquote&gt;&lt;p&gt;btw_stats attached&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;unfortunately, &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15564&quot; title=&quot;allocation stats in osd-ldiskfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15564&quot;&gt;&lt;del&gt;LU-15564&lt;/del&gt;&lt;/a&gt; has not been landed yet in that tree. that could help to understand whether it&apos;s block allocation related or not.&lt;/p&gt;</comment>
                            <comment id="381728" author="mhanafi" created="Tue, 8 Aug 2023 19:27:53 +0000"  >&lt;p&gt;Is there some additional debugging we can do to help with this issue.&lt;/p&gt;</comment>
                            <comment id="381790" author="bzzz" created="Wed, 9 Aug 2023 09:46:51 +0000"  >&lt;blockquote&gt;&lt;p&gt;Is there some additional debugging we can do to help with this issue.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;would it be possible to apply &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15564&quot; title=&quot;allocation stats in osd-ldiskfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15564&quot;&gt;&lt;del&gt;LU-15564&lt;/del&gt;&lt;/a&gt; so we can track allocation time?&lt;/p&gt;</comment>
                            <comment id="381891" author="mhanafi" created="Wed, 9 Aug 2023 21:35:45 +0000"  >&lt;p&gt;We are going to do a build with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15564&quot; title=&quot;allocation stats in osd-ldiskfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15564&quot;&gt;&lt;del&gt;LU-15564&lt;/del&gt;&lt;/a&gt;. Should we also pick up &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15117&quot; title=&quot;ofd_read_lock vs transaction deadlock while allocating buffers	&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15117&quot;&gt;&lt;del&gt;LU-15117&lt;/del&gt;&lt;/a&gt; and revert &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15894&quot; title=&quot;Range locking in ofd is no longer needed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15894&quot;&gt;&lt;del&gt;LU-15894&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;

&lt;p&gt;We would need a backport for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15117&quot; title=&quot;ofd_read_lock vs transaction deadlock while allocating buffers	&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15117&quot;&gt;&lt;del&gt;LU-15117&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="382581" author="mhanafi" created="Wed, 16 Aug 2023 00:19:30 +0000"  >&lt;p&gt;Still waiting for answer to above question.&lt;/p&gt;</comment>
                            <comment id="382887" author="JIRAUSER17312" created="Thu, 17 Aug 2023 22:25:00 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=mhanafi&quot; class=&quot;user-hover&quot; rel=&quot;mhanafi&quot;&gt;mhanafi&lt;/a&gt;&lt;br/&gt;
Sorry for the delay, you can try &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15117&quot; title=&quot;ofd_read_lock vs transaction deadlock while allocating buffers	&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15117&quot;&gt;&lt;del&gt;LU-15117&lt;/del&gt;&lt;/a&gt; (there is a port already for b2_15). Likely &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15894&quot; title=&quot;Range locking in ofd is no longer needed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15894&quot;&gt;&lt;del&gt;LU-15894&lt;/del&gt;&lt;/a&gt; may help but there is no port available, the patch &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=bzzz&quot; class=&quot;user-hover&quot; rel=&quot;bzzz&quot;&gt;bzzz&lt;/a&gt; is asking for you to try will provide additional statistics to help better understand the problem. I would suggest that you first start there, provide the requested statistics and allow us to better understand the problem before attempting other patches.&lt;/p&gt;</comment>
                            <comment id="383933" author="mhanafi" created="Mon, 28 Aug 2023 15:33:24 +0000"  >&lt;p&gt;We were able to get some of our servers patched with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-15564&quot; title=&quot;allocation stats in osd-ldiskfs&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-15564&quot;&gt;&lt;del&gt;LU-15564&lt;/del&gt;&lt;/a&gt; and got brw_stats after a hang, see below. btw, if we set debug=+trace it reduces the chance of hitting this bug.&#160;&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/50170/50170_brw_stats.save.1693236421&quot; title=&quot;brw_stats.save.1693236421 attached to LU-16966&quot;&gt;brw_stats.save.1693236421&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;nobr&quot;&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/attachment/50171/50171_stack1.out&quot; title=&quot;stack1.out attached to LU-16966&quot;&gt;stack1.out&lt;sup&gt;&lt;img class=&quot;rendericon&quot; src=&quot;https://jira.whamcloud.com/images/icons/link_attachment_7.gif&quot; height=&quot;7&quot; width=&quot;7&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/sup&gt;&lt;/a&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="384655" author="mhanafi" created="Sun, 3 Sep 2023 02:17:31 +0000"  >&lt;p&gt;Can we get an update please. We have filesystem hanging multiple times a day.&lt;/p&gt;</comment>
                            <comment id="384663" author="bzzz" created="Sun, 3 Sep 2023 04:55:35 +0000"  >&lt;blockquote&gt;&lt;p&gt;Can we get an update please. We have filesystem hanging multiple times a day.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=mhanafi&quot; class=&quot;user-hover&quot; rel=&quot;mhanafi&quot;&gt;mhanafi&lt;/a&gt; sorry for the delay, I&apos;m trying to reconstruct the problem using the brw_stats you provided.&lt;/p&gt;</comment>
                            <comment id="384666" author="bzzz" created="Sun, 3 Sep 2023 16:07:42 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=mhanafi&quot; class=&quot;user-hover&quot; rel=&quot;mhanafi&quot;&gt;mhanafi&lt;/a&gt; could you please apply the patch just added? the patch reverts range locking.&lt;/p&gt;</comment>
                            <comment id="384676" author="mhanafi" created="Sun, 3 Sep 2023 19:19:20 +0000"  >&lt;p&gt;Thanks we&apos;ll get the patch applied this week and let you know the results.&lt;/p&gt;</comment>
                            <comment id="384709" author="bzzz" created="Mon, 4 Sep 2023 10:23:09 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=mhanafi&quot; class=&quot;user-hover&quot; rel=&quot;mhanafi&quot;&gt;mhanafi&lt;/a&gt; I&apos;m very very sorry, but ... &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/smile.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt; could you please instead apply the patch I&apos;m attaching. I&apos;ve gone through few code paths and now think there is another problem with fallocate and actually we better reuse range locking you still have in your tree to fix the problem.&lt;br/&gt;
the patch is fallocate-range-locking.patch&lt;/p&gt;</comment>
                            <comment id="384724" author="gerrit" created="Mon, 4 Sep 2023 12:37:08 +0000"  >&lt;p&gt;&quot;Alex Zhuravlev &amp;lt;bzzz@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52264&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52264&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16966&quot; title=&quot;ofd_object_fallocate dead lock?&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16966&quot;&gt;&lt;del&gt;LU-16966&lt;/del&gt;&lt;/a&gt; osd: take trunc_lock for fallocate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 48a6d52640fe716760d1a369f4bc53ebdba25e6d&lt;/p&gt;</comment>
                            <comment id="387148" author="mhanafi" created="Mon, 25 Sep 2023 18:22:58 +0000"  >&lt;p&gt;We applied the patch provided and we have not seen the issue.&lt;/p&gt;</comment>
                            <comment id="389344" author="gerrit" created="Mon, 16 Oct 2023 05:47:14 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52264/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52264/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16966&quot; title=&quot;ofd_object_fallocate dead lock?&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16966&quot;&gt;&lt;del&gt;LU-16966&lt;/del&gt;&lt;/a&gt; osd: take trunc_lock for fallocate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 51529fb57f85210e292a15c882cf25a4689ea77d&lt;/p&gt;</comment>
                            <comment id="389398" author="pjones" created="Mon, 16 Oct 2023 12:47:11 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                            <comment id="389403" author="bzzz" created="Mon, 16 Oct 2023 12:52:17 +0000"  >&lt;blockquote&gt;&lt;p&gt;Does this issue also affect b_es6_0 and b2_15?&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;yes, b_es6_0 needs that for sure, will check b2_15&lt;/p&gt;</comment>
                            <comment id="389428" author="gerrit" created="Mon, 16 Oct 2023 13:59:37 +0000"  >&lt;p&gt;&quot;Alex Zhuravlev &amp;lt;bzzz@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52710&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52710&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16966&quot; title=&quot;ofd_object_fallocate dead lock?&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16966&quot;&gt;&lt;del&gt;LU-16966&lt;/del&gt;&lt;/a&gt; osd: take trunc_lock for fallocate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_15&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d6e549e9ea2eb6e7b141203dad0130cc8da5f1db&lt;/p&gt;</comment>
                            <comment id="392849" author="gerrit" created="Mon, 13 Nov 2023 02:07:44 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52710/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52710/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16966&quot; title=&quot;ofd_object_fallocate dead lock?&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16966&quot;&gt;&lt;del&gt;LU-16966&lt;/del&gt;&lt;/a&gt; osd: take trunc_lock for fallocate&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_15&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9c97d1969e2298fdfe5daa616e36cbe17a9b3d5e&lt;/p&gt;</comment>
                            <comment id="393331" author="pjones" created="Fri, 17 Nov 2023 00:47:08 +0000"  >&lt;p&gt;Will be included in 2.15.4&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="70058">LU-15800</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="49955" name="brw_stats" size="7751" author="mhanafi" created="Mon, 7 Aug 2023 15:27:03 +0000"/>
                            <attachment id="50170" name="brw_stats.save.1693236421" size="91315" author="mhanafi" created="Mon, 28 Aug 2023 15:30:30 +0000"/>
                            <attachment id="49702" name="dmesg.out" size="122020" author="mhanafi" created="Sat, 15 Jul 2023 16:35:35 +0000"/>
                            <attachment id="50239" name="fallocate-range-locking.patch" size="1055" author="bzzz" created="Mon, 4 Sep 2023 10:24:51 +0000"/>
                            <attachment id="49928" name="nbp15.hang" size="46257" author="mhanafi" created="Thu, 3 Aug 2023 18:19:37 +0000"/>
                            <attachment id="49934" name="stack.out" size="51820" author="mhanafi" created="Fri, 4 Aug 2023 10:33:23 +0000"/>
                            <attachment id="50172" name="stack1-1.out" size="56795" author="mhanafi" created="Mon, 28 Aug 2023 15:30:41 +0000"/>
                            <attachment id="50171" name="stack1.out" size="56795" author="mhanafi" created="Mon, 28 Aug 2023 15:30:30 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03qm7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10021"><![CDATA[2]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>