<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:05:14 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13908] ldlm_lock_put()) ASSERTION( (((( lock))-&gt;l_flags &amp; (1ULL &lt;&lt; 50)) != 0) ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-13908</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;On compute node, ASSERT fails and node crashes.  One node reports two failed ASSERTs in the dumped log:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 13759:0:(ldlm_lock.c:213:ldlm_lock_put()) ASSERTION( (((( lock))-&amp;gt;l_flags &amp;amp; (1ULL &amp;lt;&amp;lt; 50)) != 0) ) failed:
LustreError: 10188:0:(ldlm_lock.c:205:ldlm_lock_put()) ASSERTION( atomic_read(&amp;amp;lock-&amp;gt;l_refc) &amp;gt; 0 ) failed:
LustreError: 10188:0:(ldlm_lock.c:205:ldlm_lock_put()) LBUG
Pid: 10188, comm: ldlm_bl_16 3.10.0-1127.0.0.1chaos.ch6.x86_64 #1 SMP Fri Apr 3 08:56:52 PDT 2020
Call Trace:
 [&amp;lt;ffffffffc0a637ec&amp;gt;] libcfs_call_trace+0x8c/0xd0 [libcfs]
 [&amp;lt;ffffffffc0a638ac&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
 [&amp;lt;ffffffffc16cb366&amp;gt;] ldlm_lock_put+0x616/0x7b0 [ptlrpc]
 [&amp;lt;ffffffffc0c5828b&amp;gt;] osc_extent_put+0x6b/0x320 [osc]
 [&amp;lt;ffffffffc0c645fb&amp;gt;] osc_cache_wait_range+0x30b/0x960 [osc]
 [&amp;lt;ffffffffc0c655ce&amp;gt;] osc_cache_writeback_range+0x97e/0x1000 [osc]
 [&amp;lt;ffffffffc0c51195&amp;gt;] osc_lock_flush+0x195/0x290 [osc]
 [&amp;lt;ffffffffc0c51653&amp;gt;] osc_ldlm_blocking_ast+0x2e3/0x3a0 [osc]
 [&amp;lt;ffffffffc16d2dea&amp;gt;] ldlm_cancel_callback+0x8a/0x330 [ptlrpc]
 [&amp;lt;ffffffffc16ea620&amp;gt;] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc]
 [&amp;lt;ffffffffc16f03f7&amp;gt;] ldlm_cli_cancel+0x157/0x620 [ptlrpc]
 [&amp;lt;ffffffffc0c514ea&amp;gt;] osc_ldlm_blocking_ast+0x17a/0x3a0 [osc]
 [&amp;lt;ffffffffc16fc618&amp;gt;] ldlm_handle_bl_callback+0xf8/0x4f0 [ptlrpc]
 [&amp;lt;ffffffffc16fd230&amp;gt;] ldlm_bl_thread_main+0x820/0xa60 [ptlrpc]
 [&amp;lt;ffffffffbaccca01&amp;gt;] kthread+0xd1/0xe0
 [&amp;lt;ffffffffbb3bff5d&amp;gt;] ret_from_fork_nospec_begin+0x7/0x21
 [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
Kernel panic - not syncing: LBUG
CPU: 53 PID: 10188 Comm: ldlm_bl_16 Kdump: loaded Tainted: G           OE  ------------ T 3.10.0-1127.0.0.1chaos.ch6.x86_64 #1
Hardware name: Penguin Computing Relion OCP1930e/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The other reports the same ASSERT twice:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 20571:0:(ldlm_lock.c:213:ldlm_lock_put()) ASSERTION( (((( lock))-&amp;gt;l_flags &amp;amp; (1ULL &amp;lt;&amp;lt; 50)) != 0) ) failed:
LustreError: 36887:0:(ldlm_lock.c:213:ldlm_lock_put()) ASSERTION( (((( lock))-&amp;gt;l_flags &amp;amp; (1ULL &amp;lt;&amp;lt; 50)) != 0) ) failed:
LustreError: 36887:0:(ldlm_lock.c:213:ldlm_lock_put()) LBUG
Pid: 36887, comm: ldlm_bl_62 3.10.0-1127.0.0.1chaos.ch6.x86_64 #1 SMP Fri Apr 3 08:56:52 PDT 2020
Call Trace:
 [&amp;lt;ffffffffc0a727ec&amp;gt;] libcfs_call_trace+0x8c/0xd0 [libcfs]
 [&amp;lt;ffffffffc0a728ac&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
 [&amp;lt;ffffffffc176f3ca&amp;gt;] ldlm_lock_put+0x67a/0x7b0 [ptlrpc]
 [&amp;lt;ffffffffc1773058&amp;gt;] ldlm_lock_match_with_skip+0x3b8/0x860 [ptlrpc]
 [&amp;lt;ffffffffc0d982d2&amp;gt;] osc_match_base+0x102/0x290 [osc]
 [&amp;lt;ffffffffc0da3dfc&amp;gt;] osc_obj_dlmlock_at_pgoff+0x14c/0x2c0 [osc]
 [&amp;lt;ffffffffc0d9c358&amp;gt;] osc_req_attr_set+0x128/0x610 [osc]
 [&amp;lt;ffffffffc1549b13&amp;gt;] cl_req_attr_set+0x63/0x160 [obdclass]
 [&amp;lt;ffffffffc0d969f3&amp;gt;] osc_build_rpc+0x483/0x1080 [osc]
 [&amp;lt;ffffffffc0db1cbd&amp;gt;] osc_io_unplug0+0xecd/0x19c0 [osc]
 [&amp;lt;ffffffffc0db6620&amp;gt;] osc_cache_writeback_range+0x9d0/0x1000 [osc]
 [&amp;lt;ffffffffc0da2195&amp;gt;] osc_lock_flush+0x195/0x290 [osc]
 [&amp;lt;ffffffffc0da2653&amp;gt;] osc_ldlm_blocking_ast+0x2e3/0x3a0 [osc]
 [&amp;lt;ffffffffc1776dea&amp;gt;] ldlm_cancel_callback+0x8a/0x330 [ptlrpc]
 [&amp;lt;ffffffffc178e620&amp;gt;] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc]
 [&amp;lt;ffffffffc17943f7&amp;gt;] ldlm_cli_cancel+0x157/0x620 [ptlrpc]
 [&amp;lt;ffffffffc0da24ea&amp;gt;] osc_ldlm_blocking_ast+0x17a/0x3a0 [osc]
 [&amp;lt;ffffffffc17a0618&amp;gt;] ldlm_handle_bl_callback+0xf8/0x4f0 [ptlrpc]
 [&amp;lt;ffffffffc17a1230&amp;gt;] ldlm_bl_thread_main+0x820/0xa60 [ptlrpc]
 [&amp;lt;ffffffffab4cca01&amp;gt;] kthread+0xd1/0xe0
 [&amp;lt;ffffffffabbbff5d&amp;gt;] ret_from_fork_nospec_begin+0x7/0x21
 [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
Kernel panic - not syncing: LBUG
CPU: 20 PID: 36887 Comm: ldlm_bl_62 Kdump: loaded Tainted: G &#160; &#160; &#160; &#160; &#160; OE&#160; ------------ T 3.10.0-1127.0.0.1chaos.ch6.x86_64 #1&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;From /tftpboot/dumps/192.168.64.82-2020-08-12-13:23:27/vmcore-dmesg.txt&lt;br/&gt;
and /tftpboot/dumps/192.168.66.180-2020-08-12-16:39:36/vmcore-dmesg.txt&lt;/p&gt;</description>
                <environment>2.12.4_5.chaos&lt;br/&gt;
toss 3.6-2 (RHEL 7.8)</environment>
        <key id="60416">LU-13908</key>
            <summary>ldlm_lock_put()) ASSERTION( (((( lock))-&gt;l_flags &amp; (1ULL &lt;&lt; 50)) != 0) ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="ofaaland">Olaf Faaland</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Mon, 17 Aug 2020 22:20:37 +0000</created>
                <updated>Mon, 25 Jan 2021 17:37:39 +0000</updated>
                            <resolved>Mon, 25 Jan 2021 17:37:39 +0000</resolved>
                                    <version>Lustre 2.12.4</version>
                                                        <due></due>
                            <votes>1</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="277642" author="ofaaland" created="Mon, 17 Aug 2020 22:21:06 +0000"  >&lt;p&gt;Looks like dupe of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13089&quot; title=&quot;ASSERTION( (((( lock))-&amp;gt;l_flags &amp;amp; (1ULL &amp;lt;&amp;lt; 50)) != 0) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13089&quot;&gt;&lt;del&gt;LU-13089&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="277643" author="ofaaland" created="Mon, 17 Aug 2020 22:22:58 +0000"  >&lt;p&gt;For my reference, my local ticket is TOSS4864&lt;/p&gt;</comment>
                            <comment id="277644" author="ofaaland" created="Mon, 17 Aug 2020 22:24:39 +0000"  >&lt;p&gt;Average about 10 crashes per week, although it varies widely.  I do not know whether a particular workload triggers it.&lt;/p&gt;</comment>
                            <comment id="277682" author="pjones" created="Tue, 18 Aug 2020 14:35:27 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=ofaaland&quot; class=&quot;user-hover&quot; rel=&quot;ofaaland&quot;&gt;ofaaland&lt;/a&gt; has this issue ever been seen on an earlier version of 2.12.x or 2.10.x?&lt;/p&gt;</comment>
                            <comment id="277684" author="pjones" created="Tue, 18 Aug 2020 14:43:55 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please investigate?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="277686" author="jhammond" created="Tue, 18 Aug 2020 14:46:21 +0000"  >&lt;p&gt;I agree that this is likely a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13089&quot; title=&quot;ASSERTION( (((( lock))-&amp;gt;l_flags &amp;amp; (1ULL &amp;lt;&amp;lt; 50)) != 0) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13089&quot;&gt;&lt;del&gt;LU-13089&lt;/del&gt;&lt;/a&gt;. As Oleg notes there &quot;except this time it&apos;s glimpse cb vs cancel cb race&quot;. Based on the functions changed and the time that this was first noticed I suspect that this was introduced by&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;commit 2548cb9e32bfca897de577f88836629f72641369
Author:     Patrick Farrell &amp;lt;pfarrell@whamcloud.com&amp;gt;
AuthorDate: Mon Sep 9 11:56:07 2019 -0400
Commit:     Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;
CommitDate: Thu Dec 12 23:05:15 2019 +0000

    LU-11670 osc: glimpse - search for active lock
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Lustre-change: &lt;a href=&quot;https://review.whamcloud.com/33660&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/33660&lt;/a&gt;&lt;br/&gt;
Reviewed-on: &lt;a href=&quot;https://review.whamcloud.com/36406&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/36406&lt;/a&gt;&lt;/p&gt;
</comment>
                            <comment id="277696" author="ofaaland" created="Tue, 18 Aug 2020 18:10:53 +0000"  >&lt;p&gt;Peter,&lt;br/&gt;
We have never seen this under an earlier version of Lustre 2.12.x, but this is the first 2.12 we deployed widely.&lt;br/&gt;
We have never seen this under any 2.10.x version.&lt;br/&gt;
thanks&lt;/p&gt;</comment>
                            <comment id="277728" author="gerrit" created="Wed, 19 Aug 2020 12:09:39 +0000"  >&lt;p&gt;Bobi Jam (bobijam@hotmail.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/39693&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/39693&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13908&quot; title=&quot;ldlm_lock_put()) ASSERTION( (((( lock))-&amp;gt;l_flags &amp;amp; (1ULL &amp;lt;&amp;lt; 50)) != 0) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13908&quot;&gt;&lt;del&gt;LU-13908&lt;/del&gt;&lt;/a&gt; osc: revert &quot;glimpse - search for active lock&quot;&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6caffe3423c184b9717c9a5307da503ae0fbba4e&lt;/p&gt;</comment>
                            <comment id="277729" author="bobijam" created="Wed, 19 Aug 2020 12:15:43 +0000"  >&lt;p&gt;The commit mentioned looks like the culprit. Olaf, is it possible to try the revert patch?&lt;/p&gt;</comment>
                            <comment id="278088" author="ofaaland" created="Tue, 25 Aug 2020 23:58:52 +0000"  >&lt;p&gt;Yes, I can try.&#160; It may take a while.&#160; Sorry for the delay answering your question.&lt;/p&gt;</comment>
                            <comment id="278411" author="ofaaland" created="Mon, 31 Aug 2020 19:27:41 +0000"  >&lt;p&gt;Hi Zhenyu,&lt;br/&gt;
Can you get the revert patch to pass Maloo, so I can more confidently test this on the production system where we see the error?  We have not been able to reproduce this on our test systems.&lt;br/&gt;
thanks&lt;/p&gt;</comment>
                            <comment id="278940" author="pjones" created="Sat, 5 Sep 2020 14:48:48 +0000"  >&lt;p&gt;I just noticed that Gerrit did not post an update to Jira for the revert on b2_12 - &lt;a href=&quot;https://review.whamcloud.com/#/c/39819/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/39819/&lt;/a&gt;&#160;. Hopefully this is more convenient to test &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=ofaaland&quot; class=&quot;user-hover&quot; rel=&quot;ofaaland&quot;&gt;ofaaland&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="279066" author="ofaaland" created="Tue, 8 Sep 2020 19:45:23 +0000"  >&lt;p&gt;For my tracking purposes: my internal ticket is TOSS4864&lt;/p&gt;</comment>
                            <comment id="280952" author="spiechurski" created="Tue, 29 Sep 2020 14:28:56 +0000"  >&lt;p&gt;Hello,&#160;&lt;/p&gt;

&lt;p&gt;Do you have any feedback on whether the revert of the targeted commit has any effect on your production ?&lt;/p&gt;

&lt;p&gt;One of our customers has hit this 8 times in the past week.&#160;&lt;/p&gt;

&lt;p&gt;This would not be a problem if it affected only the crashed client, but in our case, the oss handling the lock never releases it until the crashed client remounts the filesystem (the oss keeps retrying to send requests to the crashed client every 600 seconds even hours after the crash), which sometimes will result in an almost hung filesystem.&lt;/p&gt;

&lt;p&gt;I am surprised the client is never evicted; would there be a reason for this?&lt;/p&gt;</comment>
                            <comment id="283787" author="pjones" created="Fri, 30 Oct 2020 14:40:09 +0000"  >&lt;p&gt;This is believed to be a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11719&quot; title=&quot;Refactor search_itree&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11719&quot;&gt;&lt;del&gt;LU-11719&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="284413" author="ofaaland" created="Thu, 5 Nov 2020 22:56:35 +0000"  >&lt;p&gt;Next week we will finally get a build with the revert installed on the machine where we&apos;ve seen the issue.&lt;/p&gt;</comment>
                            <comment id="284414" author="pjones" created="Thu, 5 Nov 2020 23:03:14 +0000"  >&lt;p&gt;Wouldn&apos;t you rather test the actual fix?&lt;/p&gt;</comment>
                            <comment id="284415" author="ofaaland" created="Thu, 5 Nov 2020 23:11:51 +0000"  >&lt;p&gt;Yes, but it&apos;s sadly complicated.&lt;/p&gt;</comment>
                            <comment id="290284" author="ofaaland" created="Mon, 25 Jan 2021 17:26:43 +0000"  >&lt;p&gt;In about 3 weeks we will have 2.12.6_3.llnl, which includes the fix from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11719&quot; title=&quot;Refactor search_itree&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11719&quot;&gt;&lt;del&gt;LU-11719&lt;/del&gt;&lt;/a&gt;, on the clusters where we&apos;ve seen this.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="57666">LU-13089</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="54157">LU-11719</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i017xj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>