<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:48:13 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11935] MDS hit LBUG: (lod_qos.c:862:lod_comp_ost_in_use()) LBUG</title>
                <link>https://jira.whamcloud.com/browse/LU-11935</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Soak ran on b2_10-ib #98 over the weekend with no crashes but many application failures. Looking into the logs, I found errors like the ones below:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  267.880824] LustreError: 12558:0:(lod_dev.c:419:lod_sub_recovery_thread()) soaked-MDT0002-osp-MDT0001 getting update log failed: rc = -2
[  268.388707] LustreError: 12556:0:(lod_dev.c:419:lod_sub_recovery_thread()) soaked-MDT0001-osd getting update log failed: rc = -108
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I then removed the update logs on all MDSes:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;pdsh -w soak-8 &quot;mount -t ldiskfs /dev/disk/by-id/dm-name-360080e50002ff4f0000037dd59ba4f5f /mnt/soaked-mdt0&quot;
pdsh -w soak-9 &quot;mount -t ldiskfs /dev/disk/by-id/dm-name-360080e50002ff4f0000037e159ba4ffd /mnt/soaked-mdt1&quot;
pdsh -w soak-10 &quot;mount -t ldiskfs /dev/disk/by-id/dm-name-360080e50001fedb80000015752012949 /mnt/soaked-mdt2&quot;
pdsh -w soak-11 &quot;mount -t ldiskfs /dev/disk/by-id/dm-name-360080e50001fedb80000015952012962 /mnt/soaked-mdt3&quot;
pdsh -g mds &quot;df -h /mnt/soaked-mdt?&quot;
pdsh -g mds &quot;rm -rf /mnt/soaked-mdt?/update_log*&quot;
pdsh -g mds &quot;ls /mnt/soaked-mdt? | grep update_log&quot;
pdsh -g mds &quot;umount /mnt/soaked-mdt?&quot;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;After remounting and restarting soak, one of the MDSes crashed right away:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[15895.912762] sd 1:0:1:44: rdac: array soak-netapp5660-1, ctlr 1, MODE_SELECT completed
[15896.567246] LustreError: 13958:0:(lod_qos.c:862:lod_comp_ost_in_use()) ASSERTION( inuse-&amp;gt;op_count * sizeof(inuse-&amp;gt;op_array[0]) &amp;lt; inuse-&amp;gt;op_size )
 failed: 
[15896.582712] LustreError: 13958:0:(lod_qos.c:862:lod_comp_ost_in_use()) LBUG
[15896.590512] Pid: 13958, comm: mdt00_005 3.10.0-957.el7_lustre.x86_64 #1 SMP Mon Jan 7 20:06:41 UTC 2019
[15896.601014] Call Trace:
[15896.603759]  [&amp;lt;ffffffffc0b5c7cc&amp;gt;] libcfs_call_trace+0x8c/0xc0 [libcfs]
[15896.611129]  [&amp;lt;ffffffffc0b5c87c&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
[15896.618071]  [&amp;lt;ffffffffc1416f96&amp;gt;] lod_env_info.part.10+0x0/0x36 [lod]
[15896.625307]  [&amp;lt;ffffffffc140c31f&amp;gt;] lod_alloc_rr.constprop.18+0xf2f/0x1000 [lod]
[15896.633437]  [&amp;lt;ffffffffc1410749&amp;gt;] lod_qos_prep_create+0x12b9/0x17f0 [lod]
[15896.641097]  [&amp;lt;ffffffffc14111bd&amp;gt;] lod_prepare_create+0x25d/0x360 [lod]
[15896.648422]  [&amp;lt;ffffffffc14059de&amp;gt;] lod_declare_striped_create+0x1ee/0x970 [lod]
[15896.656559]  [&amp;lt;ffffffffc1407e54&amp;gt;] lod_declare_create+0x1e4/0x540 [lod]
[15896.663881]  [&amp;lt;ffffffffc1473b22&amp;gt;] mdd_declare_create_object_internal+0xe2/0x2f0 [mdd]
[15896.672667]  [&amp;lt;ffffffffc14651a3&amp;gt;] mdd_declare_create+0x53/0xe30 [mdd]
[15896.679936]  [&amp;lt;ffffffffc1469059&amp;gt;] mdd_create+0x879/0x1400 [mdd]
[15896.686573]  [&amp;lt;ffffffffc1339df5&amp;gt;] mdt_reint_open+0x2175/0x3190 [mdt]
[15896.693729]  [&amp;lt;ffffffffc132ec73&amp;gt;] mdt_reint_rec+0x83/0x210 [mdt]
[15896.700469]  [&amp;lt;ffffffffc131018b&amp;gt;] mdt_reint_internal+0x5fb/0x9c0 [mdt]
[15896.707825]  [&amp;lt;ffffffffc13106b2&amp;gt;] mdt_intent_reint+0x162/0x430 [mdt]
[15896.714956]  [&amp;lt;ffffffffc13134bb&amp;gt;] mdt_intent_opc+0x1eb/0xaf0 [mdt]
[15896.721881]  [&amp;lt;ffffffffc131bd28&amp;gt;] mdt_intent_policy+0x138/0x320 [mdt]
[15896.729102]  [&amp;lt;ffffffffc0e632cd&amp;gt;] ldlm_lock_enqueue+0x38d/0x980 [ptlrpc]
[15896.736653]  [&amp;lt;ffffffffc0e8cab3&amp;gt;] ldlm_handle_enqueue0+0x943/0x15b0 [ptlrpc]
[15896.744589]  [&amp;lt;ffffffffc0f12372&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
[15896.751463]  [&amp;lt;ffffffffc0f162aa&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
[15896.759197]  [&amp;lt;ffffffffc0ebed5b&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
[15896.767814]  [&amp;lt;ffffffffc0ec24a2&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
[15896.774881]  [&amp;lt;ffffffff876c1c31&amp;gt;] kthread+0xd1/0xe0
[15896.780351]  [&amp;lt;ffffffff87d74c37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[15896.787380]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[15896.792969] Kernel panic - not syncing: LBUG
[15896.797750] CPU: 16 PID: 13958 Comm: mdt00_005 Kdump: loaded Tainted: G           OE  ------------   3.10.0-957.el7_lustre.x86_64 #1
[15896.811058] Hardware name: Intel Corporation S2600GZ ........../S2600GZ, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
[15896.823578] Call Trace:
[15896.826310]  [&amp;lt;ffffffff87d61dc1&amp;gt;] dump_stack+0x19/0x1b
[15896.832059]  [&amp;lt;ffffffff87d5b4d0&amp;gt;] panic+0xe8/0x21f
[15896.837412]  [&amp;lt;ffffffffc0b5c8cb&amp;gt;] lbug_with_loc+0x9b/0xa0 [libcfs]
[15896.844308]  [&amp;lt;ffffffffc1416f96&amp;gt;] lod_comp_ost_in_use.part.8+0x36/0x36 [lod]
[15896.852185]  [&amp;lt;ffffffffc140c31f&amp;gt;] lod_alloc_rr.constprop.18+0xf2f/0x1000 [lod]
[15896.860254]  [&amp;lt;ffffffffc1410749&amp;gt;] lod_qos_prep_create+0x12b9/0x17f0 [lod]
[15896.867835]  [&amp;lt;ffffffffc14111bd&amp;gt;] lod_prepare_create+0x25d/0x360 [lod]
[15896.875125]  [&amp;lt;ffffffffc14059de&amp;gt;] lod_declare_striped_create+0x1ee/0x970 [lod]
[15896.883191]  [&amp;lt;ffffffffc1407e54&amp;gt;] lod_declare_create+0x1e4/0x540 [lod]
[15896.890480]  [&amp;lt;ffffffffc1473b22&amp;gt;] mdd_declare_create_object_internal+0xe2/0x2f0 [mdd]
[15896.899224]  [&amp;lt;ffffffffc14651a3&amp;gt;] mdd_declare_create+0x53/0xe30 [mdd]
[15896.906416]  [&amp;lt;ffffffffc1469059&amp;gt;] mdd_create+0x879/0x1400 [mdd]
[15896.913034]  [&amp;lt;ffffffffc1339df5&amp;gt;] mdt_reint_open+0x2175/0x3190 [mdt]
[15896.920159]  [&amp;lt;ffffffffc0c9e4c1&amp;gt;] ? upcall_cache_get_entry+0x211/0x8f0 [obdclass]
[15896.928517]  [&amp;lt;ffffffffc131ece3&amp;gt;] ? ucred_set_jobid+0x53/0x70 [mdt]
[15896.935517]  [&amp;lt;ffffffffc132ec73&amp;gt;] mdt_reint_rec+0x83/0x210 [mdt]
[15896.942227]  [&amp;lt;ffffffffc131018b&amp;gt;] mdt_reint_internal+0x5fb/0x9c0 [mdt]
[15896.949517]  [&amp;lt;ffffffffc13106b2&amp;gt;] mdt_intent_reint+0x162/0x430 [mdt]
[15896.956613]  [&amp;lt;ffffffffc13134bb&amp;gt;] mdt_intent_opc+0x1eb/0xaf0 [mdt]
[15896.963542]  [&amp;lt;ffffffffc0eb4c90&amp;gt;] ? lustre_swab_ldlm_policy_data+0x30/0x30 [ptlrpc]
[15896.972095]  [&amp;lt;ffffffffc131bd28&amp;gt;] mdt_intent_policy+0x138/0x320 [mdt]
[15896.979306]  [&amp;lt;ffffffffc0e632cd&amp;gt;] ldlm_lock_enqueue+0x38d/0x980 [ptlrpc]
[15896.986810]  [&amp;lt;ffffffffc0e8cab3&amp;gt;] ldlm_handle_enqueue0+0x943/0x15b0 [ptlrpc]
[15896.994710]  [&amp;lt;ffffffffc0eb4d10&amp;gt;] ? lustre_swab_ldlm_lock_desc+0x30/0x30 [ptlrpc]
[15897.003095]  [&amp;lt;ffffffffc0f12372&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
[15897.009931]  [&amp;lt;ffffffffc0f162aa&amp;gt;] tgt_request_handle+0x92a/0x1370 [ptlrpc]
[15897.017635]  [&amp;lt;ffffffffc0ebed5b&amp;gt;] ptlrpc_server_handle_request+0x23b/0xaa0 [ptlrpc]
[15897.026209]  [&amp;lt;ffffffffc0ebb388&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
[15897.033809]  [&amp;lt;ffffffff876d67c2&amp;gt;] ? default_wake_function+0x12/0x20
[15897.040833]  [&amp;lt;ffffffff876cba9b&amp;gt;] ? __wake_up_common+0x5b/0x90
[15897.047368]  [&amp;lt;ffffffffc0ec24a2&amp;gt;] ptlrpc_main+0xa92/0x1e40 [ptlrpc]
[15897.054393]  [&amp;lt;ffffffffc0ec1a10&amp;gt;] ? ptlrpc_register_service+0xe30/0xe30 [ptlrpc]
[15897.062648]  [&amp;lt;ffffffff876c1c31&amp;gt;] kthread+0xd1/0xe0
[15897.068097]  [&amp;lt;ffffffff876c1b60&amp;gt;] ? insert_kthread_work+0x40/0x40
[15897.074896]  [&amp;lt;ffffffff87d74c37&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[15897.082179]  [&amp;lt;ffffffff876c1b60&amp;gt;] ? insert_kthread_work+0x40/0x40
[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>soak is running on b2_10-ib #98 with PFL enabled.</environment>
        <key id="54799">LU-11935</key>
            <summary>MDS hit LBUG: (lod_qos.c:862:lod_comp_ost_in_use()) LBUG</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="sarah">Sarah Liu</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Wed, 6 Feb 2019 17:32:01 +0000</created>
                <updated>Tue, 26 Feb 2019 22:48:54 +0000</updated>
                                            <version>Lustre 2.10.7</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="241485" author="adilger" created="Wed, 6 Feb 2019 18:52:19 +0000"  >&lt;p&gt;It looks like the LASSERT() is a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10429&quot; title=&quot;soak, LBUG lod_comp_ost_in_use()) ASSERTION( inuse-&amp;gt;op_count * sizeof(inuse-&amp;gt;op_array[0]) &amp;lt; inuse-&amp;gt;op_size ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10429&quot;&gt;&lt;del&gt;LU-10429&lt;/del&gt;&lt;/a&gt;, which is fixed in 2.11.0 but has not been backported to b2_10.&lt;/p&gt;

&lt;p&gt;The DNE recovery log problem should be handled automatically, instead of having to mount the filesystem to delete the DNE recovery logs. &lt;/p&gt;</comment>
                            <comment id="242841" author="pfarrell" created="Tue, 26 Feb 2019 18:13:57 +0000"  >&lt;p&gt;Andreas,&lt;/p&gt;

&lt;p&gt;I think you actually meant:&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10429&quot; title=&quot;soak, LBUG lod_comp_ost_in_use()) ASSERTION( inuse-&amp;gt;op_count * sizeof(inuse-&amp;gt;op_array[0]) &amp;lt; inuse-&amp;gt;op_size ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10429&quot;&gt;&lt;del&gt;LU-10429&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;But there is also:&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11195&quot; title=&quot;LBUG: (lod_lov.c:633:lod_free_comp_buffer()) ASSERTION( entry-&amp;gt;llc_stripe == ((void *)0) )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11195&quot;&gt;&lt;del&gt;LU-11195&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="49978">LU-10429</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00b4f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>