<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:23:01 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-15988] conf-sanity test_38 crash</title>
                <link>https://jira.whamcloud.com/browse/LU-15988</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for S Buisson &amp;lt;sbuisson@ddn.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/03680f4b-7efd-458f-ae96-3af76e0f1355&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/03680f4b-7efd-458f-ae96-3af76e0f1355&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_38 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;trevis-78vm4 crashed during conf-sanity test_38
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[11297.066787] LustreError: 166-1: MGC10.240.42.139@tcp: Connection to MGS (at 0@lo) was lost; in progress operations using this service will fail
[11297.069837] LustreError: 582898:0:(osp_object.c:629:osp_attr_get()) lustre-MDT0001-osp-MDT0002: osp_attr_get update error [0x24000cf22:0x1:0x0]: rc = -5
[11297.072504] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040
[11297.074000] PGD 0 P4D 0 
[11297.074544] Oops: 0000 [#1] SMP PTI
[11297.075256] CPU: 1 PID: 582898 Comm: dist_txn-2 Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-348.23.1.el8_lustre.x86_64 #1
[11297.077597] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[11297.078838] RIP: 0010:osp_send_update_req+0x25b/0x6d0 [osp]
[11297.079924] Code: 00 00 00 68 04 00 00 c7 05 b6 02 02 00 00 00 08 00 48 89 05 a7 02 02 00 48 8b 85 e0 01 00 00 48 c7 05 a5 02 02 00 00 00 00 00 &amp;lt;4c&amp;gt; 8b 60 40 e8 4c 15 11 ff 48 8d 7b 1c ba 40 00 00 00 48 89 c6 e8
[11297.083371] RSP: 0018:ffffbc0445aefd10 EFLAGS: 00010202
[11297.084397] RAX: 0000000000000000 RBX: ffff976a76bac960 RCX: 0000000000000000
[11297.085762] RDX: ffff976a43d985b0 RSI: 00000000ffffff8c RDI: ffff976a43d98a00
[11297.087117] RBP: ffff976a6ffb8000 R08: 00000000000005bb R09: ffffbc0445aefb78
[11297.088478] R10: ffffbc0445aefd10 R11: ffff976a4458155a R12: ffff976a43d98580
[11297.089842] R13: 00000000ffffff8c R14: 0000000000000000 R15: 0000000000000001
[11297.091207] FS:  0000000000000000(0000) GS:ffff976affd00000(0000) knlGS:0000000000000000
[11297.092744] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[11297.093860] CR2: 0000000000000040 CR3: 000000006e210006 CR4: 00000000000606e0
[11297.095221] Call Trace:
[11297.095783]  osp_trans_stop+0x37c/0x434 [osp]
[11297.096936]  llog_cancel_arr_rec+0x7f0/0xc00 [obdclass]
[11297.098013]  llog_cat_cancel_arr_rec+0x1e3/0x460 [obdclass]
[11297.099118]  llog_cat_cancel_records+0x61/0x190 [obdclass]
[11297.100640]  distribute_txn_commit_thread+0x3cb/0xb50 [ptlrpc]
[11297.101835]  ? distribute_txn_commit_batchid_update+0x890/0x890 [ptlrpc]
[11297.103202]  kthread+0x116/0x130
[11297.103887]  ? kthread_flush_work_fn+0x10/0x10
[11297.104792]  ret_from_fork+0x35/0x40
[11297.105531] Modules linked in: dm_flakey ofd(OE) ost(OE) osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) osd_ldiskfs(OE) ldiskfs(OE) lquota(OE) lustre(OE) obdecho(OE) mgc(OE) mdc(OE) lov(OE) osc(OE) lmv(OE) fid(OE) fld(OE) ptlrpc_gss(OE) ptlrpc(OE) obdclass(OE) ksocklnd(OE) lnet(OE) libcfs(OE) nfsv3 nfs_acl loop rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul dm_mod ghash_clmulni_intel joydev virtio_balloon pcspkr i2c_piix4 ext4 mbcache jbd2 ata_generic ata_piix libata crc32c_intel virtio_net net_failover failover serio_raw virtio_blk [last unloaded: dm_flakey]
[11297.116029] CR2: 0000000000000040
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
conf-sanity test_38 - trevis-78vm4 crashed during conf-sanity test_38&lt;/p&gt;</description>
                <environment></environment>
        <key id="71003">LU-15988</key>
            <summary>conf-sanity test_38 crash</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 4 Jul 2022 06:09:25 +0000</created>
                <updated>Tue, 26 Sep 2023 09:11:36 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="387165" author="adilger" created="Mon, 25 Sep 2023 21:04:18 +0000"  >&lt;p&gt;+1 on master: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/7ab15c82-0b92-44f7-82b4-6e4f316d35c9&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/7ab15c82-0b92-44f7-82b4-6e4f316d35c9&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 6860.952778] Lustre: DEBUG MARKER: umount -d -f /mnt/lustre-mds2
[ 6861.121512] LustreError: 529868:0:(osp_object.c:637:osp_attr_get()) lustre-MDT0000-osp-MDT0001: osp_attr_get update error [0x20000ee62:0x1:0x0]: rc = -5
[ 6861.124105] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040
[ 6861.126734] CPU: 0 PID: 529868 Comm: dist_txn-1 4.18.0-425.10.1.el8_lustre.x86_64
[ 6861.128934] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 6861.129982] RIP: 0010:osp_send_update_req+0x265/0x6e0 [osp]
[ 6861.145557] Call Trace:
[ 6861.146084]  osp_trans_stop+0x383/0x43b [osp]
[ 6861.146938]  llog_cancel_arr_rec+0x7f2/0xc30 [obdclass]
[ 6861.148189]  llog_cat_cancel_arr_rec+0x1e0/0x450 [obdclass]
[ 6861.149262]  llog_cat_cancel_records+0x61/0x1a0 [obdclass]
[ 6861.150318]  distribute_txn_commit_thread+0x40c/0xb10 [ptlrpc]
[ 6861.153283]  kthread+0x10b/0x130
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="387167" author="adilger" created="Mon, 25 Sep 2023 21:28:21 +0000"  >&lt;p&gt;Looks like a NULL pointer dereference in &lt;tt&gt;osp_send_update_req()&lt;/tt&gt;.  Checking the address on a different build (so may not be exactly the same):&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
(gdb) list *(osp_send_update_req+0x258)
0x27e18 is in osp_send_update_req (/usr/src/lustre-exa/lnet/include/uapi/linux/lnet/nidstr.h:85).
80                                      LNET_NIDSTR_SIZE);
81      }
82      &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *libcfs_nid2str_r(lnet_nid_t nid, &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *buf, __kernel_size_t buf_size);
83      &lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; inline &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *libcfs_nid2str(lnet_nid_t nid)
84      {
85              &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt; libcfs_nid2str_r(nid, libcfs_next_nidstring(),
86                                      LNET_NIDSTR_SIZE);
87      }
88      __u32 libcfs_str2net(&lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *str);
89      lnet_nid_t libcfs_str2nid(&lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;char&lt;/span&gt; *str);
(gdb) list *(osp_send_update_req+0x248)
0x27e08 is in osp_send_update_req (/usr/src/lustre-exa/lustre/osp/osp_trans.c:1124).
1119            &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (ou &amp;amp;&amp;amp; ou-&amp;gt;ou_generation != our-&amp;gt;our_generation) {
1120                    &lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; struct lnet_process_id *peer =
1121                            &amp;amp;osp-&amp;gt;opd_obd-&amp;gt;u.cli.cl_import-&amp;gt;imp_connection-&amp;gt;c_peer;
1122                    rc = -ESTALE;
1123                    osp_trans_callback(env, oth, rc);
1124                    CDEBUG(D_HA, &lt;span class=&quot;code-quote&quot;&gt;&quot;%s: stale tx to %s: gen %llu != %llu: rc = %d\n&quot;&lt;/span&gt;,
1125                           osp-&amp;gt;opd_obd-&amp;gt;obd_name, libcfs_nid2str(peer-&amp;gt;nid),
1126                           osp-&amp;gt;opd_update-&amp;gt;ou_generation, our-&amp;gt;our_generation, rc);
1127                    RETURN(rc);
1128            }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;There are unfortunately a few different structure offsets at 0x40 that may apply here - &lt;tt&gt;opd_obd-&amp;gt;obd_name&lt;/tt&gt; and &lt;tt&gt;our-&amp;gt;our_generation&lt;/tt&gt;.&lt;/p&gt;

&lt;p&gt;Minor nit - &lt;tt&gt;osp-&amp;gt;opd_update-&amp;gt;ou_generation&lt;/tt&gt; in the debug message can be replaced by &lt;tt&gt;ou-&amp;gt;ou_generation&lt;/tt&gt;, which is already validated before the error is printed, so could not be the culprit.&lt;/p&gt;

&lt;p&gt;Crash has only been hit 4x since ticket was originally filed on 2022-07-04.  It might make sense to push a patch to validate these pointers before access, but the problem is hit so rarely that it wouldn&apos;t even be clear when the debugging was hit, so this is not a priority to investigate.&lt;/p&gt;</comment>
                            <comment id="387238" author="adilger" created="Tue, 26 Sep 2023 09:11:36 +0000"  >&lt;p&gt;May also relate to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17146&quot; title=&quot;sanity-lfsck test_38: read should fail&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17146&quot;&gt;LU-17146&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="78102">LU-17146</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i02tpb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>