<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:08:24 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14283] sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;</title>
                <link>https://jira.whamcloud.com/browse/LU-14283</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;sanity-sec test_52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;. We&#8217;ve seen this test crash five times for review-dne-ssk or review-dne-selinux-ssk patch testing. Two recent crashes happened while testing RHEL8.3 clients and servers:&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/13b5e7cf-b908-4799-9d40-9ba4b21dbf00&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/13b5e7cf-b908-4799-9d40-9ba4b21dbf00&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/e4c617c1-30e1-4c44-8454-3a2a36ff1189&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/e4c617c1-30e1-4c44-8454-3a2a36ff1189&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Looking at the client console log, we see&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[37329.207145] Lustre: DEBUG MARKER: mount -t lustre -o user_xattr,flock,test_dummy_encryption,skpath=/tmp/test-framework-keys trevis-8vm4@tcp:/lustre /mnt/lustre
[37331.900380] Lustre: 1600717:0:(keysetup.c:472:llcrypt_get_encryption_info()) inode 144115188193296385: setting policy filenames_encryption_mode to null
[37331.901861] Lustre: 1600717:0:(keysetup.c:472:llcrypt_get_encryption_info()) Skipped 5 previous similar messages
[37334.491809] divide error: 0000 [#1] SMP PTI
[37334.492358] CPU: 1 PID: 1550060 Comm: ptlrpcd_00_00 Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-240.1.1.el8_3.x86_64 #1
[37334.493661] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[37334.494315] RIP: 0010:osc_announce_cached.isra.24+0xd4/0x3b0 [osc]
[37334.494970] Code: af d0 48 8b 43 50 48 39 ca 48 0f 42 d1 48 89 d1 48 c1 e1 0c f6 80 c5 02 00 00 10 74 1a 8b b3 f0 00 00 00 48 8d 44 32 ff 31 d2 &amp;lt;48&amp;gt; f7 f6 0f af 83 ec 00 00 00 48 01 c1 48 81 f9 ff ff ff 6f b8 ff
[37334.496814] RSP: 0018:ffffa4c700833a78 EFLAGS: 00010246
[37334.497361] RAX: 0000000000023fff RBX: ffff958f013085f0 RCX: 0000000024000000
[37334.498104] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff958f013086e4
[37334.498838] RBP: ffff958f1ba8c150 R08: 0000000000033c62 R09: ffff958f1ba8c248
[37334.499564] R10: ffff958efd282880 R11: 0000000000002000 R12: ffff958f013086e4
[37334.500297] R13: 0000000000000010 R14: 0000000000000002 R15: ffff958f013085f0
[37334.501029] FS:  0000000000000000(0000) GS:ffff958f3fd00000(0000) knlGS:0000000000000000
[37334.501848] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[37334.502442] CR2: 00007fc4a2b46000 CR3: 000000005a20a002 CR4: 00000000000606e0
[37334.503184] Call Trace:
[37334.503498]  osc_brw_prep_request+0xf2e/0x15b0 [osc]
[37334.504040]  osc_build_rpc+0xb82/0x10b0 [osc]
[37334.504514]  osc_check_rpcs+0x1703/0x1880 [osc]
[37334.505029]  ? __switch_to_asm+0x41/0x70
[37334.505454]  ? __switch_to_asm+0x35/0x70
[37334.505879]  ? __switch_to_asm+0x35/0x70
[37334.506299]  ? __switch_to_asm+0x41/0x70
[37334.506728]  osc_io_unplug0+0xc0/0x110 [osc]
[37334.507194]  brw_queue_work+0x2e/0xc0 [osc]
[37334.507801]  work_interpreter+0x32/0x110 [ptlrpc]
[37334.508337]  ptlrpc_check_set+0x51a/0x21b0 [ptlrpc]
[37334.508883]  ? finish_task_switch+0x77/0x2a0
[37334.509368]  ptlrpcd_check+0x3d5/0x5b0 [ptlrpc]
[37334.509889]  ptlrpcd+0x458/0x4c0 [ptlrpc]
[37334.510332]  ? finish_wait+0x80/0x80
[37334.510751]  ? ptlrpcd_check+0x5b0/0x5b0 [ptlrpc]
[37334.511262]  kthread+0x112/0x130
[37334.511622]  ? kthread_flush_work_fn+0x10/0x10
[37334.512106]  ret_from_fork+0x35/0x40
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="62186">LU-14283</key>
            <summary>sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="wshilong">Wang Shilong</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                            <label>rhel8.3</label>
                    </labels>
                <created>Wed, 30 Dec 2020 15:59:53 +0000</created>
                <updated>Tue, 13 Apr 2021 01:23:01 +0000</updated>
                            <resolved>Mon, 25 Jan 2021 19:38:29 +0000</resolved>
                                    <version>Lustre 2.14.0</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="288590" author="pjones" created="Mon, 4 Jan 2021 18:19:54 +0000"  >&lt;p&gt;S&#233;bastien&lt;/p&gt;

&lt;p&gt;Could you please advise?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="288786" author="gerrit" created="Wed, 6 Jan 2021 07:23:37 +0000"  >&lt;p&gt;Wang Shilong (wshilong@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41144&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41144&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14283&quot; title=&quot;sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14283&quot;&gt;&lt;del&gt;LU-14283&lt;/del&gt;&lt;/a&gt; osc: debug&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 0fe7cd0a9729a81377d811a5e8fc91decd740295&lt;/p&gt;</comment>
                            <comment id="288852" author="jhammond" created="Wed, 6 Jan 2021 19:17:43 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=sebastien&quot; class=&quot;user-hover&quot; rel=&quot;sebastien&quot;&gt;sebastien&lt;/a&gt; the logs from the debug patch contain a lot of concerning messages. Are these understood?&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  699.370905] Lustre: DEBUG MARKER: /usr/sbin/lctl mark == sanity-sec test 52: Mirrored encrypted file ======================================================= 15:06:50 \(1609945610\)
[  699.848730] Lustre: DEBUG MARKER: == sanity-sec test 52: Mirrored encrypted file ======================================================= 15:06:50 (1609945610)
[  699.960099] Lustre: DEBUG MARKER: grep -c /mnt/lustre&apos; &apos; /proc/mounts
[  699.972026] Lustre: DEBUG MARKER: lsof -t /mnt/lustre
[  700.133566] Lustre: DEBUG MARKER: umount /mnt/lustre 2&amp;gt;&amp;amp;1
[  700.171015] Lustre: DEBUG MARKER: mkdir -p /mnt/lustre
[  700.180844] Lustre: DEBUG MARKER: mount -t lustre -o user_xattr,flock,test_dummy_encryption trevis-25vm4@tcp:/lustre /mnt/lustre
[  700.605880] BUG: Bad rss-counter state mm:00000000b84532d3 idx:1 val:2
[  700.607197] BUG: Bad rss-counter state mm:00000000b84532d3 idx:3 val:-2
[  700.614727] BUG: Bad page state in process lfs  pfn:3ca01
[  700.615774] page:ffffd87ac0f28040 refcount:0 mapcount:-1 mapping:0000000000000000 index:0x0
[  700.617194] flags: 0xfffffc0000000()
[  700.617850] raw: 000fffffc0000000 0000000000000000 ffffffffc0f20903 0000000000000000
[  700.622001] raw: 0000000000000000 0000000000000000 00000000fffffffe 0000000000000000
[  700.623329] page dumped because: nonzero mapcount
[  700.624155] Modules linked in: lustre(OE) obdecho(OE) mgc(OE) mdc(OE) lov(OE) osc(OE) lmv(OE) fid(OE) fld(OE) ptlrpc_gss(OE) ptlrpc(OE) obdclass(OE) ksocklnd(OE) lnet(OE) libcfs(OE) rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp rpcrdma ib_iser ib_ipoib libiscsi scsi_transport_iscsi rdma_ucm ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_uverbs ib_core sunrpc intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul ghash_clmulni_intel i2c_piix4 virtio_balloon joydev pcspkr ip_tables ext4 mbcache jbd2 mlx4_en ata_generic ata_piix 8139too mlx4_core libata crc32c_intel serio_raw 8139cp virtio_blk mii
[  700.633949] CPU: 1 PID: 45813 Comm: lfs Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-240.1.1.el8_3.x86_64 #1
[  700.635821] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[  700.636800] Call Trace:
[  700.637382]  dump_stack+0x5c/0x80
[  700.638015]  bad_page.cold.115+0xba/0xbf
[  700.638726]  __free_pages_ok+0x2c4/0x2d0
[  700.639436]  release_pages+0x305/0x430
[  700.640117]  ? cpumask_any_but+0x20/0x40
[  700.640819]  tlb_flush_mmu_free+0x3d/0x60
[  700.641539]  arch_tlb_finish_mmu+0x89/0x130
[  700.642277]  tlb_finish_mmu+0x1f/0x30
[  700.642931]  unmap_region+0xdd/0x110
[  700.643585]  ? __vma_rb_erase+0x127/0x250
[  700.644294]  do_munmap+0x256/0x440
[  700.644913]  vm_munmap+0x5f/0xa0
[  700.645499]  __x64_sys_munmap+0x22/0x30
[  700.646208]  do_syscall_64+0x5b/0x1a0
[  700.646887]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[  700.647778] RIP: 0033:0x7f4035528a0b
[  700.648418] Code: ff ff 0f 1f 44 00 00 48 8b 15 79 54 2c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff e9 6f ff ff ff f3 0f 1e fa b8 0b 00 00 00 0f 05 &amp;lt;48&amp;gt; 3d 01 f0 ff ff 73 01 c3 48 8b 0d 4d 54 2c 00 f7 d8 64 89 01 48
[  700.651441] RSP: 002b:00007ffc550ab5f8 EFLAGS: 00000202 ORIG_RAX: 000000000000000b
[  700.652712] RAX: ffffffffffffffda RBX: 00007ffc550ab7e0 RCX: 00007f4035528a0b
[  700.653907] RDX: 0000000000000000 RSI: 0000000000402000 RDI: 00007f4034bfe000
[  700.655109] RBP: 0000000000000002 R08: 0000000000401010 R09: 00007ffc550ab4c0
[  700.656330] R10: 000000000000000a R11: 0000000000000202 R12: 0000000000000002
[  700.657530] R13: 00007ffc550ab6d0 R14: 00007f4034bff000 R15: 00007ffc550ab7e0
[  700.658764] Disabling lock debugging due to kernel taint
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.whamcloud.com/test_logs/36ab23fa-5569-491e-855a-66c41621cae8/show_text&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_logs/36ab23fa-5569-491e-855a-66c41621cae8/show_text&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://testing.whamcloud.com/test_sets/c31eb44e-b299-487e-99da-794b128f0cc5&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/c31eb44e-b299-487e-99da-794b128f0cc5&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="288885" author="wshilong" created="Thu, 7 Jan 2021 01:14:53 +0000"  >&lt;p&gt;It looks like the &quot;divide error&quot; is not easy to reproduce, but the bad memory problem is easily reproduced and should be addressed in a separate ticket.&lt;/p&gt;</comment>
                            <comment id="288912" author="sebastien" created="Thu, 7 Jan 2021 10:21:49 +0000"  >&lt;p&gt;Apparently, the message &lt;tt&gt;BUG: Bad rss-counter state&lt;/tt&gt; was printed for every single run of sanity-sec test_52. However, pages with nonzero mapcount were dumped &quot;only&quot; 4 times (out of 200 test runs).&lt;br/&gt;
Strangely, it does not cause the test or Lustre to fail operations.&lt;/p&gt;

&lt;p&gt;I have created &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14306&quot; title=&quot;sanity-sec test_52: BUG: Bad rss-counter state&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14306&quot;&gt;&lt;del&gt;LU-14306&lt;/del&gt;&lt;/a&gt; to investigate this problem.&lt;/p&gt;</comment>
                            <comment id="288928" author="jhammond" created="Thu, 7 Jan 2021 14:01:42 +0000"  >&lt;p&gt;&amp;gt; It looks &quot;&#8216;divide error&quot; is not easily to reproduce, but bad memory problem is easily reproduced and should be addressed in seperate ticket.&lt;/p&gt;

&lt;p&gt;Likely it requires more than just running test_52.&lt;/p&gt;</comment>
                            <comment id="288939" author="jamesanunez" created="Thu, 7 Jan 2021 15:22:23 +0000"  >&lt;p&gt;Wang Shilong - &lt;br/&gt;
I can&apos;t reproduce this all the time, but patch &lt;a href=&quot;https://review.whamcloud.com/#/c/40884/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/40884/&lt;/a&gt; that runs review-dne-ssk and review-dne-selinux-ssk for RHEL 8.3 server/clients is where I see this crash. &lt;/p&gt;</comment>
                            <comment id="289491" author="gerrit" created="Thu, 14 Jan 2021 16:04:21 +0000"  >&lt;p&gt;Andreas Dilger (adilger@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41225&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41225&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14283&quot; title=&quot;sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14283&quot;&gt;&lt;del&gt;LU-14283&lt;/del&gt;&lt;/a&gt; osc: avoid crash if ocd reset&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6f8feca0bd84ddb42e2af4815d25a09fad9d5fd9&lt;/p&gt;</comment>
                            <comment id="289492" author="adilger" created="Thu, 14 Jan 2021 16:09:33 +0000"  >&lt;p&gt;The above patch is not really a fix for the root problem of (what appears to be) a case of the obd_connect_data being zeroed out during reconnect (or similar). It is only working around the crash to avoid taking out the client. Probably this patch needs to be reverted in order to reproduce the root cause. &lt;/p&gt;</comment>
                            <comment id="289500" author="jhammond" created="Thu, 14 Jan 2021 17:09:20 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=sebastien&quot; class=&quot;user-hover&quot; rel=&quot;sebastien&quot;&gt;sebastien&lt;/a&gt; could you look again?&lt;/p&gt;

&lt;p&gt;See &lt;a href=&quot;https://testing.whamcloud.com/test_sets/13b5e7cf-b908-4799-9d40-9ba4b21dbf00&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/13b5e7cf-b908-4799-9d40-9ba4b21dbf00&lt;/a&gt; and the client logs at &lt;a href=&quot;https://testing.whamcloud.com/test_logs/d8e65e1b-ad1c-4b27-9318-07ebeab40ea2/show_text&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_logs/d8e65e1b-ad1c-4b27-9318-07ebeab40ea2/show_text&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I see several messages of the form&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[13483.414663] LustreError: 809674:0:(gss_cli_upcall.c:270:gss_do_ctx_init_rpc()) obd lustre-OST0007-osc-ffff99b9f9160800 not setup
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The obd device found does not have the &lt;tt&gt;obd_set_up&lt;/tt&gt; bit set. In &lt;tt&gt;obd_import_event()&lt;/tt&gt; if that bit is not set then the event is ignored:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; inline void obd_import_event(struct obd_device *obd,
                                    struct obd_import *imp,
                                    &lt;span class=&quot;code-keyword&quot;&gt;enum&lt;/span&gt; obd_import_event event)
{
        ENTRY;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (!obd) {
                CERROR(&lt;span class=&quot;code-quote&quot;&gt;&quot;NULL device\n&quot;&lt;/span&gt;);
                EXIT;
                &lt;span class=&quot;code-keyword&quot;&gt;return&lt;/span&gt;;
        }

        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (obd-&amp;gt;obd_set_up &amp;amp;&amp;amp; OBP(obd, import_event))
                OBP(obd, import_event)(obd, imp, event);

        EXIT;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;In &lt;tt&gt;osc_import_event()&lt;/tt&gt; we have:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
        &lt;span class=&quot;code-keyword&quot;&gt;case&lt;/span&gt; IMP_EVENT_OCD: {
                struct obd_connect_data *ocd = &amp;amp;imp-&amp;gt;imp_connect_data;

		&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (ocd-&amp;gt;ocd_connect_flags &amp;amp; OBD_CONNECT_GRANT)
                        osc_init_grant(&amp;amp;obd-&amp;gt;u.cli, ocd);

                &lt;span class=&quot;code-comment&quot;&gt;/* See bug 7198 */&lt;/span&gt;
                &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (ocd-&amp;gt;ocd_connect_flags &amp;amp; OBD_CONNECT_REQPORTAL)
                        imp-&amp;gt;imp_client-&amp;gt;cli_request_portal =OST_REQUEST_PORTAL;

		rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD);
                &lt;span class=&quot;code-keyword&quot;&gt;break&lt;/span&gt;;
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Note:&lt;/p&gt;

&lt;p&gt;&lt;tt&gt;osc_init_grant()&lt;/tt&gt; is responsible for initializing the &lt;tt&gt;cl_max_extent_pages&lt;/tt&gt; field to a non zero value (according to the OCD data):&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
                &lt;span class=&quot;code-comment&quot;&gt;/* determine maximum extent size, in #pages */&lt;/span&gt;
                size = (u64)ocd-&amp;gt;ocd_grant_max_blks &amp;lt;&amp;lt; ocd-&amp;gt;ocd_grant_blkbits;
		cli-&amp;gt;cl_max_extent_pages = size &amp;gt;&amp;gt; PAGE_SHIFT;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;&lt;tt&gt;cl_max_extent_pages&lt;/tt&gt; being zero is giving us the divide error in &lt;tt&gt;osc_announce_cached()&lt;/tt&gt;:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
                        /* take extent tax into account when asking &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more
                         * grant space */
                        nrextents = (nrpages + cli-&amp;gt;cl_max_extent_pages - 1)  /
                                     cli-&amp;gt;cl_max_extent_pages;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Could you determine why the OSC obd_device is not getting setup here?&lt;/p&gt;</comment>
                            <comment id="289509" author="sebastien" created="Thu, 14 Jan 2021 17:46:37 +0000"  >&lt;p&gt;Thanks for this analysis, this is helpful.&lt;/p&gt;

&lt;p&gt;The messages like:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[13483.414663] LustreError: 809674:0:(gss_cli_upcall.c:270:gss_do_ctx_init_rpc()) obd lustre-OST0007-osc-ffff99b9f9160800 not setup
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;occur because the client tries to initialize its GSS context before the obd device is actually set up. This GSS initialization operation is triggered from userspace by lgss_keyring upon key request, so it can happen completely asynchronously. In any case, I would not tend to consider this harmful.&lt;/p&gt;

&lt;p&gt;The other question to ask is how can we end up in &lt;tt&gt;osc_announce_cached()&lt;/tt&gt; if the OSC obd_device is not setup.&lt;/p&gt;</comment>
                            <comment id="289531" author="jhammond" created="Thu, 14 Jan 2021 20:40:25 +0000"  >&lt;p&gt;&amp;gt; The other question to ask is how can we end up in osc_announce_cached() if the OSC obd_device is not setup.&lt;/p&gt;

&lt;p&gt;I do not think this is possible.&lt;/p&gt;</comment>
                            <comment id="289547" author="wshilong" created="Thu, 14 Jan 2021 23:17:00 +0000"  >&lt;p&gt;I did not find why we could hit this issue.&lt;/p&gt;

&lt;p&gt;Note that once @cl_max_extent_pages has been initialized, even a reconnect won&apos;t reset it to zero, unless I am missing something here, and there is a window&lt;br/&gt;
where it could be zero, but that was protected by a spinlock.&lt;/p&gt;

&lt;p&gt;So the most likely possibility is that it was zero initially...&lt;/p&gt;</comment>
                            <comment id="289607" author="jhammond" created="Fri, 15 Jan 2021 14:29:57 +0000"  >&lt;p&gt;It seems very unlikely, but if &lt;tt&gt;ocd_connect_flags&lt;/tt&gt; includes &lt;tt&gt;GRANT_PARAM&lt;/tt&gt; but not &lt;tt&gt;GRANT&lt;/tt&gt; then we could get here.&lt;/p&gt;</comment>
                            <comment id="289692" author="gerrit" created="Sat, 16 Jan 2021 13:19:34 +0000"  >&lt;p&gt;Wang Shilong (wshilong@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/41256&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41256&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14283&quot; title=&quot;sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14283&quot;&gt;&lt;del&gt;LU-14283&lt;/del&gt;&lt;/a&gt; obdclass: connect vs disconnect race&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 43d0981915600fa6030ccc4d496d2824745633c7&lt;/p&gt;</comment>
                            <comment id="290165" author="gerrit" created="Fri, 22 Jan 2021 20:14:41 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/41256/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41256/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14283&quot; title=&quot;sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14283&quot;&gt;&lt;del&gt;LU-14283&lt;/del&gt;&lt;/a&gt; obdclass: connect vs disconnect race&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4f689a30fb5d8707270dfc9df9329b32fec8440f&lt;/p&gt;</comment>
                            <comment id="290306" author="gerrit" created="Mon, 25 Jan 2021 19:19:15 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/41225/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/41225/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14283&quot; title=&quot;sanity-sec test 52 crashes due to &#8216;divide error: 0000 [#1] SMP PTI&#8217;&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14283&quot;&gt;&lt;del&gt;LU-14283&lt;/del&gt;&lt;/a&gt; osc: avoid crash if ocd reset&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 1ec58a2e6ef98bacbb806f1b141ef38cdefe2226&lt;/p&gt;</comment>
                            <comment id="290311" author="pjones" created="Mon, 25 Jan 2021 19:38:29 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01id3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>