<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:52:14 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12397] osp: race around opd_new_connection</title>
                <link>https://jira.whamcloud.com/browse/LU-12397</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;There is a race around opd_new_connection that causes osp_precreate_thread to wait indefinitely:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;                        l_wait_event(d-&amp;gt;opd_pre_waitq,
                                     !osp_precreate_running(d) ||
                                     d-&amp;gt;opd_new_connection,
                                     &amp;amp;lwi); 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="55870">LU-12397</key>
            <summary>osp: race around opd_new_connection</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="scherementsev">Sergey Cheremencev</assignee>
                                    <reporter username="scherementsev">Sergey Cheremencev</reporter>
                        <labels>
                            <label>osp</label>
                    </labels>
                <created>Thu, 6 Jun 2019 11:17:49 +0000</created>
                <updated>Thu, 23 Mar 2023 18:25:54 +0000</updated>
                            <resolved>Fri, 25 Sep 2020 03:57:54 +0000</resolved>
                                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="248519" author="sergey" created="Thu, 6 Jun 2019 11:18:36 +0000"  >&lt;p&gt;Below is a scenario showing how this race can happen.&lt;/p&gt;

&lt;p&gt;A disk error caused one of the OSTs to hang:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Mar 11 19:59:39 lstrn09 kernel: Call Trace:
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffff8163a089&amp;gt;] schedule+0x29/0x70
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffff814ba2a5&amp;gt;] md_make_request+0xb5/0x370
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffff812c7342&amp;gt;] generic_make_request+0xe2/0x130
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffff812c7401&amp;gt;] submit_bio+0x71/0x150
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa157612c&amp;gt;] osd_submit_bio+0x1c/0x60 [osd_ldiskfs]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa1578577&amp;gt;] osd_do_bio.isra.26+0x3d7/0x830 [osd_ldiskfs]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa15790f2&amp;gt;] osd_write_commit+0x322/0x900 [osd_ldiskfs]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa15f1ec0&amp;gt;] ofd_commitrw_write.isra.32+0xc60/0x1c20 [ofd]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa15f5d42&amp;gt;] ofd_commitrw+0x512/0xac0 [ofd]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa0ef3a9b&amp;gt;] obd_commitrw.constprop.39+0x2f8/0x33b [ptlrpc]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa0ee0530&amp;gt;] tgt_brw_write+0x1010/0x1720 [ptlrpc]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa0edc3db&amp;gt;] tgt_request_handle+0x8fb/0x11f0 [ptlrpc]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa0e7edab&amp;gt;] ptlrpc_server_handle_request+0x21b/0xa90 [ptlrpc]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffffa0e826db&amp;gt;] ptlrpc_main+0xc0b/0x2060 [ptlrpc]
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffff810a5b8f&amp;gt;] kthread+0xcf/0xe0
Mar 11 19:59:39 lstrn09 kernel:  [&amp;lt;ffffffff81644fd8&amp;gt;] ret_from_fork+0x58/0x90
 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Write and read requests couldn&apos;t complete after the disk failure.&lt;/p&gt;

&lt;p&gt;At the same time, MDT000 and MDT0001 started reporting hung-task traces like the one below:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Mar 11 20:11:36 lstrn02 kernel: &quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
Mar 11 20:11:36 lstrn02 kernel: mdt00_001       D ffff880f765211d8     0 24438      2 0x00000000
Mar 11 20:11:36 lstrn02 kernel:  ffff880f77b635a8 0000000000000046 ffff88102b0d6780 ffff880f77b63fd8
Mar 11 20:11:36 lstrn02 kernel:  ffff880f77b63fd8 ffff880f77b63fd8 ffff88102b0d6780 ffff88102b0d6780
Mar 11 20:11:36 lstrn02 kernel:  ffff880f765211c8 ffff880f765211d0 ffffffff00000000 ffff880f765211d8
Mar 11 20:11:36 lstrn02 kernel: Call Trace:
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffff8163a089&amp;gt;] schedule+0x29/0x70
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffff8163b845&amp;gt;] rwsem_down_write_failed+0x115/0x220
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffff813019f3&amp;gt;] call_rwsem_down_write_failed+0x13/0x20
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa15d9d1f&amp;gt;] lod_qos_prep_create+0x111f/0x1fbc [lod]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa15d35dc&amp;gt;] lod_declare_striped_object+0x1ec/0x790 [lod]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa15d4bf1&amp;gt;] lod_declare_object_create+0x231/0x4b0 [lod]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa13372ff&amp;gt;] mdd_declare_object_create_internal+0xdf/0x2f0 [mdd]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa132b328&amp;gt;] mdd_declare_create+0x48/0xef0 [mdd]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa132c959&amp;gt;] mdd_create+0x789/0x12a0 [mdd]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa13b1dd2&amp;gt;] mdt_reint_open+0x1f92/0x2e00 [mdt]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa13a4e60&amp;gt;] mdt_reint_rec+0x80/0x210 [mdt]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa1385281&amp;gt;] mdt_reint_internal+0x5e1/0xb40 [mdt]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa1385942&amp;gt;] mdt_intent_reint+0x162/0x430 [mdt]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa1389455&amp;gt;] mdt_intent_opc+0x215/0xb50 [mdt]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa13911a8&amp;gt;] mdt_intent_policy+0x138/0x320 [mdt]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa0df955a&amp;gt;] ldlm_lock_enqueue+0x35a/0x9c0 [ptlrpc]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa0e241b3&amp;gt;] ldlm_handle_enqueue0+0x9c3/0x1830 [ptlrpc]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa0eb1712&amp;gt;] tgt_enqueue+0x62/0x210 [ptlrpc]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa0eb63db&amp;gt;] tgt_request_handle+0x8fb/0x11f0 [ptlrpc]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa0e58dab&amp;gt;] ptlrpc_server_handle_request+0x21b/0xa90 [ptlrpc]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffffa0e5c6db&amp;gt;] ptlrpc_main+0xc0b/0x2060 [ptlrpc]
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffff810a5b8f&amp;gt;] kthread+0xcf/0xe0
Mar 11 20:11:36 lstrn02 kernel:  [&amp;lt;ffffffff81644fd8&amp;gt;] ret_from_fork+0x58/0x90
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This is the result of the inability to create new objects on OST000a (from lstrn09).&lt;/p&gt;

&lt;p&gt;The hung-task messages were supposed to disappear after the lstrn09 failover, but they continued to appear in messages until lstrn02 crashed, due to a bug in osp around opd_new_connection.&#160;&lt;/p&gt;</comment>
                            <comment id="248523" author="gerrit" created="Thu, 6 Jun 2019 12:23:38 +0000"  >&lt;p&gt;Sergey Cheremencev (c17829@cray.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/35078&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35078&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12397&quot; title=&quot;osp: race around opd_new_connection&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12397&quot;&gt;&lt;del&gt;LU-12397&lt;/del&gt;&lt;/a&gt; osp: always set opd_new_connection&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 83381358299125d4b125c017d74172fe3a130d85&lt;/p&gt;</comment>
                            <comment id="280578" author="gerrit" created="Fri, 25 Sep 2020 03:12:44 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/35078/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/35078/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12397&quot; title=&quot;osp: race around opd_new_connection&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12397&quot;&gt;&lt;del&gt;LU-12397&lt;/del&gt;&lt;/a&gt; osp: always set opd_new_connection&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 1b5abf625462a2b66820b2d07e25619afba504c6&lt;/p&gt;</comment>
                            <comment id="280594" author="pjones" created="Fri, 25 Sep 2020 03:57:54 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                            <comment id="338960" author="gerrit" created="Tue, 28 Jun 2022 05:42:25 +0000"  >&lt;p&gt;&quot;Yang Sheng &amp;lt;ys@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/47805&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/47805&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12397&quot; title=&quot;osp: race around opd_new_connection&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12397&quot;&gt;&lt;del&gt;LU-12397&lt;/del&gt;&lt;/a&gt; osp: remove opd_new_connection&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 2d06a167ac46715b45c8677a1a53b64d95e3e5ff&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="74743">LU-16578</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is duplicated by">
                                                        </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00hpr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>