<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:22:58 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9066] ior ERROR: read() failed, Input/output error; client was evicted after OST failover</title>
                <link>https://jira.whamcloud.com/browse/LU-9066</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;During the OST failover, it was reported:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2016-12-05 02:42:07 [115035.688184] LustreError: 138-a: fs1-OST0001: A client on nid 172.18.1.103@o2ib was evicted due to a lock completion callback time out: rc -19
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;On the client side, IOR got I/O failure:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;bluepill-client03: IOR-3.0.1: MPI Coordinated Test of Parallel I/O
bluepill-client03: 
bluepill-client03: Began: Sun Dec  4 18:28:38 2016
bluepill-client03: Command line used: /usr/local/bin/IOR -o /mnt/fs1//ha.sh-111769/bluepill-client03-ior/f.ior -f /test-tools/grev/Cray/2016_snx2k_fvt.ior.shared_file.p
...
bluepill-client03: 	clients            = 88 (8 per node)
...
bluepill-client03: Commencing read performance test: Mon Dec  5 02:34:58 2016
bluepill-client03: ior ERROR: read() failed, errno 5, Input/output error (aiori-POSIX.c:250)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="43445">LU-9066</key>
            <summary>ior ERROR: read() failed, Input/output error; client was evicted after OST failover</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="yong.fan">nasf</reporter>
                        <labels>
                    </labels>
                <created>Tue, 31 Jan 2017 13:45:31 +0000</created>
                <updated>Wed, 14 Jun 2017 11:11:15 +0000</updated>
                            <resolved>Thu, 23 Mar 2017 01:46:06 +0000</resolved>
                                    <version>Lustre 2.9.0</version>
                    <version>Lustre 2.10.0</version>
                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="182747" author="gerrit" created="Tue, 31 Jan 2017 13:54:27 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/25173&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/25173&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9066&quot; title=&quot;ior ERROR: read() failed, Input/output error; client was evicted after OST failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9066&quot;&gt;&lt;del&gt;LU-9066&lt;/del&gt;&lt;/a&gt; ldlm: NOT evict client when target stopping&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 1495b69d8bf5d901c372ec91fa316e9649b31866&lt;/p&gt;</comment>
                            <comment id="183520" author="tappro" created="Mon, 6 Feb 2017 08:33:07 +0000"  >&lt;p&gt;It seems this is the same as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8359&quot; title=&quot;Wrong evict during failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8359&quot;&gt;LU-8359&lt;/a&gt;, isn&apos;t it?&#160;&lt;/p&gt;</comment>
                            <comment id="183522" author="yong.fan" created="Mon, 6 Feb 2017 08:40:17 +0000"  >&lt;p&gt;The issue will be handled in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8860&quot; title=&quot;lock callback errors after client umount&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8860&quot;&gt;&lt;del&gt;LU-8860&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="183524" author="yong.fan" created="Mon, 6 Feb 2017 08:42:31 +0000"  >&lt;blockquote&gt;
&lt;p&gt;It seems this is the same as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8359&quot; title=&quot;Wrong evict during failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8359&quot;&gt;LU-8359&lt;/a&gt;, isn&apos;t it? &lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;I think your patch &lt;a href=&quot;https://review.whamcloud.com/#/c/23921&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/23921&lt;/a&gt; has already handled this case.&lt;/p&gt;</comment>
                            <comment id="183528" author="tappro" created="Mon, 6 Feb 2017 09:28:02 +0000"  >&lt;p&gt;reopen to keep patch tracking under this ticket&lt;/p&gt;</comment>
                            <comment id="189344" author="gerrit" created="Thu, 23 Mar 2017 01:41:18 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/23921/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/23921/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9066&quot; title=&quot;ior ERROR: read() failed, Input/output error; client was evicted after OST failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9066&quot;&gt;&lt;del&gt;LU-9066&lt;/del&gt;&lt;/a&gt; ldlm: don&apos;t evict client on umount if AST fails&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: eda984e7cb4e6a97310ed0f5e81f398dc48b56bf&lt;/p&gt;</comment>
                            <comment id="199036" author="askulysh" created="Tue, 13 Jun 2017 13:20:48 +0000"  >&lt;p&gt;We observe the same error in 2.7 with &lt;a href=&quot;https://review.whamcloud.com/23921/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/23921/&lt;/a&gt; applied&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2017-06-09 14:53:58 [77116.822290] LustreError: 138-a: fs1-OST0000: A client on nid 172.18.1.104@o2ib was evicted due to a lock blocking callback time out: rc -19
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;With the patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8359&quot; title=&quot;Wrong evict during failover&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8359&quot;&gt;LU-8359&lt;/a&gt; it isn&apos;t reproducible.&lt;/p&gt;</comment>
                            <comment id="199046" author="yong.fan" created="Tue, 13 Jun 2017 15:14:50 +0000"  >&lt;p&gt;Sorry, I cannot imagine how this message can be printed with the patch &lt;a href=&quot;https://review.whamcloud.com/23921/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/23921/&lt;/a&gt; applied. The logic of ldlm_handle_ast_error() with that patch is as follows:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;static int ldlm_handle_ast_error(struct ldlm_lock *lock,
                                 struct ptlrpc_request *req, int rc,
                                 const char *ast_type)
{
...
                } else if (rc == -ENODEV || rc == -ESHUTDOWN ||
                           (rc == -EIO &amp;amp;&amp;amp;
                            req-&amp;gt;rq_import-&amp;gt;imp_state == LUSTRE_IMP_CLOSED)) {
                        /* Upon umount process the AST fails because cannot be
                         * sent. This shouldn&apos;t lead to the client eviction.
                         * -ENODEV error is returned by ptl_send_rpc() for
                         *  new request in such import.
                         * -SHUTDOWN is returned by ptlrpc_import_delay_req()
                         *  if imp_invalid is set or obd_no_recov.
                         * Meanwhile there is also check for LUSTRE_IMP_CLOSED
                         * in ptlrpc_import_delay_req() as well with -EIO code.
                         * In all such cases errors are ignored.
                         */
                        LDLM_DEBUG(lock, &quot;%s AST can&apos;t be sent due to a server&quot;
                                         &quot; %s failure or umount process: rc = %d\n&quot;,
                                         ast_type,
                                         req-&amp;gt;rq_import-&amp;gt;imp_obd-&amp;gt;obd_name, rc);
                } else {
                        LDLM_ERROR(lock,
                                   &quot;client (nid %s) %s %s AST (req@%p x%llu status %d rc %d), evict it&quot;,
                                   libcfs_nid2str(peer.nid),
                                   req-&amp;gt;rq_replied ? &quot;returned error from&quot; :
                                   &quot;failed to reply to&quot;,
                                   ast_type, req, req-&amp;gt;rq_xid,
                                   (req-&amp;gt;rq_repmsg != NULL) ?
                                   lustre_msg_get_status(req-&amp;gt;rq_repmsg) : 0,
                                   rc);
                        ldlm_failed_ast(lock, rc, ast_type);
                }
                return rc;
...
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Please note that when &quot;rc == -19 (ENODEV)&quot;, it will go to the LDLM_DEBUG() branch, not the ldlm_failed_ast() branch. Please correct me if I missed anything.&lt;/p&gt;</comment>
                            <comment id="199191" author="askulysh" created="Wed, 14 Jun 2017 11:11:15 +0000"  >&lt;p&gt;Ah, in fact the fix wasn&apos;t applied during our testing. Sorry for the confusion.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="41730">LU-8860</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="37935">LU-8359</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="41730">LU-8860</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="is related to">
                                                        </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzz253:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>