<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:22:27 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2110] Unable to mount (-17) MDT</title>
                <link>https://jira.whamcloud.com/browse/LU-2110</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;After updating to 2.3.53-2chaos, the MDS is no longer able to mount its MDT. The relevant console messages:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: Found index 0 for lstest-MDT0000, updating log
LustreError: 33410:0:(sec_config.c:1024:sptlrpc_target_local_copy_conf()) missing llog context
LustreError: 33836:0:(genops.c:316:class_newdev()) Device lstest-MDT0000-osp-MDT0000 already exists at 136, won&apos;t add
LustreError: 33836:0:(obd_config.c:374:class_attach()) Cannot create device lstest-MDT0000-osp-MDT0000 of type osp : -17
Lustre: lstest-MDT0000: Temporarily refusing client connection from 0@lo
LustreError: 11-0: lstest-MDT0000-osp-MDT0000: Communicating with 0@lo, operation mds_connect failed with -11
LustreError: 33836:0:(obd_mount.c:373:lustre_start_simple()) lstest-MDT0000-osp-MDT0000 attach error -17
LustreError: 33836:0:(obd_mount.c:1135:lustre_osp_setup()) lstest-MDT0000-osp-MDT0000: setup up failed: rc -17
LustreError: 15c-8: MGC172.20.5.2@o2ib500: The configuration from log &apos;lstest-client&apos; failed (-17). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.
LustreError: 33405:0:(obd_mount.c:1865:server_start_targets()) lstest-MDT0000: failed to start OSP: -17
Lustre: lstest-MDT0000: Unable to start target: -17
Lustre: Failing over lstest-MDT0000
LustreError: 32689:0:(client.c:1116:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff880faa7f0800 x1415277549978464/t0(0) o13-&amp;gt;lstest-OST0181-osc-MDT0000@172.20.2.185@o2ib500:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
LustreError: 32690:0:(osp_precreate.c:116:osp_statfs_interpret()) lstest-OST0182-osc-MDT0000: couldn&apos;t update statfs: rc = -5
LustreError: 32689:0:(client.c:1116:ptlrpc_import_delay_req()) Skipped 253 previous similar messages
Lustre: server umount lstest-MDT0000 complete
LustreError: 33405:0:(obd_mount.c:2985:lustre_fill_super()) Unable to mount  (-17)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;m just about to start looking into the root cause.&lt;/p&gt;</description>
                <environment>Lustre: 2.3.53-2chaos</environment>
        <key id="16280">LU-2110</key>
            <summary>Unable to mount (-17) MDT</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="prakash">Prakash Surya</reporter>
                        <labels>
                            <label>topsequoia</label>
                    </labels>
                <created>Mon, 8 Oct 2012 12:43:59 +0000</created>
                <updated>Fri, 19 Apr 2013 16:25:04 +0000</updated>
                            <resolved>Fri, 19 Apr 2013 16:25:04 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="46181" author="bzzz" created="Mon, 8 Oct 2012 12:50:46 +0000"  >&lt;p&gt;is this on the clean node (after reboot) ? can you unload all lustre modules (may take some time) and try again ?&lt;/p&gt;</comment>
                            <comment id="46182" author="prakash" created="Mon, 8 Oct 2012 12:58:27 +0000"  >&lt;p&gt;Originally, this was on a clean reboot. But the messages I pasted in the description were from a manually retried mount, after the first failed.&lt;/p&gt;

&lt;p&gt;Here are all the messages from the console:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: Lustre: Build Version: 2.3.53-2chaos-2chaos--PRISTINE-2.6.32-220.23.1.1chaos.ch5.x86_64
Lustre: Found index 0 for lstest-MDT0000, updating log
LustreError: 32758:0:(mgc_request.c:248:do_config_log_add()) failed processing sptlrpc log: -2
LustreError: 32761:0:(sec_config.c:1024:sptlrpc_target_local_copy_conf()) missing llog context
LustreError: 33225:0:(genops.c:316:class_newdev()) Device lstest-MDT0000-osp-MDT0000 already exists at 136, won&apos;t add
LustreError: 33225:0:(obd_config.c:374:class_attach()) Cannot create device lstest-MDT0000-osp-MDT0000 of type osp : -17
Lustre: lstest-MDT0000: Temporarily refusing client connection from 0@lo
LustreError: 11-0: lstest-MDT0000-osp-MDT0000: Communicating with 0@lo, operation mds_connect failed with -11
LustreError: 33225:0:(obd_mount.c:373:lustre_start_simple()) lstest-MDT0000-osp-MDT0000 attach error -17
LustreError: 33225:0:(obd_mount.c:1135:lustre_osp_setup()) lstest-MDT0000-osp-MDT0000: setup up failed: rc -17
LustreError: 15c-8: MGC172.20.5.2@o2ib500: The configuration from log &apos;lstest-client&apos; failed (-17). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.
LustreError: 32758:0:(obd_mount.c:1865:server_start_targets()) lstest-MDT0000: failed to start OSP: -17
Lustre: lstest-MDT0000: Unable to start target: -17
Lustre: Failing over lstest-MDT0000
LustreError: 32680:0:(client.c:1116:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff880f977bb800 x1415277549977961/t0(0) o13-&amp;gt;lstest-OST0181-osc-MDT0000@172.20.2.185@o2ib500:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
LustreError: 32680:0:(osp_precreate.c:116:osp_statfs_interpret()) lstest-OST0181-osc-MDT0000: couldn&apos;t update statfs: rc = -5
LustreError: 32681:0:(client.c:1116:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff882016d06c00 x1415277549977962/t0(0) o13-&amp;gt;lstest-OST0182-osc-MDT0000@172.20.2.186@o2ib500:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
LustreError: 32682:0:(osp_precreate.c:116:osp_statfs_interpret()) lstest-OST0183-osc-MDT0000: couldn&apos;t update statfs: rc = -5
LustreError: 32682:0:(osp_precreate.c:116:osp_statfs_interpret()) Skipped 1 previous similar message
LustreError: 32680:0:(osp_precreate.c:116:osp_statfs_interpret()) Skipped 125 previous similar messages
Lustre: server umount lstest-MDT0000 complete
LustreError: 32758:0:(obd_mount.c:2985:lustre_fill_super()) Unable to mount  (-17)
Lustre: Found index 0 for lstest-MDT0000, updating log
LustreError: 33410:0:(sec_config.c:1024:sptlrpc_target_local_copy_conf()) missing llog context
LustreError: 33836:0:(genops.c:316:class_newdev()) Device lstest-MDT0000-osp-MDT0000 already exists at 136, won&apos;t add
LustreError: 33836:0:(obd_config.c:374:class_attach()) Cannot create device lstest-MDT0000-osp-MDT0000 of type osp : -17
Lustre: lstest-MDT0000: Temporarily refusing client connection from 0@lo
LustreError: 11-0: lstest-MDT0000-osp-MDT0000: Communicating with 0@lo, operation mds_connect failed with -11
LustreError: 33836:0:(obd_mount.c:373:lustre_start_simple()) lstest-MDT0000-osp-MDT0000 attach error -17
LustreError: 33836:0:(obd_mount.c:1135:lustre_osp_setup()) lstest-MDT0000-osp-MDT0000: setup up failed: rc -17
LustreError: 15c-8: MGC172.20.5.2@o2ib500: The configuration from log &apos;lstest-client&apos; failed (-17). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.
LustreError: 33405:0:(obd_mount.c:1865:server_start_targets()) lstest-MDT0000: failed to start OSP: -17
Lustre: lstest-MDT0000: Unable to start target: -17
Lustre: Failing over lstest-MDT0000
LustreError: 32689:0:(client.c:1116:ptlrpc_import_delay_req()) @@@ IMP_CLOSED   req@ffff880faa7f0800 x1415277549978464/t0(0) o13-&amp;gt;lstest-OST0181-osc-MDT0000@172.20.2.185@o2ib500:7/4 lens 224/368 e 0 to 0 dl 0 ref 1 fl Rpc:/0/ffffffff rc 0/-1
LustreError: 32690:0:(osp_precreate.c:116:osp_statfs_interpret()) lstest-OST0182-osc-MDT0000: couldn&apos;t update statfs: rc = -5
LustreError: 32689:0:(client.c:1116:ptlrpc_import_delay_req()) Skipped 253 previous similar messages
Lustre: server umount lstest-MDT0000 complete
LustreError: 33405:0:(obd_mount.c:2985:lustre_fill_super()) Unable to mount  (-17)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Is it still worth rebooting, and trying again?&lt;/p&gt;</comment>
                            <comment id="46184" author="bzzz" created="Mon, 8 Oct 2012 13:04:22 +0000"  >&lt;p&gt;well, sorry you&apos;re seeing this... could you please try again and attach lustre log to the ticket ?&lt;/p&gt;
</comment>
                            <comment id="46186" author="prakash" created="Mon, 8 Oct 2012 13:13:22 +0000"  >&lt;p&gt;Rebooted and collected the lustre log file.&lt;/p&gt;</comment>
                            <comment id="46187" author="bzzz" created="Mon, 8 Oct 2012 13:21:05 +0000"  >&lt;p&gt;thanks. I see the root cause.. working on the fix.&lt;/p&gt;</comment>
                            <comment id="46199" author="bzzz" created="Mon, 8 Oct 2012 15:36:35 +0000"  >&lt;p&gt;Prakash, please try with &lt;a href=&quot;http://review.whamcloud.com/#change,4227&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4227&lt;/a&gt; &lt;/p&gt;

&lt;p&gt;if I understand right, failover nid for MDS was specified at mkfs.lustre time, not added later ?&lt;/p&gt;</comment>
                            <comment id="46201" author="prakash" created="Mon, 8 Oct 2012 15:57:18 +0000"  >&lt;p&gt;Actually, I&apos;m not certain of that. At one point a failover NID was added using a writeconf, but the filesystem was reformatted since then. During the reformat, I&apos;m unsure if both the failover NIDs were specified at mkfs time, or the writeconf method was used after mkfs. I can try to track down that information if it is useful..?&lt;/p&gt;</comment>
                            <comment id="46202" author="bzzz" created="Mon, 8 Oct 2012 16:09:35 +0000"  >&lt;p&gt;one way is to fetch /CONFIGS/lstest-client file from MDS and parse it with llog_reader utility.&lt;br/&gt;
it would help us if you attach it to the ticket as well. thanks.&lt;/p&gt;</comment>
                            <comment id="46205" author="prakash" created="Mon, 8 Oct 2012 16:20:51 +0000"  >&lt;p&gt;Here you go. The dump of&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# grove-mds2 /mnt/grove-mds2/mgs &amp;gt; llog_reader CONFIGS/lstest-client &amp;gt; lstest-client.llogreader
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="46208" author="bzzz" created="Mon, 8 Oct 2012 16:59:33 +0000"  >&lt;p&gt;#12 (128)attach    0:lstest-MDT0000-mdc  1:mdc  2:lstest-clilmv_UUID&lt;br/&gt;
...  &lt;br/&gt;
#20 (088)add_uuid  nid=172.20.2.185@o2ib500(0x501f4ac1402b9)  0:  1:172.20.2.185@o2ib500  &lt;br/&gt;
#21 (088)add_uuid  nid=172.20.2.185@tcp(0x20000ac1402b9)  0:  1:172.20.2.185@o2ib500  &lt;/p&gt;

&lt;p&gt;#21 resulted in a second instance of OSP device.&lt;/p&gt;

&lt;p&gt;I think the patch above should help with the issue.&lt;/p&gt;</comment>
                            <comment id="46211" author="morrone" created="Mon, 8 Oct 2012 17:25:26 +0000"  >&lt;p&gt;Ok, we&apos;ve pulled in patch  &lt;a href=&quot;http://review.whamcloud.com/#change,4227&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4227&lt;/a&gt; and will give it a try.&lt;/p&gt;</comment>
                            <comment id="46276" author="ian" created="Tue, 9 Oct 2012 16:26:22 +0000"  >&lt;p&gt;Patch landed to master.&lt;/p&gt;</comment>
                            <comment id="46426" author="bzzz" created="Fri, 12 Oct 2012 01:28:06 +0000"  >&lt;p&gt;can we close the ticket?&lt;/p&gt;</comment>
                            <comment id="46476" author="prakash" created="Fri, 12 Oct 2012 12:53:49 +0000"  >&lt;p&gt;Sure.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11955" name="LU-2110.llog.bz2" size="163042" author="prakash" created="Mon, 8 Oct 2012 13:13:22 +0000"/>
                            <attachment id="11956" name="lstest-client.llogreader.bz2" size="55251" author="prakash" created="Mon, 8 Oct 2012 16:20:51 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv6u7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4642</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>