<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:49:21 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12063] mktemp fails with ENOENT and MDS log reports lod_gen_component_ea() Can not locate [0x700000bd9:0x56:0x0]: rc = -2</title>
                <link>https://jira.whamcloud.com/browse/LU-12063</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;file create fails intermittently, errno is ENOENT.&lt;/p&gt;

&lt;p&gt;bash-4.2$ mktemp /p/lquake/faaland1/make-busy/mdt7/mdtest.enoent.XXXX  &lt;br/&gt;
mktemp: failed to create file via template &apos;/p/lquake/faaland1/make-busy/mdt7/mdtest.enoent.XXXX&apos;: No such file or directory&lt;br/&gt;
bash-4.2$ ls -l /p/lquake/faaland1/make-busy/mdt7&lt;br/&gt;
total 65&lt;br/&gt;
drwx------ 3 faaland1 faaland1 33280 Mar 12 12:24 mdtest.6qUohi&lt;br/&gt;
drwx------ 3 faaland1 faaland1 33280 Mar 12 12:24 mdtest.ldXmqW&lt;/p&gt;

&lt;p&gt;MDS console log reports:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[Tue Mar 12 13:42:47 2019] LustreError: 57771:0:(lod_lov.c:896:lod_gen_component_ea()) lquake-MDT0007-mdtlov: Can not locate [0x700000bd9:0x56:0x0]: rc = -2
[Tue Mar 12 13:42:47 2019] LustreError: 57771:0:(lod_lov.c:896:lod_gen_component_ea()) Skipped 6 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>Lustre 2.12.0_1.chaos_2_g3ee692e&lt;br/&gt;
kernel 3.10.0-957.5.1.3chaos.ch6.x86_64&lt;br/&gt;
distro RHEL 7.6 derivative&lt;br/&gt;
backend zfs v0.7.11-5llnl</environment>
        <key id="55144">LU-12063</key>
            <summary>mktemp fails with ENOENT and MDS log reports lod_gen_component_ea() Can not locate [0x700000bd9:0x56:0x0]: rc = -2</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="pfarrell">Patrick Farrell</assignee>
                                    <reporter username="ofaaland">Olaf Faaland</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Tue, 12 Mar 2019 21:05:14 +0000</created>
                <updated>Mon, 15 Jul 2019 21:50:26 +0000</updated>
                            <resolved>Mon, 15 Jul 2019 21:50:26 +0000</resolved>
                                    <version>Lustre 2.12.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="243768" author="ofaaland" created="Tue, 12 Mar 2019 21:08:00 +0000"  >&lt;p&gt;The lustre patch stack on both client and server is:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;* 3ee692e (HEAD, 2.12.0-llnl) TOSS-4431 build: build ldiskfs only for x86_64
* e3844bf LU-11827 llog: protect cathandle in llog_cat_declare_add_rec
* 13a3da2 (tag: 2.12.0_1.chaos, llnlstash/2.12.0-llnl) llnl: disable ldiskfs build under rpmbuild
* 7308687 build: no zlib check during configure --enable-dist
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="243769" author="ofaaland" created="Tue, 12 Mar 2019 21:10:38 +0000"  >&lt;p&gt;lfs getdirstripe and getstripe output:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;bash-4.2$ lfs getdirstripe /p/lquake/faaland1/make-busy/mdt7
lmv_stripe_count: 0 lmv_stripe_offset: 7 lmv_hash_type: none
bash-4.2$ lfs getstripe /p/lquake/faaland1/make-busy/mdt7
/p/lquake/faaland1/make-busy/mdt7
stripe_count:  1 stripe_size:   1048576 pattern:       0 stripe_offset: -1

/p/lquake/faaland1/make-busy/mdt7/mdtest.6qUohi
stripe_count:  1 stripe_size:   1048576 pattern:       0 stripe_offset: -1

/p/lquake/faaland1/make-busy/mdt7/mdtest.ldXmqW
stripe_count:  1 stripe_size:   1048576 pattern:       0 stripe_offset: -1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="243770" author="ofaaland" created="Tue, 12 Mar 2019 21:11:56 +0000"  >&lt;p&gt;Note that neither of the two subdirs of mdt7 is the one mktemp tried to create - they both already existed, as shown by the mdtest artifacts they contain:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;bash-4.2$ ls /p/lquake/faaland1/make-busy/mdt7/*/
/p/lquake/faaland1/make-busy/mdt7/mdtest.6qUohi/:
#test-dir.0

/p/lquake/faaland1/make-busy/mdt7/mdtest.ldXmqW/:
#test-dir.0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="243774" author="pfarrell" created="Tue, 12 Mar 2019 21:27:46 +0000"  >&lt;p&gt;Olaf,&lt;/p&gt;

&lt;p&gt;Anything special about that mktemp script?&lt;/p&gt;

&lt;p&gt;And can you provide dmesg from the MDSes serving up the root (MDT0) and MDT0007?&lt;/p&gt;</comment>
                            <comment id="243775" author="ofaaland" created="Tue, 12 Mar 2019 21:52:07 +0000"  >&lt;p&gt;Hi Patrick,&lt;/p&gt;

&lt;p&gt;The mktemp used is the utility packaged with RHEL.  The tar file attached, lu-12063-2.tar.gz, has dmesg for the client (opal110), MDS with MDT0 (jet1), and MDT0007 (jet8), as well as the debug logs for each of those.  The debug mask was default on the client and -1 on the servers, I believe.&lt;/p&gt;</comment>
                            <comment id="243849" author="pfarrell" created="Wed, 13 Mar 2019 15:49:44 +0000"  >&lt;p&gt;Thanks, Olaf!&lt;/p&gt;

&lt;p&gt;Can you check your other MDS/MDT dmesg logs for this sort of error?&#160; Searching for LustreError and then lod_gen_component_ea should do the trick.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt; [Tue Mar 12 13:42:47 2019] LustreError: 57771:0:(lod_lov.c:896:lod_gen_component_ea()) lquake-MDT0007-mdtlov: Can not locate [0x700000bd9:0x56:0x0]: rc = -2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The MDT0 dmesg leads me to think we&apos;ve got failures that are probably on other MDTs (ie other than MDT7), would be interesting to see.&lt;/p&gt;</comment>
                            <comment id="243850" author="pfarrell" created="Wed, 13 Mar 2019 15:57:37 +0000"  >&lt;p&gt;Hmm, actually, we probably &lt;b&gt;don&apos;t&lt;/b&gt; have such errors on the other MDTs&#160;- But I&apos;d love to know if we do.&lt;/p&gt;</comment>
                            <comment id="243853" author="pfarrell" created="Wed, 13 Mar 2019 16:30:04 +0000"  >&lt;p&gt;Olaf,&lt;/p&gt;

&lt;p&gt;mktemp without an option is not mkdir - it&apos;s creating a file.&#160; That matches with some of what I&apos;m seeing in the logs, and your previous report in the earlier ticket for the EINVAL (sorry for missing that).&lt;/p&gt;

&lt;p&gt;Do you have other cause to think you&apos;ve seen this with mkdir?&lt;/p&gt;</comment>
                            <comment id="243854" author="pfarrell" created="Wed, 13 Mar 2019 16:33:05 +0000"  >&lt;p&gt;Sorry for the flurry of updates...&lt;/p&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000004:00080000:8.0:1552423369.592170:0:57771:0:(osp_object.c:1592:osp_create()) lquake-OST0004-osc-MDT0007: Wrote last used FID: [0x700000bd9:0x56:0x0], index 4: 0 &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Do you have logs, even just dmesg, from OST0004?&lt;/p&gt;</comment>
                            <comment id="243856" author="pfarrell" created="Wed, 13 Mar 2019 16:40:29 +0000"  >&lt;p&gt;Can you do fid2path and getstripe on a file on OST0004, and on a few other OSTs in the file system?&#160; (If possible, files on both MDT0 and another MDT.&#160; MDT7 would be great.)&lt;/p&gt;

&lt;p&gt;Basically, at least some of the time, we&apos;re failing to find the sequence associated with certain OSTs.&#160; Might just be OST0004 - I&apos;m still working at decoding the FID.&lt;/p&gt;</comment>
                            <comment id="243858" author="pfarrell" created="Wed, 13 Mar 2019 17:04:50 +0000"  >&lt;p&gt;New theory, based on errors seen so far:&lt;br/&gt;
Problem is specific to OST0004.&#160; Curious to know if you have persistent issues creating files there from some or all MDTs.&lt;/p&gt;</comment>
                            <comment id="243859" author="pfarrell" created="Wed, 13 Mar 2019 17:10:36 +0000"  >&lt;p&gt;OK, one more... Let&apos;s dump the FID sequence tables as viewed from MDT0, and also at least one remote MDT.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lctl get_param seq.*.*&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;That&apos;s going to give a lot&#160;of output, sorry.&lt;/p&gt;</comment>
                            <comment id="243865" author="ofaaland" created="Wed, 13 Mar 2019 20:01:25 +0000"  >&lt;p&gt;Hi Patrick,&lt;/p&gt;

&lt;p&gt;I lost the cluster again for a little while.  I hope to get it back within a couple days, and I&apos;ll fetch the FID sequence tables and try creates using specified individual OSTs then.&lt;/p&gt;

&lt;p&gt;I was mistaken about mkdir failing.  All slurm job logs report create failures, &lt;b&gt;no&lt;/b&gt; mkdir errors.  I mixed up the two problems.  I&apos;ve updated the summary to reflect that.&lt;/p&gt;

&lt;p&gt;I&apos;m attaching dmesg and lctl dk output from jet21, where OST0004 was running, as lu-12063-3.tar.gz.&lt;/p&gt;

&lt;p&gt;There is some noise in the logs from two routers which are down, NIDs with IPs 172.19.1.22 and 172.19.1.23.  They are for a system not actually running LNet or Lustre at the moment and they are not between jet and opal, so should be unrelated to this issue.&lt;/p&gt;

&lt;p&gt;thanks,&lt;br/&gt;
Olaf&lt;/p&gt;</comment>
                            <comment id="243867" author="pfarrell" created="Wed, 13 Mar 2019 21:08:30 +0000"  >&lt;p&gt;Thanks, Olaf.&#160; Too bad you&apos;re not able to get those tables &amp;amp; do that testing, it would tell us a lot.&#160; I&apos;ll look at the logs, but I suspect they&apos;re going to be clean.&#160; I think it&apos;s more likely there&apos;s something wrong on the MDS(es), but it&apos;s a little tricky to say what.&lt;/p&gt;</comment>
                            <comment id="251301" author="pfarrell" created="Fri, 12 Jul 2019 20:46:35 +0000"  >&lt;p&gt;Olaf,&lt;/p&gt;

&lt;p&gt;Have you seen this issue recently and/or had another chance to run this test?&lt;/p&gt;</comment>
                            <comment id="251441" author="ofaaland" created="Mon, 15 Jul 2019 21:50:15 +0000"  >&lt;p&gt;I am not seeing this anymore on Lustre 2.12.2.  Closing.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="32223" name="lu-12063-2.tar.gz" size="10416589" author="ofaaland" created="Tue, 12 Mar 2019 21:48:55 +0000"/>
                            <attachment id="32227" name="lu-12063-3.tar.gz" size="1542356" author="ofaaland" created="Wed, 13 Mar 2019 20:01:44 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00d8n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>