<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:48:12 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5062] LBUG: osc_req_attr_set</title>
                <link>https://jira.whamcloud.com/browse/LU-5062</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Cray has recently begun stress testing master clients in preparation for 2.6.0.&lt;/p&gt;

&lt;p&gt;We hit this bug on several nodes during a stress run last night:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;2014-05-13T23:00:06.485598-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:824:osc_req_attr_set()) page@ffff880221781c00[4 ffff880222618600 3 0 1           (&lt;span class=&quot;code-keyword&quot;&gt;null&lt;/span&gt;) ffff88022328d740 0x0]
2014-05-13T23:00:06.485630-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:824:osc_req_attr_set()) vvp-page@ffff880221781ca0(0:0:0) vm@ffffea000e1502a0 200000000000801 4:0 ffff880221781c00 0 lru
2014-05-13T23:00:06.485637-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:824:osc_req_attr_set()) lov-page@ffff880221781cf8, raid0
2014-05-13T23:00:06.485672-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:824:osc_req_attr_set()) osc-page@ffff880221781d60 0: 1&amp;lt; 0x845fed 1 0 + + &amp;gt; 2&amp;lt; 0 0 4096 0x7 0x8 |           (&lt;span class=&quot;code-keyword&quot;&gt;null&lt;/span&gt;) ffff88044ad844f8 ffff88044d7a59e8 &amp;gt; 3&amp;lt; + ffff88044e055080 1 16 0 &amp;gt; 4&amp;lt; 0 0 8 35586048 - | - - - + &amp;gt; 5&amp;lt; - - - + | 0 - | 0 - -&amp;gt;
2014-05-13T23:00:06.485681-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:824:osc_req_attr_set()) end page@ffff880221781c00
2014-05-13T23:00:06.485689-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:824:osc_req_attr_set()) dump uncover page!
2014-05-13T23:00:06.485700-05:00 c1-0c0s2n1 Pid: 14298, comm: nsystst
2014-05-13T23:00:06.485710-05:00 c1-0c0s2n1 Call Trace:
2014-05-13T23:00:06.485724-05:00 c1-0c0s2n1 [&amp;lt;ffffffff81005eb9&amp;gt;] try_stack_unwind+0x169/0x1b0
2014-05-13T23:00:06.485746-05:00 c1-0c0s2n1 [&amp;lt;ffffffff81004919&amp;gt;] dump_trace+0x89/0x450
2014-05-13T23:00:06.485757-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa023b8d7&amp;gt;] libcfs_debug_dumpstack+0x57/0x80 [libcfs]
2014-05-13T23:00:06.485784-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa092f348&amp;gt;] osc_req_attr_set+0x6b8/0x740 [osc]
2014-05-13T23:00:06.485796-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa038dbf1&amp;gt;] cl_req_attr_set+0xd1/0x220 [obdclass]
2014-05-13T23:00:06.485807-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa091a6da&amp;gt;] osc_build_rpc+0x4fa/0x15d0 [osc]
2014-05-13T23:00:06.485817-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa093c199&amp;gt;] osc_io_unplug0+0x1419/0x2020 [osc]
2014-05-13T23:00:06.485837-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa093ddb1&amp;gt;] osc_io_unplug+0x11/0x20 [osc]
2014-05-13T23:00:06.485844-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa093dfa1&amp;gt;] osc_queue_sync_pages+0x1e1/0x380 [osc]
2014-05-13T23:00:06.485852-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa092f90d&amp;gt;] osc_io_submit+0x2dd/0x4e0 [osc]
2014-05-13T23:00:06.485871-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa038d40e&amp;gt;] cl_io_submit_rw+0x6e/0x170 [obdclass]
2014-05-13T23:00:06.485881-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa076123e&amp;gt;] lov_io_submit+0x2ee/0x4d0 [lov]
2014-05-13T23:00:06.485894-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa038d40e&amp;gt;] cl_io_submit_rw+0x6e/0x170 [obdclass]
2014-05-13T23:00:06.485900-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa038f3fa&amp;gt;] cl_io_read_page+0x19a/0x1b0 [obdclass]
2014-05-13T23:00:06.485907-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa081bf7c&amp;gt;] ll_readpage+0x16c/0x1b0 [lustre]
2014-05-13T23:00:06.485920-05:00 c1-0c0s2n1 [&amp;lt;ffffffff810fdc68&amp;gt;] generic_file_aio_read+0x268/0x740
2014-05-13T23:00:06.485932-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa084e0b0&amp;gt;] vvp_io_read_start+0x2a0/0x430 [lustre]
2014-05-13T23:00:06.485938-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa038d6b2&amp;gt;] cl_io_start+0x72/0x140 [obdclass]
2014-05-13T23:00:06.485947-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa0391234&amp;gt;] cl_io_loop+0xb4/0x1b0 [obdclass]
2014-05-13T23:00:06.485959-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa07e9ccc&amp;gt;] ll_file_io_generic+0x58c/0x8a0 [lustre]
2014-05-13T23:00:06.485967-05:00 c1-0c0s2n1 [&amp;lt;ffffffffa07ea49c&amp;gt;] ll_file_aio_read+0x22c/0x290 [lustre]
2014-05-13T23:00:06.485994-05:00 c1-0c0s2n1 [&amp;lt;ffffffff8115517b&amp;gt;] do_sync_readv_writev+0xdb/0x120
2014-05-13T23:00:06.486000-05:00 c1-0c0s2n1 [&amp;lt;ffffffff81156084&amp;gt;] do_readv_writev+0xd4/0x1e0
2014-05-13T23:00:06.486010-05:00 c1-0c0s2n1 [&amp;lt;ffffffff811563c5&amp;gt;] vfs_readv+0x45/0x60
2014-05-13T23:00:06.486016-05:00 c1-0c0s2n1 [&amp;lt;ffffffff81156505&amp;gt;] sys_readv+0x55/0xc0
2014-05-13T23:00:06.486023-05:00 c1-0c0s2n1 [&amp;lt;ffffffff8142286b&amp;gt;] system_call_fastpath+0x16/0x1b
2014-05-13T23:00:06.486031-05:00 c1-0c0s2n1 [&amp;lt;000000002006d41b&amp;gt;] 0x2006d41b
2014-05-13T23:00:06.510785-05:00 c1-0c0s2n1 LustreError: 14298:0:(osc_io.c:826:osc_req_attr_set()) LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I&apos;ll make the dump+logs available shortly.&lt;/p&gt;

&lt;p&gt;I note this bug was seen by Oleg during patch review for the readahead changes in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3321&quot; title=&quot;2.x single thread/process throughput degraded from 1.8&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3321&quot;&gt;&lt;del&gt;LU-3321&lt;/del&gt;&lt;/a&gt;, see his comments here:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/8523/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/8523/&lt;/a&gt;&lt;/p&gt;


&lt;p&gt;I also note a previous instance of this assertion was fixed in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1650&quot; title=&quot;crash of lustre clients in osc_req_attr_set() routine&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1650&quot;&gt;&lt;del&gt;LU-1650&lt;/del&gt;&lt;/a&gt;:&lt;br/&gt;
&lt;a href=&quot;https://jira.hpdd.intel.com/browse/LU-1650&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://jira.hpdd.intel.com/browse/LU-1650&lt;/a&gt;&lt;/p&gt;</description>
                <environment>SLES11SP3 clients, master (v2_5_58_0-105-gb7f1952), Cray stress testing.</environment>
        <key id="24715">LU-5062</key>
            <summary>LBUG: osc_req_attr_set</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jay">Jinshan Xiong</assignee>
                                    <reporter username="paf">Patrick Farrell</reporter>
                        <labels>
                            <label>osc</label>
                            <label>readahead</label>
                    </labels>
                <created>Wed, 14 May 2014 20:10:59 +0000</created>
                <updated>Tue, 20 Jan 2015 21:58:51 +0000</updated>
                            <resolved>Tue, 24 Jun 2014 17:13:32 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                    <fixVersion>Lustre 2.5.4</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="84115" author="pjones" created="Wed, 14 May 2014 20:18:51 +0000"  >&lt;p&gt;Patrick&lt;/p&gt;

&lt;p&gt;How up to date is your master? Are any patches applied?&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="84119" author="paf" created="Wed, 14 May 2014 20:34:57 +0000"  >&lt;p&gt;Peter -&lt;/p&gt;

&lt;p&gt;Current as of mid-day yesterday, no patches.  We&apos;re doing this stress testing to try to help improve release quality, so we&apos;re doing it unmodified.&lt;/p&gt;

&lt;p&gt;Most recent commit is:&lt;/p&gt;

&lt;p&gt;commit b7f1952882e9e133b43acb22d010767c664539b9&lt;br/&gt;
Author: Mikhail Pershin &amp;lt;mike.pershin@intel.com&amp;gt;&lt;br/&gt;
Date:   Thu May 8 22:29:24 2014 +0400&lt;/p&gt;

&lt;p&gt;    &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4916&quot; title=&quot;mount failure when adding failover node to mkfs.lustre&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4916&quot;&gt;&lt;del&gt;LU-4916&lt;/del&gt;&lt;/a&gt; lwp: don&apos;t add connection for missed LWP&lt;/p&gt;

&lt;p&gt;    The LWP isn&apos;t needed for some MDT-MDT connections,&lt;br/&gt;
    e.g. for MDT0-MDT1, but lustre_lwp_add_conn() is called&lt;br/&gt;
    for any &apos;add mdc&apos; marker and cause error.&lt;/p&gt;

&lt;p&gt;    Patch add LWP connection only where it is needed.&lt;br/&gt;
&amp;#8212;&lt;/p&gt;

&lt;p&gt;Looking at commits since then, the only possibly related commit is one for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4977&quot; title=&quot;Deadlock in balance_dirty_pages()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4977&quot;&gt;&lt;del&gt;LU-4977&lt;/del&gt;&lt;/a&gt;, but that&apos;s not really down this code path.&lt;/p&gt;</comment>
                            <comment id="84120" author="paf" created="Wed, 14 May 2014 20:38:24 +0000"  >&lt;p&gt;Dump is up at:&lt;br/&gt;
ftp.whamcloud.com:/uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5062&quot; title=&quot;LBUG: osc_req_attr_set&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5062&quot;&gt;&lt;del&gt;LU-5062&lt;/del&gt;&lt;/a&gt;/LU_5062_140514.tar.gz&lt;/p&gt;</comment>
                            <comment id="84195" author="jlevi" created="Thu, 15 May 2014 17:14:21 +0000"  >&lt;p&gt;Jinshan,&lt;br/&gt;
Can you please comment on this one?&lt;br/&gt;
Thank you!&lt;/p&gt;</comment>
                            <comment id="84203" author="jay" created="Thu, 15 May 2014 17:32:28 +0000"  >&lt;p&gt;Do you have patch &lt;a href=&quot;http://review.whamcloud.com/10220&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10220&lt;/a&gt; applied on your tree? Probably this is a known problem.&lt;/p&gt;</comment>
                            <comment id="84206" author="paf" created="Thu, 15 May 2014 18:09:17 +0000"  >&lt;p&gt;Jinshan,&lt;/p&gt;

&lt;p&gt;From the git history, it looks like we had that commit...  But it does look like it would fix the problem, so it&apos;s possible I had older source than I thought I did.  I will test again and if we continue to see this problem, I&apos;ll let you know.&lt;/p&gt;</comment>
                            <comment id="84208" author="jay" created="Thu, 15 May 2014 18:49:21 +0000"  >&lt;p&gt;Patrick, yes, please try it. Thank you very much for helping us to improve our release quality.&lt;/p&gt;</comment>
                            <comment id="84221" author="paf" created="Thu, 15 May 2014 23:23:13 +0000"  >&lt;p&gt;OK, same software environment (SLES11SP3 client, Cray stress testing), current master - 2.5.59 tag.&lt;/p&gt;

&lt;p&gt;Still hit it, and with the same test:&lt;/p&gt;

&lt;p&gt;2014-05-15T17:18:35.970514-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:824:osc_req_attr_set()) page@ffff880b2372cc00&lt;span class=&quot;error&quot;&gt;&amp;#91;4 ffff880b2040eb10 3 0 1           (null) ffff881037c8ac00 0x0&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:35.997237-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:824:osc_req_attr_set()) vvp-page@ffff880b2372cca0(0:0:0) vm@ffffea0038eaadb8 600000000000801 4:0 ffff880b2372cc00 0 lru&lt;br/&gt;
2014-05-15T17:18:36.022514-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:824:osc_req_attr_set()) lov-page@ffff880b2372ccf8, raid0&lt;br/&gt;
2014-05-15T17:18:36.054138-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:824:osc_req_attr_set()) osc-page@ffff880b2372cd60 0: 1&amp;lt; 0x845fed 1 0 + + &amp;gt; 2&amp;lt; 0 0 4096 0x7 0x8 |           (null) ffff881837716578 ffff880200914ea8 &amp;gt; 3&amp;lt; + ffff8810379f7800 1 16 0 &amp;gt; 4&amp;lt; 0 0 8 43520000 - | - - - + &amp;gt; 5&amp;lt; - - - + | 0 - | 0 - -&amp;gt;&lt;br/&gt;
2014-05-15T17:18:36.079394-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:824:osc_req_attr_set()) end page@ffff880b2372cc00&lt;br/&gt;
2014-05-15T17:18:36.104691-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:824:osc_req_attr_set()) dump uncover page!&lt;br/&gt;
2014-05-15T17:18:36.104732-05:00 c0-0c2s0n3 Pid: 28354, comm: nsystst&lt;br/&gt;
2014-05-15T17:18:36.104746-05:00 c0-0c2s0n3 Call Trace:&lt;br/&gt;
2014-05-15T17:18:36.104756-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81005eb9&amp;gt;&amp;#93;&lt;/span&gt; try_stack_unwind+0x169/0x1b0&lt;br/&gt;
2014-05-15T17:18:36.129940-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81004939&amp;gt;&amp;#93;&lt;/span&gt; dump_trace+0x89/0x450&lt;br/&gt;
2014-05-15T17:18:36.129981-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa02218c7&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x57/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.129993-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0918348&amp;gt;&amp;#93;&lt;/span&gt; osc_req_attr_set+0x6b8/0x740 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.155180-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0374be1&amp;gt;&amp;#93;&lt;/span&gt; cl_req_attr_set+0xd1/0x220 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.155220-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa09036da&amp;gt;&amp;#93;&lt;/span&gt; osc_build_rpc+0x4fa/0x15d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.180416-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0925199&amp;gt;&amp;#93;&lt;/span&gt; osc_io_unplug0+0x1419/0x2020 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.180456-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0926db1&amp;gt;&amp;#93;&lt;/span&gt; osc_io_unplug+0x11/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.180467-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0926fa1&amp;gt;&amp;#93;&lt;/span&gt; osc_queue_sync_pages+0x1e1/0x380 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.205619-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa091890d&amp;gt;&amp;#93;&lt;/span&gt; osc_io_submit+0x2dd/0x4e0 &lt;span class=&quot;error&quot;&gt;&amp;#91;osc&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.205659-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03743fe&amp;gt;&amp;#93;&lt;/span&gt; cl_io_submit_rw+0x6e/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.230909-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa074a23e&amp;gt;&amp;#93;&lt;/span&gt; lov_io_submit+0x2ee/0x4d0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lov&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.230948-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03743fe&amp;gt;&amp;#93;&lt;/span&gt; cl_io_submit_rw+0x6e/0x170 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.256143-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03763ea&amp;gt;&amp;#93;&lt;/span&gt; cl_io_read_page+0x19a/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.256185-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0804f4c&amp;gt;&amp;#93;&lt;/span&gt; ll_readpage+0x16c/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.256195-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810ffec8&amp;gt;&amp;#93;&lt;/span&gt; generic_file_aio_read+0x268/0x740&lt;br/&gt;
2014-05-15T17:18:36.281326-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0837030&amp;gt;&amp;#93;&lt;/span&gt; vvp_io_read_start+0x2a0/0x430 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.281367-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa03746a2&amp;gt;&amp;#93;&lt;/span&gt; cl_io_start+0x72/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.306578-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0378224&amp;gt;&amp;#93;&lt;/span&gt; cl_io_loop+0xb4/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.306618-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07d2ccc&amp;gt;&amp;#93;&lt;/span&gt; ll_file_io_generic+0x58c/0x8a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.306630-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07d349c&amp;gt;&amp;#93;&lt;/span&gt; ll_file_aio_read+0x22c/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.331852-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa07d3e45&amp;gt;&amp;#93;&lt;/span&gt; ll_file_read+0x1e5/0x270 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
2014-05-15T17:18:36.331893-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811585f8&amp;gt;&amp;#93;&lt;/span&gt; vfs_read+0xc8/0x180&lt;br/&gt;
2014-05-15T17:18:36.331904-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811587b5&amp;gt;&amp;#93;&lt;/span&gt; sys_read+0x55/0x90&lt;br/&gt;
2014-05-15T17:18:36.357172-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81427a2b&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;br/&gt;
2014-05-15T17:18:36.357219-05:00 c0-0c2s0n3 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;000000002001d680&amp;gt;&amp;#93;&lt;/span&gt; 0x2001d680&lt;br/&gt;
2014-05-15T17:18:36.357233-05:00 c0-0c2s0n3 LustreError: 28354:0:(osc_io.c:826:osc_req_attr_set()) LBUG&lt;/p&gt;</comment>
                            <comment id="84222" author="jay" created="Fri, 16 May 2014 00:06:16 +0000"  >&lt;p&gt;I will look at this issue.&lt;/p&gt;</comment>
                            <comment id="84223" author="jay" created="Fri, 16 May 2014 00:49:40 +0000"  >&lt;p&gt;Hi Patrick, can you please upload one more crash dump for me? It looks like the previous upload has problem and I can&apos;t extract lustre log from there. Please make sure the ko files match the running code.&lt;/p&gt;</comment>
                            <comment id="84225" author="paf" created="Fri, 16 May 2014 01:35:28 +0000"  >&lt;p&gt;Ugh, sorry for the incorrect KO files...  There&apos;s a small issue in our automated dump gathering.&lt;/p&gt;

&lt;p&gt;I did this one by hand and checked that the KOs are right.  This is the dump for the report I just made a few hours ago:&lt;/p&gt;

&lt;p&gt;ftp.whamcloud.com&lt;br/&gt;
uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5062&quot; title=&quot;LBUG: osc_req_attr_set&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5062&quot;&gt;&lt;del&gt;LU-5062&lt;/del&gt;&lt;/a&gt;/LU_5062_140515.tar.gz&lt;/p&gt;

&lt;p&gt;&lt;del&gt;The timer is saying 9 minutes to complete the upload.&lt;/del&gt;&lt;br/&gt;
Upload complete.&lt;/p&gt;

&lt;p&gt;Thank you for taking a look.&lt;/p&gt;</comment>
                            <comment id="84259" author="jay" created="Fri, 16 May 2014 16:49:44 +0000"  >&lt;p&gt;Hi Patrick,&lt;/p&gt;

&lt;p&gt;I still can&apos;t extract lustre log from the crash dump file. This is what I got when I was trying to extract it.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; extend /home/jay/crashdump/lustre.so
/home/jay/crashdump/lustre.so: shared object loaded
crash&amp;gt; lustre -l x
lustre_walk_cpus(0, 5, 1)
cmd:	p (*cfs_trace_data[0])[0].tcd.tcd_cur_pages
	p (*cfs_trace_data[0])[0].tcd.tcd_pages.next
lustre: gdb request failed: &quot;p (*cfs_trace_data[0])[0].tcd.tcd_cur_pages&quot;
crash&amp;gt; p cfs_trace_data
cfs_trace_data = $7 = 
 {0x1400000004, 0x554e4700000003, 0x5b9a0443e17b2baa, 0xcd0dd360156499dc, 0x5abd76e8, 0x0, 0xffffffffa022c9e0,
 0xffffffffa024f201}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;cfs_trace_data should be an array of pointers. However, the memory cfs_trace_data pointing to seems to be mangled. Can you run crash on your local side to see if you can get the log? Otherwise, please check if the dump gathering script is correct.&lt;/p&gt;</comment>
                            <comment id="84281" author="paf" created="Fri, 16 May 2014 18:32:02 +0000"  >&lt;p&gt;Jinshan,&lt;/p&gt;

&lt;p&gt;Hmm.  The problem might be SLES11SP3...  Xyratex modified lustre.so so it could get logs out.  I&apos;m not familiar with what they changed.  (We also had to update our version of Crash to something more recent than we were using.)  I&apos;ll attach that file as well.&lt;/p&gt;



&lt;p&gt;In any case, I was able to extract the logs.  I&apos;ll note there did appear to be a few corrupt messages (I got some error messages when using lctl to process the binary logs), so I&apos;ll include the binary logs as well as the processed ones.  Attaching in a moment.&lt;/p&gt;</comment>
                            <comment id="84282" author="paf" created="Fri, 16 May 2014 18:32:38 +0000"  >&lt;p&gt;log is the raw binary log, log.sort is the processed log.&lt;/p&gt;</comment>
                            <comment id="84297" author="jay" created="Fri, 16 May 2014 20:06:20 +0000"  >&lt;p&gt;It turned out that this is a race when a file with empty layout turned into raid0 layout.&lt;/p&gt;</comment>
                            <comment id="86448" author="jlevi" created="Thu, 12 Jun 2014 18:12:47 +0000"  >&lt;p&gt;Jinshan,&lt;br/&gt;
Are you planning to make a patch for this?&lt;/p&gt;</comment>
                            <comment id="87023" author="paf" created="Thu, 19 Jun 2014 14:48:59 +0000"  >&lt;p&gt;Cray hit this again today &lt;span class=&quot;error&quot;&gt;&amp;#91;Update: Looking internally, we&amp;#39;ve hit it several times recently in testing.&amp;#93;&lt;/span&gt;.&lt;/p&gt;

&lt;p&gt;Also:  It looks like &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3729&quot; title=&quot;uncovered page in osc_req_attr_set&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3729&quot;&gt;&lt;del&gt;LU-3729&lt;/del&gt;&lt;/a&gt; may be a duplicate of this?  Though there have been several osc_req_attr_set cases fixed since that bug was opened, I believe.&lt;/p&gt;</comment>
                            <comment id="87061" author="jay" created="Thu, 19 Jun 2014 17:37:27 +0000"  >&lt;p&gt;I&apos;ll start to work on this.&lt;/p&gt;</comment>
                            <comment id="87109" author="jay" created="Thu, 19 Jun 2014 23:33:04 +0000"  >&lt;p&gt;Hi Patrick,&lt;/p&gt;

&lt;p&gt;Can you try this patch: &lt;a href=&quot;http://review.whamcloud.com/10760?&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/10760?&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="87121" author="paf" created="Fri, 20 Jun 2014 03:26:49 +0000"  >&lt;p&gt;Jinshan - While testing the patch, I hit the assertion from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4900&quot; title=&quot;cl_use_try()) ASSERTION( result != -38 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4900&quot;&gt;&lt;del&gt;LU-4900&lt;/del&gt;&lt;/a&gt;, same stack trace as there as well:&lt;br/&gt;
2014-06-19T22:02:47.307062-05:00 c0-0c2s6n0 LustreError: 13331:0:(cl_lock.c:1122:cl_use_try()) ASSERTION( result != -38 ) failed:&lt;br/&gt;
2014-06-19T22:02:47.307096-05:00 c0-0c2s6n0 LustreError: 13331:0:(cl_lock.c:1122:cl_use_try()) LBUG&lt;/p&gt;

&lt;p&gt;It&apos;s probably just random, since that bug&apos;s known to exist, but I wanted to let you know in case there&apos;s a possible relationship.  It&apos;s worth noting I don&apos;t think we&apos;ve ever hit &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4900&quot; title=&quot;cl_use_try()) ASSERTION( result != -38 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4900&quot;&gt;&lt;del&gt;LU-4900&lt;/del&gt;&lt;/a&gt; before.  (Still is likely to be just chance.)&lt;/p&gt;</comment>
                            <comment id="87122" author="jay" created="Fri, 20 Jun 2014 03:31:29 +0000"  >&lt;p&gt;You used patch set 1, is that right?&lt;/p&gt;</comment>
                            <comment id="87126" author="paf" created="Fri, 20 Jun 2014 04:10:09 +0000"  >&lt;p&gt;Yes, I did, because I had it built and the system set up before I saw your response in Gerrit, and then I didn&apos;t realize the other code affected more than that sanity testing.  Oops.&lt;/p&gt;

&lt;p&gt;Having looked at &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4900&quot; title=&quot;cl_use_try()) ASSERTION( result != -38 ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4900&quot;&gt;&lt;del&gt;LU-4900&lt;/del&gt;&lt;/a&gt;, I understand now why you did what you did in patch set 2.  I see now that&apos;s called in the macro in cl_use_try.&lt;/p&gt;

&lt;p&gt;So it looks like your change to vvp_io_read_lock is getting this kind of cl_lock in to the cache, and then in to cl_use_try, where it asserts.&lt;br/&gt;
And since cl_use_try is only called for cached locks, we don&apos;t hit that assertion all the time...&lt;/p&gt;

&lt;p&gt;I&apos;ll do another run later with patch set 2.  Is the data from running only patch set 1 helpful in proving the vvp_io_read_lock part of the patch, or should I just ignore it? &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/wink.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt;&lt;/p&gt;

&lt;p&gt;Sorry about that.&lt;/p&gt;

&lt;p&gt;Just out of curiosity:  I&apos;m still a little puzzled.  If I&apos;m reading cl_use_try correctly, as long as there&apos;s at least one slice with a valid clo_use, rc shouldn&apos;t be ENOSYS, so no assertion.  Is the error case here a cl_lock with only a single slice, then?&lt;/p&gt;</comment>
                            <comment id="87137" author="jay" created="Fri, 20 Jun 2014 05:12:09 +0000"  >&lt;blockquote&gt;
&lt;p&gt;Just out of curiosity: I&apos;m still a little puzzled. If I&apos;m reading cl_use_try correctly, as long as there&apos;s at least one slice with a valid clo_use, rc shouldn&apos;t be ENOSYS, so no assertion. Is the error case here a cl_lock with only a single slice, then?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;For cl_lock of an empty file, it will have two slices: ccc and lov. However, neither of them has -&amp;gt;clo_use defined this is why cl_use_try() will see ENOSYS error.&lt;/p&gt;

&lt;p&gt;I don&apos;t need the data from patch set 1, thanks.&lt;/p&gt;</comment>
                            <comment id="87388" author="pjones" created="Tue, 24 Jun 2014 17:13:32 +0000"  >&lt;p&gt;Landed for 2.6&lt;/p&gt;</comment>
                            <comment id="100269" author="gerrit" created="Mon, 1 Dec 2014 04:19:30 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/12139/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/12139/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5062&quot; title=&quot;LBUG: osc_req_attr_set&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5062&quot;&gt;&lt;del&gt;LU-5062&lt;/del&gt;&lt;/a&gt; llite: Solve a race to access lli_has_smd in read case&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_5&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 078dee1448c2995257923dc712943eab2c78e48e&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="26781">LU-5685</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="14959" name="log" size="709351" author="paf" created="Fri, 16 May 2014 18:32:38 +0000"/>
                            <attachment id="14958" name="log.sort" size="586817" author="paf" created="Fri, 16 May 2014 18:32:38 +0000"/>
                            <attachment id="14960" name="lustre-ext.so" size="135638" author="paf" created="Fri, 16 May 2014 18:32:38 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwmi7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13981</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>