<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:53:45 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5700] async IO LBUG obj-&gt;cob_transient_pages == 0</title>
                <link>https://jira.whamcloud.com/browse/LU-5700</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;LustreError: 58185:0:(rw26.c:474:ll_direct_IO_26()) ASSERTION( obj-&amp;gt;cob_transient_pages == 0 ) failed:&lt;br/&gt;
LustreError: 58184:0:(rw26.c:474:ll_direct_IO_26()) ASSERTION( obj-&amp;gt;cob_transient_pages == 0 ) failed:&lt;br/&gt;
LustreError: 58184:0:(rw26.c:474:ll_direct_IO_26()) LBUG&lt;br/&gt;
Pid: 58184, comm: sio&lt;/p&gt;

&lt;p&gt;Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057d895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057de97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c03044&amp;gt;&amp;#93;&lt;/span&gt; ll_direct_IO_26+0xb64/0x1140 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
LustreError: 58186:0:(rw26.c:474:ll_direct_IO_26()) ASSERTION( obj-&amp;gt;cob_transient_pages == 0 ) failed:&lt;br/&gt;
LustreError: 58186:0:(rw26.c:474:ll_direct_IO_26()) LBUG&lt;br/&gt;
Pid: 58186, comm: sio&lt;/p&gt;

&lt;p&gt;Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81058bd3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112158b&amp;gt;&amp;#93;&lt;/span&gt; generic_file_aio_read+0x6bb/0x700&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811b4b5f&amp;gt;&amp;#93;&lt;/span&gt; ? list_move+0x1f/0x30&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811b51ff&amp;gt;&amp;#93;&lt;/span&gt; ? __mark_inode_dirty+0x13f/0x160&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811a59cd&amp;gt;&amp;#93;&lt;/span&gt; ? touch_atime+0x14d/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c173be&amp;gt;&amp;#93;&lt;/span&gt; vvp_io_read_start+0x22e/0x450 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057d895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057de97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
LustreError: 58183:0:(rw26.c:474:ll_direct_IO_26()) ASSERTION( obj-&amp;gt;cob_transient_pages == 0 ) failed:&lt;br/&gt;
LustreError: 58183:0:(rw26.c:474:ll_direct_IO_26()) LBUG&lt;br/&gt;
Pid: 58183, comm: sio&lt;/p&gt;

&lt;p&gt;Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c03044&amp;gt;&amp;#93;&lt;/span&gt; ll_direct_IO_26+0xb64/0x1140 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057d895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81058bd3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057de97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112158b&amp;gt;&amp;#93;&lt;/span&gt; generic_file_aio_read+0x6bb/0x700&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f14da&amp;gt;&amp;#93;&lt;/span&gt; cl_io_start+0x6a/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81078fa7&amp;gt;&amp;#93;&lt;/span&gt; ? current_fs_time+0x27/0x30&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bce4d5&amp;gt;&amp;#93;&lt;/span&gt; ? ll_inode_size_unlock+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f5664&amp;gt;&amp;#93;&lt;/span&gt; cl_io_loop+0xb4/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb3f41&amp;gt;&amp;#93;&lt;/span&gt; ll_file_io_generic+0x481/0xaa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0d6e5&amp;gt;&amp;#93;&lt;/span&gt; ? ccc_object_size_unlock+0x35/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb4da0&amp;gt;&amp;#93;&lt;/span&gt; ll_file_aio_read+0x130/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811a58f1&amp;gt;&amp;#93;&lt;/span&gt; ? touch_atime+0x71/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb5079&amp;gt;&amp;#93;&lt;/span&gt; ll_file_read+0x159/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c173be&amp;gt;&amp;#93;&lt;/span&gt; vvp_io_read_start+0x22e/0x450 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c03044&amp;gt;&amp;#93;&lt;/span&gt; ll_direct_IO_26+0xb64/0x1140 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811894d5&amp;gt;&amp;#93;&lt;/span&gt; vfs_read+0xb5/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81189802&amp;gt;&amp;#93;&lt;/span&gt; sys_pread64+0x82/0xa0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100b072&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;/p&gt;

&lt;p&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81058bd3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112158b&amp;gt;&amp;#93;&lt;/span&gt; generic_file_aio_read+0x6bb/0x700&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81078fa7&amp;gt;&amp;#93;&lt;/span&gt; ? current_fs_time+0x27/0x30&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bce4d5&amp;gt;&amp;#93;&lt;/span&gt; ? ll_inode_size_unlock+0x15/0x20 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c0d6e5&amp;gt;&amp;#93;&lt;/span&gt; ? ccc_object_size_unlock+0x35/0x40 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811a58f1&amp;gt;&amp;#93;&lt;/span&gt; ? touch_atime+0x71/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c173be&amp;gt;&amp;#93;&lt;/span&gt; vvp_io_read_start+0x22e/0x450 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f14da&amp;gt;&amp;#93;&lt;/span&gt; cl_io_start+0x6a/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f5664&amp;gt;&amp;#93;&lt;/span&gt; cl_io_loop+0xb4/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb3f41&amp;gt;&amp;#93;&lt;/span&gt; ll_file_io_generic+0x481/0xaa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb4da0&amp;gt;&amp;#93;&lt;/span&gt; ll_file_aio_read+0x130/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb5079&amp;gt;&amp;#93;&lt;/span&gt; ll_file_read+0x159/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811894d5&amp;gt;&amp;#93;&lt;/span&gt; vfs_read+0xb5/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81189802&amp;gt;&amp;#93;&lt;/span&gt; sys_pread64+0x82/0xa0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100b072&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;/p&gt;

&lt;p&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f14da&amp;gt;&amp;#93;&lt;/span&gt; cl_io_start+0x6a/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f5664&amp;gt;&amp;#93;&lt;/span&gt; cl_io_loop+0xb4/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb3f41&amp;gt;&amp;#93;&lt;/span&gt; ll_file_io_generic+0x481/0xaa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb4da0&amp;gt;&amp;#93;&lt;/span&gt; ll_file_aio_read+0x130/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb5079&amp;gt;&amp;#93;&lt;/span&gt; ll_file_read+0x159/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811894d5&amp;gt;&amp;#93;&lt;/span&gt; vfs_read+0xb5/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81189802&amp;gt;&amp;#93;&lt;/span&gt; sys_pread64+0x82/0xa0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100b072&amp;gt;&amp;#93;&lt;/span&gt; system_call_fastpath+0x16/0x1b&lt;/p&gt;

&lt;p&gt;Kernel panic - not syncing: LBUG&lt;br/&gt;
Pid: 58184, comm: sio Not tainted 2.6.32-431.20.3.hz100.el6.x86_64 #1&lt;br/&gt;
Call Trace:&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81528067&amp;gt;&amp;#93;&lt;/span&gt; ? panic+0xa7/0x16f&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa057deeb&amp;gt;&amp;#93;&lt;/span&gt; ? lbug_with_loc+0x9b/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c03044&amp;gt;&amp;#93;&lt;/span&gt; ? ll_direct_IO_26+0xb64/0x1140 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81058bd3&amp;gt;&amp;#93;&lt;/span&gt; ? __wake_up+0x53/0x70&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8112158b&amp;gt;&amp;#93;&lt;/span&gt; ? generic_file_aio_read+0x6bb/0x700&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811b4b5f&amp;gt;&amp;#93;&lt;/span&gt; ? list_move+0x1f/0x30&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811b51ff&amp;gt;&amp;#93;&lt;/span&gt; ? __mark_inode_dirty+0x13f/0x160&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811a59cd&amp;gt;&amp;#93;&lt;/span&gt; ? touch_atime+0x14d/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0c173be&amp;gt;&amp;#93;&lt;/span&gt; ? vvp_io_read_start+0x22e/0x450 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f14da&amp;gt;&amp;#93;&lt;/span&gt; ? cl_io_start+0x6a/0x140 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa06f5664&amp;gt;&amp;#93;&lt;/span&gt; ? cl_io_loop+0xb4/0x1b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb3f41&amp;gt;&amp;#93;&lt;/span&gt; ? ll_file_io_generic+0x481/0xaa0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb4da0&amp;gt;&amp;#93;&lt;/span&gt; ? ll_file_aio_read+0x130/0x2b0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0bb5079&amp;gt;&amp;#93;&lt;/span&gt; ? ll_file_read+0x159/0x290 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff811894d5&amp;gt;&amp;#93;&lt;/span&gt; ? vfs_read+0xb5/0x1a0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81189802&amp;gt;&amp;#93;&lt;/span&gt; ? sys_pread64+0x82/0xa0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100b072&amp;gt;&amp;#93;&lt;/span&gt; ? system_call_fastpath+0x16/0x1b&lt;/p&gt;</description>
                <environment></environment>
        <key id="26816">LU-5700</key>
            <summary>async IO LBUG obj-&gt;cob_transient_pages == 0</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="schamp">Stephen Champion</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Thu, 2 Oct 2014 09:59:50 +0000</created>
                <updated>Mon, 20 Oct 2014 22:10:12 +0000</updated>
                            <resolved>Mon, 20 Oct 2014 22:10:12 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.7.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="95500" author="schamp" created="Thu, 2 Oct 2014 10:18:29 +0000"  >&lt;p&gt;This is easy to reproduce using sio on an 8-socket UV system with 160 threads and 4 TB of memory.  I have been unable to reproduce this on a smaller configuration thus far.&lt;/p&gt;

&lt;p&gt;root@cy024-4-sys:/mnt/cy024/schamp # ./sio -tDcw -b 2048 -A 16 -s 83865632 -g /mnt/cy024/schamp/sio.1&lt;br/&gt;
Warning: filesize adjusted to 83865600 blocks&lt;br/&gt;
&amp;lt;crash&amp;gt;&lt;/p&gt;

&lt;p&gt;3 OSSes with five OSTs each, stripe count -1 (all 15 OSTs):&lt;/p&gt;
&lt;p&gt;# lfs getstripe /mnt/cy024-mnt/schamp/sio.1&lt;br/&gt;
/mnt/cy024-mnt/schamp/sio.1&lt;br/&gt;
lmm_stripe_count:   15&lt;br/&gt;
lmm_stripe_size:    1048576&lt;br/&gt;
lmm_pattern:        1&lt;br/&gt;
lmm_layout_gen:     0&lt;br/&gt;
lmm_stripe_offset:  7&lt;br/&gt;
        obdidx           objid           objid           group&lt;br/&gt;
             7            6276         0x1884                0&lt;br/&gt;
            11            6276         0x1884                0&lt;br/&gt;
            15            6276         0x1884                0&lt;br/&gt;
             2            6276         0x1884                0&lt;br/&gt;
            13            6276         0x1884                0&lt;br/&gt;
             9            6276         0x1884                0&lt;br/&gt;
             6            6276         0x1884                0&lt;br/&gt;
            10            6276         0x1884                0&lt;br/&gt;
            12            6276         0x1884                0&lt;br/&gt;
             4            6276         0x1884                0&lt;br/&gt;
             1            6276         0x1884                0&lt;br/&gt;
             0            6276         0x1884                0&lt;br/&gt;
            14            6276         0x1884                0&lt;br/&gt;
             5            6276         0x1884                0&lt;br/&gt;
             8            6276         0x1884                0&lt;/p&gt;


&lt;p&gt;We have cores if they are useful.&lt;/p&gt;

&lt;p&gt;Olaf Weber&apos;s notes:&lt;/p&gt;

&lt;p&gt;There are four threads of interest, with stack traces like this showing they&apos;re doing direct IO.&lt;/p&gt;

&lt;p&gt;PID: 58183  TASK: ffff893de6382080  CPU: 151  COMMAND: &quot;sio&quot;&lt;br/&gt;
 #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fed9b0&amp;#93;&lt;/span&gt; panic at ffffffff81528001&lt;br/&gt;
 #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1feda30&amp;#93;&lt;/span&gt; lbug_with_loc at ffffffffa057deeb &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1feda50&amp;#93;&lt;/span&gt; ll_direct_IO_26 at ffffffffa0c03044 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedb80&amp;#93;&lt;/span&gt; generic_file_aio_read at ffffffff8112158b&lt;br/&gt;
 #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedc60&amp;#93;&lt;/span&gt; vvp_io_read_start at ffffffffa0c173be &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedcd0&amp;#93;&lt;/span&gt; cl_io_start at ffffffffa06f14da &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedd00&amp;#93;&lt;/span&gt; cl_io_loop at ffffffffa06f5664 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedd30&amp;#93;&lt;/span&gt; ll_file_io_generic at ffffffffa0bb3f41 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fede20&amp;#93;&lt;/span&gt; ll_file_aio_read at ffffffffa0bb4da0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fede80&amp;#93;&lt;/span&gt; ll_file_read at ffffffffa0bb5079 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
#10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedef0&amp;#93;&lt;/span&gt; vfs_read at ffffffff811894d5&lt;br/&gt;
#11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedf30&amp;#93;&lt;/span&gt; sys_pread64 at ffffffff81189802&lt;br/&gt;
#12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff893dd1fedf80&amp;#93;&lt;/span&gt; system_call_fastpath at ffffffff8100b072&lt;br/&gt;
    RIP: 00007ffff7495e13  RSP: 00007ffff7ff8dd8  RFLAGS: 00010206&lt;br/&gt;
    RAX: 0000000000000011  RBX: ffffffff8100b072  RCX: 0000000000000000&lt;br/&gt;
    RDX: 0000000000800000  RSI: 00007ffff699c000  RDI: 0000000000000003&lt;br/&gt;
    RBP: 0000000000606240   R8: 00007ffff7ff9700   R9: 00007ffff7ff9700&lt;br/&gt;
    R10: 0000000000000000  R11: 0000000000000293  R12: 00007ffff7ff9700&lt;br/&gt;
    R13: 0000000000000003  R14: 000000000061f3e8  R15: 0000000000000003&lt;br/&gt;
    ORIG_RAX: 0000000000000011  CS: 0033  SS: 002b&lt;/p&gt;

&lt;p&gt;Approximately they&apos;re here:&lt;/p&gt;

&lt;p&gt;lustre/llite/rw26.c&lt;br/&gt;
    373 static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,&lt;br/&gt;
    374                                const struct iovec *iov, loff_t file_offset,&lt;br/&gt;
    375                                unsigned long nr_segs)&lt;br/&gt;
    376 {&lt;br/&gt;
    377         struct lu_env *env;&lt;br/&gt;
    378         struct cl_io *io;&lt;br/&gt;
    379         struct file *file = iocb-&amp;gt;ki_filp;&lt;br/&gt;
    380         struct inode *inode = file-&amp;gt;f_mapping-&amp;gt;host;&lt;br/&gt;
    381         struct ccc_object *obj = cl_inode2ccc(inode);&lt;br/&gt;
    382         long count = iov_length(iov, nr_segs);&lt;br/&gt;
...&lt;br/&gt;
    433                         page_count = ll_get_user_pages(rw, user_addr, bytes,&lt;br/&gt;
    434                                                        &amp;amp;pages, &amp;amp;max_pages);&lt;br/&gt;
    435                         if (likely(page_count &amp;gt; 0)) {&lt;br/&gt;
    436                                 if (unlikely(page_count &amp;lt;  max_pages))&lt;br/&gt;
    437                                         bytes = page_count &amp;lt;&amp;lt; PAGE_CACHE_SHIFT;&lt;br/&gt;
    438                                 result = ll_direct_IO_26_seg(env, io, rw, inode,&lt;br/&gt;
    439                                                              file-&amp;gt;f_mapping,&lt;br/&gt;
    440                                                              bytes, file_offset,&lt;br/&gt;
    441                                                              pages, page_count);&lt;br/&gt;
    442                                 ll_free_user_pages(pages, max_pages, rw==READ);&lt;br/&gt;
...&lt;br/&gt;
    474 out:&lt;br/&gt;
*   475         LASSERT(obj-&amp;gt;cob_transient_pages == 0);&lt;/p&gt;


&lt;p&gt;They are all referencing the same obj&lt;/p&gt;

&lt;p&gt;crash&amp;gt; ccc_object.cob_transient_pages ffff88bdd31dd078&lt;br/&gt;
  cob_transient_pages = 36&lt;/p&gt;

&lt;p&gt;Looking at how cob_transient_pages is manipulated:&lt;/p&gt;

&lt;p&gt;lustre/include/lclient.h:&lt;br/&gt;
    194 /**&lt;br/&gt;
    195  * ccc-private object state.&lt;br/&gt;
    196  */&lt;br/&gt;
    197 struct ccc_object {&lt;br/&gt;
...&lt;br/&gt;
    210         /**&lt;br/&gt;
    211          * Access this counter is protected by inode-&amp;gt;i_sem. Now that&lt;br/&gt;
    212          * the lifetime of transient pages must be covered by inode sem,&lt;br/&gt;
    213          * we don&apos;t need to hold any lock..&lt;br/&gt;
    214          */&lt;br/&gt;
    215         int                     cob_transient_pages;&lt;/p&gt;

&lt;p&gt;Protected by i_sem? A very bad sign, as i_sem has been replaced by i_mutex for&lt;br/&gt;
quite some time now, and the i_mutex is not held across a read operation. Lustre&lt;br/&gt;
would have to do this itself.&lt;/p&gt;

&lt;p&gt;Which points to commit ed5ebb87bfc2b684958daac90c4369f395482a16, part of which is this:&lt;/p&gt;

&lt;p&gt;diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c&lt;br/&gt;
index b605fa4..b9c8293 100644&lt;br/&gt;
--- a/lustre/llite/rw26.c&lt;br/&gt;
+++ b/lustre/llite/rw26.c&lt;br/&gt;
@@ -412,13 +412,6 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,&lt;br/&gt;
         io = ccc_env_io(env)-&amp;gt;cui_cl.cis_io;&lt;br/&gt;
         LASSERT(io != NULL);&lt;br/&gt;
&lt;br/&gt;
-        /* 0. Need locking between buffered and direct access. and race with&lt;br/&gt;
-         *    size changing by concurrent truncates and writes.&lt;br/&gt;
-         * 1. Need inode mutex to operate transient pages.&lt;br/&gt;
-         */&lt;br/&gt;
-        if (rw == READ)&lt;br/&gt;
-                mutex_lock(&amp;amp;inode-&amp;gt;i_mutex);&lt;br/&gt;
-&lt;br/&gt;
         LASSERT(obj-&amp;gt;cob_transient_pages == 0);&lt;br/&gt;
         for (seg = 0; seg &amp;lt; nr_segs; seg++) {&lt;br/&gt;
                 long iov_left = iov[seg].iov_len;&lt;br/&gt;
@@ -480,8 +473,6 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,&lt;br/&gt;
         }&lt;br/&gt;
 out:&lt;br/&gt;
         LASSERT(obj-&amp;gt;cob_transient_pages == 0);&lt;br/&gt;
-        if (rw == READ)&lt;br/&gt;
-                mutex_unlock(&amp;amp;inode-&amp;gt;i_mutex);&lt;br/&gt;
&lt;br/&gt;
         if (tot_bytes &amp;gt; 0) {&lt;br/&gt;
                 struct ccc_io *cio = ccc_env_io(env);&lt;/p&gt;</comment>
                            <comment id="95501" author="olaf" created="Thu, 2 Oct 2014 10:24:25 +0000"  >&lt;p&gt;A bit more info from the commit header: the change was &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1669&quot; title=&quot;lli-&amp;gt;lli_write_mutex (single shared file performance)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1669&quot;&gt;&lt;del&gt;LU-1669&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;commit ed5ebb87bfc2b684958daac90c4369f395482a16&lt;br/&gt;
Author: Prakash Surya &amp;lt;surya1@llnl.gov&amp;gt;&lt;br/&gt;
Date:   Wed Oct 2 17:16:51 2013 -0700&lt;/p&gt;

&lt;p&gt;    &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1669&quot; title=&quot;lli-&amp;gt;lli_write_mutex (single shared file performance)&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1669&quot;&gt;&lt;del&gt;LU-1669&lt;/del&gt;&lt;/a&gt; vvp: Use lockless __generic_file_aio_write&lt;br/&gt;
...&lt;br/&gt;
    Change-Id: I0023132b5d941b3304f39f015f95106542998072&lt;br/&gt;
    Reviewed-on: &lt;a href=&quot;http://review.whamcloud.com/6672&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/6672&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="95513" author="pjones" created="Thu, 2 Oct 2014 14:02:32 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;Could you please advise on this issue?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="95629" author="schamp" created="Fri, 3 Oct 2014 14:56:16 +0000"  >&lt;p&gt;Our test system had another crash with normal (not async) DIO last night.  This led to &lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/12179/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/12179/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Which lets us get on with the performance evaluation we were doing.&lt;/p&gt;

&lt;p&gt;The patch removes the assertions and makes the counter atomic.  We are not convinced that this section is safe for multiple threads - it needs to be considered carefully.  The patch should not introduce any new bugs, but may expose something by not crashing.&lt;/p&gt;</comment>
                            <comment id="96753" author="jfc" created="Mon, 20 Oct 2014 21:58:34 +0000"  >&lt;p&gt;Stephen,&lt;br/&gt;
Your patch 12179 has been merged into master (October 10).&lt;/p&gt;

&lt;p&gt;Do you need any more work on this ticket, or can I mark it as resolved, fixed?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="96756" author="schamp" created="Mon, 20 Oct 2014 22:06:24 +0000"  >&lt;p&gt;Works for now.&lt;/p&gt;

&lt;p&gt;We&apos;re not entirely convinced that this section of code is safe for reentry, but this clearly fixed one problem, and we can open a new ticket if we identify another.&lt;/p&gt;</comment>
                            <comment id="96757" author="pjones" created="Mon, 20 Oct 2014 22:10:12 +0000"  >&lt;p&gt;ok thanks Steve&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="15301">LU-1669</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwxlz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15957</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>