<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:06:33 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-14066] dd oflag=direct writes hang when size is not a multiple of page size</title>
                <link>https://jira.whamcloud.com/browse/LU-14066</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;There is an issue with using direct io to overwrite a file with transfers that are not a multiple of the page size:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;k:~# cd /mnt/lustre
k:lustre# ls 
k:lustre# dd if=/dev/zero of=f0 oflag=direct bs=4K count=1 
1+0 records in
1+0 records out
4096 bytes (4.1 kB) copied, 0.0323935 s, 126 kB/s
k:lustre# dd if=/dev/zero of=f0 oflag=direct bs=4K count=1 
1+0 records in
1+0 records out
4096 bytes (4.1 kB) copied, 0.0207773 s, 197 kB/s
k:lustre# dd if=/dev/zero of=f0 oflag=direct bs=4K count=1 
1+0 records in
1+0 records out
4096 bytes (4.1 kB) copied, 0.0300933 s, 136 kB/s
k:lustre# /bin/truncate --size=1397 f0
k:lustre# dd if=f0 of=f1 oflag=direct bs=4K count=1 
0+1 records in
0+1 records out
1397 bytes (1.4 kB) copied, 0.0510133 s, 27.4 kB/s
k:lustre# dd if=f0 of=f1 oflag=direct bs=4K count=1 
# Hangs here in a 1397 byte write to f1 and resumes as follows.
dd: fsync failed for &#8216;f1&#8217;: Input/output error
0+1 records in
0+1 records out
1397 bytes (1.4 kB) copied, 100.296 s, 0.0 kB/s
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I tried adding a sync before overwriting but it did not change anything.&lt;/p&gt;

&lt;p&gt;Here are the stack traces I collected:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(&apos;26269&apos;, &apos;ldlm_bl_02&apos;)
[&amp;lt;ffffffffc0f2e328&amp;gt;] osc_extent_wait+0x528/0x750 [osc]
[&amp;lt;ffffffffc0f3068d&amp;gt;] osc_cache_wait_range+0x2dd/0x940 [osc]
[&amp;lt;ffffffffc0f3163e&amp;gt;] osc_cache_writeback_range+0x94e/0xfd0 [osc]
[&amp;lt;ffffffffc0f1cc05&amp;gt;] osc_lock_flush+0x195/0x290 [osc]
[&amp;lt;ffffffffc0f1d0e8&amp;gt;] osc_ldlm_blocking_ast+0x2f8/0x3e0 [osc]
[&amp;lt;ffffffffc10ed051&amp;gt;] ldlm_cancel_callback+0x91/0x330 [ptlrpc]
[&amp;lt;ffffffffc11054d1&amp;gt;] ldlm_cli_cancel_local+0xd1/0x420 [ptlrpc]
[&amp;lt;ffffffffc110b81c&amp;gt;] ldlm_cli_cancel+0x10c/0x560 [ptlrpc]
[&amp;lt;ffffffffc0f1cf6a&amp;gt;] osc_ldlm_blocking_ast+0x17a/0x3e0 [osc]
[&amp;lt;ffffffffc1117b98&amp;gt;] ldlm_handle_bl_callback+0xc8/0x3e0 [ptlrpc]
[&amp;lt;ffffffffc111846f&amp;gt;] ldlm_bl_thread_main+0x5bf/0xae0 [ptlrpc]
[&amp;lt;ffffffff824e1c0f&amp;gt;] kthread+0xef/0x100
[&amp;lt;ffffffff82c878f7&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff

(&apos;28085&apos;, &apos;dd&apos;)
[&amp;lt;ffffffffc0f2e328&amp;gt;] osc_extent_wait+0x528/0x750 [osc]
[&amp;lt;ffffffffc0f3068d&amp;gt;] osc_cache_wait_range+0x2dd/0x940 [osc]
[&amp;lt;ffffffffc0f3163e&amp;gt;] osc_cache_writeback_range+0x94e/0xfd0 [osc]
[&amp;lt;ffffffffc0f1cc05&amp;gt;] osc_lock_flush+0x195/0x290 [osc]
[&amp;lt;ffffffffc0f1cd3f&amp;gt;] osc_lock_lockless_cancel+0x3f/0xf0 [osc]
[&amp;lt;ffffffffc0c29108&amp;gt;] cl_lock_cancel+0x78/0x160 [obdclass]
[&amp;lt;ffffffffc0fa9af9&amp;gt;] lov_lock_cancel+0x99/0x190 [lov]
[&amp;lt;ffffffffc0c29108&amp;gt;] cl_lock_cancel+0x78/0x160 [obdclass]
[&amp;lt;ffffffffc0c29622&amp;gt;] cl_lock_release+0x52/0x140 [obdclass]
[&amp;lt;ffffffffc0c2cfb9&amp;gt;] cl_io_unlock+0x139/0x290 [obdclass]
[&amp;lt;ffffffffc0c2dfa8&amp;gt;] cl_io_loop+0xb8/0x200 [obdclass]
[&amp;lt;ffffffffc1b273ed&amp;gt;] ll_file_io_generic+0x8ad/0xd70 [lustre]
[&amp;lt;ffffffffc1b27d40&amp;gt;] ll_file_aio_write+0x490/0x780 [lustre]
[&amp;lt;ffffffffc1b28130&amp;gt;] ll_file_write+0x100/0x1c0 [lustre]
[&amp;lt;ffffffff826a6fbc&amp;gt;] vfs_write+0xdc/0x240
[&amp;lt;ffffffff826a7e8a&amp;gt;] SyS_write+0x8a/0x100
[&amp;lt;ffffffff82c87a9e&amp;gt;] system_call_fastpath+0x25/0x2a
[&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff

(&apos;26540&apos;, &apos;ll_ost_io00_001&apos;)
[&amp;lt;ffffffffc1108987&amp;gt;] ldlm_completion_ast+0x787/0x9e0 [ptlrpc]
[&amp;lt;ffffffffc110791f&amp;gt;] ldlm_cli_enqueue_local+0x25f/0x870 [ptlrpc]
[&amp;lt;ffffffffc11a3aea&amp;gt;] tgt_extent_lock+0xea/0x290 [ptlrpc]
[&amp;lt;ffffffffc11a5d2f&amp;gt;] tgt_brw_lock.isra.28+0x14f/0x340 [ptlrpc]
[&amp;lt;ffffffffc11a7e27&amp;gt;] tgt_brw_write+0x797/0x1a10 [ptlrpc]
[&amp;lt;ffffffffc11ab103&amp;gt;] tgt_request_handle+0x813/0x1780 [ptlrpc]
[&amp;lt;ffffffffc1149e16&amp;gt;] ptlrpc_server_handle_request+0x266/0xb50 [ptlrpc]
[&amp;lt;ffffffffc114ec51&amp;gt;] ptlrpc_main+0xca1/0x26b0 [ptlrpc]
[&amp;lt;ffffffff824e1c0f&amp;gt;] kthread+0xef/0x100
[&amp;lt;ffffffff82c878f7&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And some dmesg logs:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[202397.535229] Lustre: ll_ost_io00_001: service thread pid 26540 was inactive for 40.094 seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[202397.543576] Pid: 26540, comm: ll_ost_io00_001 3.10.0-1062.9.1.el7.x86_64.debug #1 SMP Mon Mar 16 12:44:56 CDT 2020
[202397.546304] Call Trace:
[202397.546978]  [&amp;lt;ffffffffc1108987&amp;gt;] ldlm_completion_ast+0x787/0x9e0 [ptlrpc]
[202397.548875]  [&amp;lt;ffffffffc110791f&amp;gt;] ldlm_cli_enqueue_local+0x25f/0x870 [ptlrpc]
[202397.550795]  [&amp;lt;ffffffffc11a3aea&amp;gt;] tgt_extent_lock+0xea/0x290 [ptlrpc]
[202397.552573]  [&amp;lt;ffffffffc11a5d2f&amp;gt;] tgt_brw_lock.isra.28+0x14f/0x340 [ptlrpc]
[202397.554371]  [&amp;lt;ffffffffc11a7e27&amp;gt;] tgt_brw_write+0x797/0x1a10 [ptlrpc]
[202397.556390]  [&amp;lt;ffffffffc11ab103&amp;gt;] tgt_request_handle+0x813/0x1780 [ptlrpc]
[202397.558253]  [&amp;lt;ffffffffc1149e16&amp;gt;] ptlrpc_server_handle_request+0x266/0xb50 [ptlrpc]
[202397.560299]  [&amp;lt;ffffffffc114ec51&amp;gt;] ptlrpc_main+0xca1/0x26b0 [ptlrpc]
[202397.562022]  [&amp;lt;ffffffff824e1c0f&amp;gt;] kthread+0xef/0x100
[202397.563260]  [&amp;lt;ffffffff82c878f7&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[202397.565262]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[202457.567573] LustreError: 26270:0:(ldlm_lockd.c:260:expired_lock_main()) ### lock callback timer expired after 100s: evicting client at 0@lo  ns: filter-lustre-OST0002_UUID lock: ffff9333df8ac580/0x30c43bdabaa6c7b7 lrc: 3/0,0 mode: PW/PW res: [0x2:0x0:0x0].0x0 rrc: 3 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x60000480000020 nid: 0@lo remote: 0x30c43bdabaa6c7b0 expref: 7 pid: 26534 timeout: 202457 lvb_type: 0
[202457.584810] LustreError: 26540:0:(tgt_grant.c:758:tgt_grant_check()) lustre-OST0002: cli 47f1d083-bc11-4da7-b42f-9a3a45d5d410 claims 28672 GRANT, real grant 0
[202457.607108] LustreError: 27832:0:(ldlm_lockd.c:2489:ldlm_cancel_handler()) ldlm_cancel from 0@lo arrived at 1603402419 with bad export cookie 3513999419876230990
[202457.611541] LustreError: 11-0: lustre-OST0002-osc-ffff93340109b000: operation ost_sync to node 0@lo failed: rc = -107
[202457.611551] Lustre: lustre-OST0002-osc-ffff93340109b000: Connection to lustre-OST0002 (at 0@lo) was lost; in progress operations using this service will wait for recovery to complete
[202457.612031] Lustre: lustre-OST0002: Connection restored to 192.168.122.75@tcp (at 0@lo)
[202457.612195] LustreError: 167-0: lustre-OST0002-osc-ffff93340109b000: This client was evicted by lustre-OST0002; in progress operations using this service will fail.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>3.10.0-1062.9.1.el7.x86_64.debug v2_13_56-40-g0104258 </environment>
        <key id="61319">LU-14066</key>
            <summary>dd oflag=direct writes hang when size is not a multiple of page size</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jhammond">John Hammond</reporter>
                        <labels>
                    </labels>
                <created>Thu, 22 Oct 2020 21:35:11 +0000</created>
                <updated>Tue, 24 Nov 2020 22:55:52 +0000</updated>
                            <resolved>Tue, 24 Nov 2020 22:55:52 +0000</resolved>
                                    <version>Lustre 2.14.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>1</watches>
                                                                            <comments>
                            <comment id="285940" author="adilger" created="Tue, 24 Nov 2020 22:55:52 +0000"  >&lt;p&gt;Patch is in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-14072&quot; title=&quot;client evictions with DIO&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-14072&quot;&gt;&lt;del&gt;LU-14072&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="61335">LU-14072</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01d1r:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>