<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:33:32 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-10267] Wrong poll() returned revents for changelog device</title>
                <link>https://jira.whamcloud.com/browse/LU-10267</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The following program failed to poll on the changelog device:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;     rc = llapi_changelog_start(&amp;amp;chglog_hdlr,
                                   CHANGELOG_FLAG_BLOCK | CHANGELOG_FLAG_JOBID,
                                   mdtname, 0);
     
        fd = llapi_changelog_get_fd(chglog_hdlr);

        fds[0].fd = fd;
        fds[0].events = POLLIN;
        rc = llapi_changelog_recv(chglog_hdlr, &amp;amp;rec);
        rc = poll(fds, 1, CHLG_POLL_INTV);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The returned revents are always POLLERR.&lt;/p&gt;

&lt;p&gt;The Lustre log output for this failure is:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00000002:20000000:1.0:1511318935.046970:0:110785:0:(mdc_changelog.c:153:chlg_read_cat_process_cb()) 30 13TRUNC 1622762232318656574 0xe t=[0x200000401:0x1:0x0] p=[0x0:0x0:0x0] 
00000040:00001000:1.0:1511318935.046987:0:110785:0:(llog.c:513:llog_process_thread()) processing rec 0xffff880012e1ae10 type 0x10660000
00000040:00001000:1.0:1511318935.046988:0:110785:0:(llog.c:519:llog_process_thread()) after swabbing, type=0x10660000 idx=31
00000040:00001000:1.0:1511318935.046988:0:110785:0:(llog.c:595:llog_process_thread()) lrh_index: 31 lrh_len: 120 (4592 remains)
00000002:20000000:1.0:1511318935.047002:0:110785:0:(mdc_changelog.c:153:chlg_read_cat_process_cb()) 31 11CLOSE 1622762232427964466 0x242 t=[0x200000401:0x1:0x0] p=[0x0:0x0:0x0] 
00000040:00001000:1.0:1511318935.047004:0:110785:0:(llog.c:513:llog_process_thread()) processing rec 0xffff880012e1ae88 type 0x10660000
00000040:00001000:1.0:1511318935.047004:0:110785:0:(llog.c:519:llog_process_thread()) after swabbing, type=0x10660000 idx=32
00000040:00001000:1.0:1511318935.047004:0:110785:0:(llog.c:595:llog_process_thread()) lrh_index: 32 lrh_len: 120 (4472 remains)
00000002:20000000:1.0:1511318935.047004:0:110785:0:(mdc_changelog.c:153:chlg_read_cat_process_cb()) 32 13TRUNC 1622762281381438112 0xe t=[0x200000401:0x1:0x0] p=[0x0:0x0:0x0] 
00000040:00001000:1.0:1511318935.047005:0:110785:0:(llog.c:513:llog_process_thread()) processing rec 0xffff880012e1af00 type 0x10660000
00000040:00001000:1.0:1511318935.047005:0:110785:0:(llog.c:519:llog_process_thread()) after swabbing, type=0x10660000 idx=33
00000040:00001000:1.0:1511318935.047006:0:110785:0:(llog.c:595:llog_process_thread()) lrh_index: 33 lrh_len: 120 (4352 remains)
00000002:20000000:1.0:1511318935.047006:0:110785:0:(mdc_changelog.c:153:chlg_read_cat_process_cb()) 33 11CLOSE 1622762281387196858 0x242 t=[0x200000401:0x1:0x0] p=[0x0:0x0:0x0] 
00000040:00001000:1.0:1511318935.047007:0:110785:0:(llog.c:513:llog_process_thread()) processing rec 0xffff880012e1af78 type 0x0
00000040:00001000:1.0:1511318935.047007:0:110785:0:(llog.c:519:llog_process_thread()) after swabbing, type=0x0 idx=0
00000040:00001000:1.0:1511318935.047009:0:110785:0:(llog.c:513:llog_process_thread()) processing rec 0xffff880012ce4040 type 0x0
00000040:00001000:1.0:1511318935.047009:0:110785:0:(llog.c:519:llog_process_thread()) after swabbing, type=0x0 idx=0
00000040:00001000:1.0:1511318935.047010:0:110785:0:(llog.c:556:llog_process_thread()) Re-read last llog buffer &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;new&lt;/span&gt; records, index 2, last 1
00000040:00001000:0.0:1511318935.047146:0:85780:0:(llog_osd.c:859:llog_osd_next_block()) looking &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; log index 2 (cur idx 1 off8192), size 8256
00000040:00001000:1.0:1511318935.047240:0:110785:0:(llog.c:483:llog_process_thread()) cur_offset 8192, chunk_offset 8192, buf_offset 64, rc = -5
00000002:00020000:1.0:1511318935.047242:0:110785:0:(mdc_changelog.c:244:chlg_load()) lustre-MDT0000-mdc-ffff88000cabe000: fail to process llog: rc = -5
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This bug can be easily reproduced with the following commands:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;lctl --device lustre-MDT0000 changelog_register
dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/zero of=/mnt/lustre/test bs=1M count=2
lfs changelog lustre-MDT0000
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;,&lt;/p&gt;</description>
                <environment></environment>
        <key id="49420">LU-10267</key>
            <summary>Wrong poll() returned revents for changelog device</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="qian">Qian Yingjin</reporter>
                        <labels>
                    </labels>
                <created>Wed, 22 Nov 2017 03:09:53 +0000</created>
                <updated>Thu, 4 Jan 2018 18:31:29 +0000</updated>
                            <resolved>Sun, 17 Dec 2017 16:15:12 +0000</resolved>
                                                    <fixVersion>Lustre 2.11.0</fixVersion>
                    <fixVersion>Lustre 2.10.3</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>7</watches>
                                                                            <comments>
                            <comment id="214512" author="pjones" created="Thu, 23 Nov 2017 13:15:50 +0000"  >&lt;p&gt;Bruno&lt;/p&gt;

&lt;p&gt;Could you please advise?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="214683" author="jhammond" created="Mon, 27 Nov 2017 16:03:15 +0000"  >&lt;p&gt;Hi Qian Yingjin,&lt;/p&gt;

&lt;p&gt;What version of Lustre are you using here?&lt;/p&gt;</comment>
                            <comment id="214760" author="qian" created="Tue, 28 Nov 2017 01:32:06 +0000"  >&lt;p&gt;Hi John,&lt;br/&gt;
I am using the intel master branch:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[root@server lustre-release]# lctl get_param version
version=2.10.55_31_g8824b0d_dirty
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="214808" author="bfaccini" created="Tue, 28 Nov 2017 14:03:18 +0000"  >&lt;p&gt;Hello Qian,&lt;br/&gt;
Can you describe in more detail how this problem happens or can be reproduced? In particular, can you explain how the program calling poll() is run? Can you also provide its full source code, if it is not part of a very big project?&lt;br/&gt;
Also, how did you gather the debug logs you have posted ? I mean were they all consecutive like this when being extracted from the Lustre debug ring buffer or did you grep some specific pattern, like the PID ? I am asking because I am surprised to find that all traces are for PID 110785 but the llog_osd_next_block() trace which is for PID 85780 when I expected it to be also for 110785...&lt;/p&gt;</comment>
                            <comment id="214846" author="jhammond" created="Tue, 28 Nov 2017 17:02:07 +0000"  >&lt;p&gt;Yes, I have seen this too.&lt;/p&gt;

&lt;p&gt;In &lt;tt&gt;llog_process_thread()&lt;/tt&gt; we expect &lt;tt&gt;cur_offset&lt;/tt&gt; to be updated by &lt;tt&gt;llog_next_block()&lt;/tt&gt; even when it returns &lt;tt&gt;-EIO&lt;/tt&gt;. &lt;tt&gt;llog_osd_next_block()&lt;/tt&gt; does this, but &lt;tt&gt;llog_client_next_block()&lt;/tt&gt; does not. So another case of misusing error returns to pass information. (There are other ways that &lt;tt&gt;ptlrpc_queue_wait()&lt;/tt&gt; can return &lt;tt&gt;-EIO&lt;/tt&gt; so using it this way is ambiguous.)&lt;/p&gt;</comment>
                            <comment id="214918" author="qian" created="Wed, 29 Nov 2017 06:19:05 +0000"  >&lt;p&gt;Hi Bruno,&lt;/p&gt;

&lt;p&gt;You can check out the code of the LSOM syncing tool (&lt;a href=&quot;https://review.whamcloud.com/#/c/30124/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/30124/&lt;/a&gt;). In the code, I have commented out the code that uses poll(); you can enable it.&lt;/p&gt;

&lt;p&gt;The way I gathered the debug logs is:&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;lctl set_param subsystem_debug=mdc
lctl set_param subsystem_deubg=+log
lctl set_param debug=other
lctl set_param debug=+hsm
lctl --device lustre-MDT0000 changelog_register 
dd &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt;=/dev/zero of=/mnt/lustre/test bs=1M count=2
lfs changelog lustre-MDT0000
lctl dk
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I really don&apos;t think the current changelog device can support poll() the way a keyboard char device or a pipe does. I mean the MDT generates some changelog records due to some file operations; the client opens the changelog misc char device, then reads and processes the records. After finishing the read (reaching EOF), we can call poll() to wait for new changelog records to be generated, and then repeat the read and processing.&lt;/p&gt;

&lt;p&gt;To implement poll(), the best way would be for the changelog originator (MDT) to push the changelog records to the replicator (client) as soon as new log records are generated, but I don&apos;t think the current llog mechanism can support this.&lt;/p&gt;

&lt;p&gt;If you need any other help, please let me know.&lt;/p&gt;

&lt;p&gt;Thanks, &lt;br/&gt;
Qian&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="214934" author="gerrit" created="Wed, 29 Nov 2017 14:50:34 +0000"  >&lt;p&gt;John L. Hammond (john.hammond@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/30313&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30313&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10267&quot; title=&quot;Wrong poll() returned revents for changelog device&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10267&quot;&gt;&lt;del&gt;LU-10267&lt;/del&gt;&lt;/a&gt; llog: fix EOF handling in llog_client_next_block()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: efde5a31f0b5fe4d5667d403fb399f6fb54d3d39&lt;/p&gt;</comment>
                            <comment id="214935" author="jhammond" created="Wed, 29 Nov 2017 14:51:46 +0000"  >&lt;p&gt;&amp;gt; To implement poll(), the best way would be for the changelog originator (MDT) to push the changelog records to the replicator (client) as soon as new log records are generated, but I don&apos;t think the current llog mechanism can support this.&lt;/p&gt;

&lt;p&gt;I agree.&lt;/p&gt;</comment>
                            <comment id="216518" author="gerrit" created="Sun, 17 Dec 2017 06:20:29 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/30313/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30313/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10267&quot; title=&quot;Wrong poll() returned revents for changelog device&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10267&quot;&gt;&lt;del&gt;LU-10267&lt;/del&gt;&lt;/a&gt; llog: fix EOF handling in llog_client_next_block()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: a485c51379a0218aaf01f7c0baf8e4cc993d8824&lt;/p&gt;</comment>
                            <comment id="216549" author="pjones" created="Sun, 17 Dec 2017 16:15:12 +0000"  >&lt;p&gt;Landed for 2.11&lt;/p&gt;</comment>
                            <comment id="216630" author="gerrit" created="Mon, 18 Dec 2017 18:22:59 +0000"  >&lt;p&gt;Minh Diep (minh.diep@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/30582&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30582&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10267&quot; title=&quot;Wrong poll() returned revents for changelog device&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10267&quot;&gt;&lt;del&gt;LU-10267&lt;/del&gt;&lt;/a&gt; llog: fix EOF handling in llog_client_next_block()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d60e632396623c809bfafc27e1b529407106f198&lt;/p&gt;</comment>
                            <comment id="217505" author="gerrit" created="Thu, 4 Jan 2018 17:51:02 +0000"  >&lt;p&gt;John L. Hammond (john.hammond@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/30582/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/30582/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10267&quot; title=&quot;Wrong poll() returned revents for changelog device&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10267&quot;&gt;&lt;del&gt;LU-10267&lt;/del&gt;&lt;/a&gt; llog: fix EOF handling in llog_client_next_block()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_10&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 37047ec8599e9b5ec3884be45c83e58a2b4f5270&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="49419">LU-10266</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="49728">LU-10380</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="49723">LU-10379</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzo5z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>