<?xml version="1.0" encoding="UTF-8"?>
<!--
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:55:17 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5877] lfs changelog hangs after lfs changelog_clear</title>
                <link>https://jira.whamcloud.com/browse/LU-5877</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;To reproduce:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@test1 vagrant]# lctl --device lustre-MDT0000 changelog_register
lustre-MDT0000: Registered changelog userid &apos;cl1&apos;
[root@test1 vagrant]# lfs changelog lustre-MDT0000
[root@test1 vagrant]# touch /mnt/lustre/foo
[root@test1 vagrant]# lfs changelog lustre-MDT0000
1 01CREAT 23:24:41.475662597 2014.11.05 0x0 t=[0x200000400:0x2c:0x0] j=touch.0 p=[0x200000007:0x1:0x0] foo
[root@test1 vagrant]# lfs changelog_clear lustre-MDT0000 cl1 1
[root@test1 vagrant]# lfs changelog lustre-MDT0000

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This last command hangs, even when changes are made to the filesystem. I also tried a version that calls llapi_changelog_start() without CHANGELOG_FLAG_BLOCK, and it still blocks here.  The lfs process stack trace:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov  5 15:26:04 test1 kernel: lfs           S 0000000000000000     0 16712  10059 0x00000080
Nov  5 15:26:04 test1 kernel: ffff88001ade9ca8 0000000000000086 ffff88001ade9c18 ffffffff8111fc67
Nov  5 15:26:04 test1 kernel: 000000000040c9b0 ffff88001dbc6890 ffff88001ade9ca8 ffffffff8114a669
Nov  5 15:26:04 test1 kernel: ffff88002aad8638 ffff88001ade9fd8 000000000000fbc8 ffff88002aad8638
Nov  5 15:26:04 test1 kernel: Call Trace:
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8111fc67&amp;gt;] ? unlock_page+0x27/0x30
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8114a669&amp;gt;] ? __do_fault+0x469/0x530
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8109b2ce&amp;gt;] ? prepare_to_wait+0x4e/0x80
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff81193e1b&amp;gt;] pipe_wait+0x5b/0x80
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8152ad0e&amp;gt;] ? mutex_lock+0x1e/0x50
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff811948c6&amp;gt;] pipe_read+0x3e6/0x4e0
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa10f0c50&amp;gt;] ? ll_dir_open+0x0/0xf0 [lustre]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff811890da&amp;gt;] do_sync_read+0xfa/0x140
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8109afa0&amp;gt;] ? autoremove_wake_function+0x0/0x40
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff811ab840&amp;gt;] ? mntput_no_expire+0x30/0x110
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff81227386&amp;gt;] ? security_file_permission+0x16/0x20
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff81189a95&amp;gt;] vfs_read+0xb5/0x1a0
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff81189bd1&amp;gt;] sys_read+0x51/0x90
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8152c93e&amp;gt;] ? do_device_not_available+0xe/0x10
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8100b072&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I also noticed this stack for mdc_clg_send, and it seems the mdc has lost connection with the mdt (which is on the same node)&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov  5 15:26:04 test1 kernel: mdc_clg_send_ S 0000000000000000     0 16713      2 0x00000080
Nov  5 15:26:04 test1 kernel: ffff88001fc77ad0 0000000000000046 ffff88001c2f9138 ffff88001bbf04c8
Nov  5 15:26:04 test1 kernel: ffff88001fc77a50 ffffffffa06f83ea ffff88001bbf0400 00000000ffffffed
Nov  5 15:26:04 test1 kernel: ffff88002aad9af8 ffff88001fc77fd8 000000000000fbc8 ffff88002aad9af8
Nov  5 15:26:04 test1 kernel: Call Trace:
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa06f83ea&amp;gt;] ? ptlrpc_request_handle_notconn+0x11a/0x290 [ptlrpc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8152a532&amp;gt;] schedule_timeout+0x192/0x2e0
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff81083f30&amp;gt;] ? process_timeout+0x0/0x10
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa06f73ea&amp;gt;] ptlrpc_set_wait+0x2da/0x860 [ptlrpc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0702066&amp;gt;] ? lustre_msg_set_jobid+0xb6/0x140 [ptlrpc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa06f79f7&amp;gt;] ptlrpc_queue_wait+0x87/0x220 [ptlrpc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa071840c&amp;gt;] llog_client_destroy+0xcc/0x340 [ptlrpc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0483421&amp;gt;] llog_destroy+0x51/0x170 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa04859dd&amp;gt;] llog_cat_process_cb+0x39d/0x5e0 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0480dc2&amp;gt;] llog_process_thread+0x9a2/0xde0 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0485640&amp;gt;] ? llog_cat_process_cb+0x0/0x5e0 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0482a1f&amp;gt;] llog_process_or_fork+0x13f/0x540 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0483d5d&amp;gt;] llog_cat_process_or_fork+0x1ad/0x300 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa07175c3&amp;gt;] ? llog_client_read_header+0x193/0x5e0 [ptlrpc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0a27740&amp;gt;] ? changelog_kkuc_cb+0x0/0x490 [mdc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0483ec9&amp;gt;] llog_cat_process+0x19/0x20 [obdclass]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0a2c42b&amp;gt;] mdc_changelog_send_thread+0x4bb/0x890 [mdc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffffa0a2bf70&amp;gt;] ? mdc_changelog_send_thread+0x0/0x890 [mdc]
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
Nov  5 15:26:04 test1 kernel: [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Actually it appears the MDC is spinning on reconnecting to the local MDT:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Nov  5 15:25:09 test1 kernel: Lustre: lustre-MDT0000-mdc-ffff88001ccf2800: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using this service will wait for recovery to complete
Nov  5 15:25:09 test1 kernel: Lustre: Skipped 55176 previous similar messages
Nov  5 15:25:09 test1 kernel: Lustre: lustre-MDT0000: Client 43cec978-39b5-b4ea-ff11-d7e5611258e4 (at 0@lo) reconnecting
Nov  5 15:25:09 test1 kernel: Lustre: Skipped 55177 previous similar messages
Nov  5 15:25:09 test1 kernel: Lustre: lustre-MDT0000-mdc-ffff88001ccf2800: Connection restored to lustre-MDT0000 (at 0@lo)
Nov  5 15:25:09 test1 kernel: Lustre: Skipped 55177 previous similar messages
Nov  5 15:25:25 test1 kernel: Lustre: lustre-MDT0000-mdc-ffff88001ccf2800: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using this service will wait for recovery to complete
Nov  5 15:25:25 test1 kernel: Lustre: Skipped 117892 previous similar messages
Nov  5 15:25:25 test1 kernel: Lustre: lustre-MDT0000: Client 43cec978-39b5-b4ea-ff11-d7e5611258e4 (at 0@lo) reconnecting
Nov  5 15:25:25 test1 kernel: Lustre: Skipped 117892 previous similar messages
Nov  5 15:25:25 test1 kernel: Lustre: lustre-MDT0000-mdc-ffff88001ccf2800: Connection restored to lustre-MDT0000 (at 0@lo)
Nov  5 15:25:25 test1 kernel: Lustre: Skipped 117892 previous similar messages
Nov  5 15:25:57 test1 kernel: Lustre: lustre-MDT0000-mdc-ffff88001ccf2800: Connection restored to lustre-MDT0000 (at 0@lo)
Nov  5 15:25:57 test1 kernel: Lustre: Skipped 230776 previous similar messages
Nov  5 15:25:57 test1 kernel: Lustre: lustre-MDT0000-mdc-ffff88001ccf2800: Connection to lustre-MDT0000 (at 0@lo) was lost; in progress operations using this service will wait for recovery to complete
Nov  5 15:25:57 test1 kernel: Lustre: Skipped 230777 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="27481">LU-5877</key>
            <summary>lfs changelog hangs after lfs changelog_clear</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="rread">Robert Read</reporter>
                        <labels>
                    </labels>
                <created>Wed, 5 Nov 2014 23:38:38 +0000</created>
                <updated>Mon, 19 Jan 2015 17:44:05 +0000</updated>
                            <resolved>Mon, 19 Jan 2015 17:44:05 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="98561" author="adilger" created="Thu, 6 Nov 2014 18:39:03 +0000"  >&lt;p&gt;Robert, what Lustre version is this (commit hash also if not on a tag)?&lt;/p&gt;

&lt;p&gt;Is this new behaviour (i.e. it wasn&apos;t broken in a previous release), or this is something you haven&apos;t tested before and just found?&lt;/p&gt;</comment>
                            <comment id="98566" author="adilger" created="Thu, 6 Nov 2014 18:42:04 +0000"  >&lt;p&gt;Also, what is the impact/severity of this problem?  Should it be critical/blocker for 2.7.0, or a corner case that doesn&apos;t need to be fixed immediately?&lt;/p&gt;</comment>
                            <comment id="98579" author="rread" created="Thu, 6 Nov 2014 19:04:53 +0000"  >&lt;p&gt;I installed from rpm on Monday, the commit hash is c071633. &lt;/p&gt;

&lt;p&gt;I&apos;d consider this pretty severe since it makes changelogs unusable if you can&apos;t clear them.  &lt;/p&gt;</comment>
                            <comment id="98610" author="rread" created="Thu, 6 Nov 2014 21:58:36 +0000"  >&lt;p&gt;This problem goes away if I revert &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5038&quot; title=&quot;Mount hangs for hours processing some llog&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5038&quot;&gt;&lt;del&gt;LU-5038&lt;/del&gt;&lt;/a&gt;, so this is might be a dupe of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5859&quot; title=&quot;Running lfs changelog with no registered user results in LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5859&quot;&gt;&lt;del&gt;LU-5859&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="103508" author="rread" created="Wed, 14 Jan 2015 18:48:18 +0000"  >&lt;p&gt;Poke - this bug should be a candidate for 2.7 since this makes changelogs unusable. Note, reverting &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5038&quot; title=&quot;Mount hangs for hours processing some llog&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5038&quot;&gt;&lt;del&gt;LU-5038&lt;/del&gt;&lt;/a&gt; appears to fix this as well as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5859&quot; title=&quot;Running lfs changelog with no registered user results in LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5859&quot;&gt;&lt;del&gt;LU-5859&lt;/del&gt;&lt;/a&gt;, so these appear to be related. &lt;/p&gt;</comment>
                            <comment id="103842" author="hdoreau" created="Mon, 19 Jan 2015 12:49:35 +0000"  >&lt;p&gt;As expected, the patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5859&quot; title=&quot;Running lfs changelog with no registered user results in LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5859&quot;&gt;&lt;del&gt;LU-5859&lt;/del&gt;&lt;/a&gt; fixes this issue as well (see &lt;a href=&quot;http://review.whamcloud.com/#/c/13414/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13414/&lt;/a&gt;)&lt;/p&gt;</comment>
                            <comment id="103847" author="jlevi" created="Mon, 19 Jan 2015 13:47:43 +0000"  >&lt;p&gt;Should this be closed as a duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5859&quot; title=&quot;Running lfs changelog with no registered user results in LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5859&quot;&gt;&lt;del&gt;LU-5859&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="103864" author="rread" created="Mon, 19 Jan 2015 17:03:12 +0000"  >&lt;p&gt;Sure, they appear to have the same cause.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="27447">LU-5859</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="27447">LU-5859</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="24638">LU-5038</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx0d3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>16431</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>