<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:57:17 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6109] LFSCK gets &quot;inconsistent&quot; flag and won&apos;t stop</title>
                <link>https://jira.whamcloud.com/browse/LU-6109</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While running the performance test (3.3.2) from the LFSCK Phase 3 test plan, I&#8217;ve run into a problem. I installed Lustre on all nodes and created a new file system. I don&#8217;t know if this matters, but the file system is empty when I run this test. I then run test 3.3.2 from the test plan; set fail_loc on the MDSs, create a small number of objects and run LFSCK. For some reason, LFSCK has the flag &#8220;inconsistent&#8221; on both MDSs. &lt;/p&gt;

&lt;p&gt;On mds01:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /proc/fs/lustre/mdd/scratch-MDT0000/lfsck_namespace 
name: lfsck_namespace
magic: 0xa0629d03
version: 2
status: scanning-phase1
flags: inconsistent
param: all_targets,create_ostobj,create_mdtobj
time_since_last_completed: 1155 seconds
time_since_latest_start: 27 seconds
time_since_last_checkpoint: N/A
latest_start_position: 77, N/A, N/A
last_checkpoint_position: N/A, N/A, N/A
first_failure_position: N/A, N/A, N/A
checked_phase1: 461
checked_phase2: 0
updated_phase1: 208
updated_phase2: 0
failed_phase1: 0
failed_phase2: 0
directories: 27
dirent_repaired: 0
linkea_repaired: 208
nlinks_repaired: 0
multiple_linked_checked: 187
multiple_linked_repaired: 0
unknown_inconsistency: 0
unmatched_pairs_repaired: 0
dangling_repaired: 0
multiple_referenced_repaired: 0
bad_file_type_repaired: 0
lost_dirent_repaired: 0
local_lost_found_scanned: 0
local_lost_found_moved: 0
local_lost_found_skipped: 0
local_lost_found_failed: 0
striped_dirs_scanned: 0
striped_dirs_repaired: 0
striped_dirs_failed: 0
striped_dirs_disabled: 0
striped_dirs_skipped: 0
striped_shards_scanned: 2
striped_shards_repaired: 0
striped_shards_failed: 0
striped_shards_skipped: 0
name_hash_repaired: 0
success_count: 1
run_time_phase1: 28 seconds
run_time_phase2: 0 seconds
average_speed_phase1: 16 items/sec
average_speed_phase2: N/A
real_time_speed_phase1: 16 items/sec
real_time_speed_phase2: N/A
current_position: 238551041, N/A, N/A
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;There is nothing in dmesg except confirmation that fail_loc was set on the MDTs:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# dmesg
Lustre: ctl-scratch-MDT0000: super-sequence allocation rc = 0 [0x0000000380000400-0x00000003c0000400):0:mdt
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: ctl-scratch-MDT0000: super-sequence allocation rc = 0 [0x00000003c0000400-0x0000000400000400):1:mdt
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: Skipped 101 previous similar messages
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: Skipped 3 previous similar messages
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: Skipped 100 previous similar messages
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: Skipped 101 previous similar messages
Lustre: *** cfs_fail_loc=1603, val=0***
Lustre: Skipped 3 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;If I wait a few minutes, the following is in dmesg:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;INFO: task lfsck_namespace:469 blocked for more than 120 seconds.
      Not tainted 2.6.32-431.29.2.el6_lustre.gefdacb7.x86_64 #1
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
lfsck_namespa D 0000000000000006     0   469      2 0x00000080
 ffff880478e87ba0 0000000000000046 0000000000000000 ffff880a9849b7e0
 ffff880a9849b7e0 ffff880d7d69f000 ffff880478e87ba0 ffffffffa06d57e9
 ffff880478e66638 ffff880478e87fd8 000000000000fbc8 ffff880478e66638
Call Trace:
 [&amp;lt;ffffffffa06d57e9&amp;gt;] ? lu_object_find_try+0x99/0x2b0 [obdclass]
 [&amp;lt;ffffffffa06d5a3d&amp;gt;] lu_object_find_at+0x3d/0xe0 [obdclass]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa06d5b1f&amp;gt;] lu_object_find_slice+0x1f/0x80 [obdclass]
 [&amp;lt;ffffffffa0f33619&amp;gt;] lfsck_namespace_assistant_handler_p1+0x2d9/0x1f70 [lfsck]
 [&amp;lt;ffffffffa057d472&amp;gt;] ? cfs_hash_bd_from_key+0x42/0xd0 [libcfs]
 [&amp;lt;ffffffffa0f23347&amp;gt;] lfsck_assistant_engine+0x497/0x1e00 [lfsck]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0f22eb0&amp;gt;] ? lfsck_assistant_engine+0x0/0x1e00 [lfsck]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
INFO: task lfsck_namespace:469 blocked for more than 120 seconds.
      Not tainted 2.6.32-431.29.2.el6_lustre.gefdacb7.x86_64 #1
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
lfsck_namespa D 0000000000000006     0   469      2 0x00000080
 ffff880478e87ba0 0000000000000046 0000000000000000 ffff880a9849b7e0
 ffff880a9849b7e0 ffff880d7d69f000 ffff880478e87ba0 ffffffffa06d57e9
 ffff880478e66638 ffff880478e87fd8 000000000000fbc8 ffff880478e66638
Call Trace:
 [&amp;lt;ffffffffa06d57e9&amp;gt;] ? lu_object_find_try+0x99/0x2b0 [obdclass]
 [&amp;lt;ffffffffa06d5a3d&amp;gt;] lu_object_find_at+0x3d/0xe0 [obdclass]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa06d5b1f&amp;gt;] lu_object_find_slice+0x1f/0x80 [obdclass]
 [&amp;lt;ffffffffa0f33619&amp;gt;] lfsck_namespace_assistant_handler_p1+0x2d9/0x1f70 [lfsck]
 [&amp;lt;ffffffffa057d472&amp;gt;] ? cfs_hash_bd_from_key+0x42/0xd0 [libcfs]
 [&amp;lt;ffffffffa0f23347&amp;gt;] lfsck_assistant_engine+0x497/0x1e00 [lfsck]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0f22eb0&amp;gt;] ? lfsck_assistant_engine+0x0/0x1e00 [lfsck]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;On mds02, the following is the state of LFSCK on mds02:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /proc/fs/lustre/mdd/scratch-MDT0001/lfsck_namespace 
name: lfsck_namespace
magic: 0xa0629d03
version: 2
status: scanning-phase2
flags: scanned-once,inconsistent
param: all_targets,create_ostobj,create_mdtobj
time_since_last_completed: 2533 seconds
time_since_latest_start: 1405 seconds
time_since_last_checkpoint: 1405 seconds
latest_start_position: 77, N/A, N/A
last_checkpoint_position: 260571137, [0x3c0000401:0x4:0x0], 0x5d2c710b01284458
first_failure_position: N/A, N/A, N/A
checked_phase1: 16
checked_phase2: 0
updated_phase1: 0
updated_phase2: 0
failed_phase1: 0
failed_phase2: 0
directories: 6
dirent_repaired: 0
linkea_repaired: 4
nlinks_repaired: 0
multiple_linked_checked: 0
multiple_linked_repaired: 0
unknown_inconsistency: 0
unmatched_pairs_repaired: 0
dangling_repaired: 0
multiple_referenced_repaired: 0
bad_file_type_repaired: 0
lost_dirent_repaired: 0
local_lost_found_scanned: 0
local_lost_found_moved: 0
local_lost_found_skipped: 0
local_lost_found_failed: 0
striped_dirs_scanned: 0
striped_dirs_repaired: 0
striped_dirs_failed: 0
striped_dirs_disabled: 0
striped_dirs_skipped: 0
striped_shards_scanned: 2
striped_shards_repaired: 0
striped_shards_failed: 0
striped_shards_skipped: 0
name_hash_repaired: 0
success_count: 1
run_time_phase1: 0 seconds
run_time_phase2: 1405 seconds
average_speed_phase1: 16 items/sec
average_speed_phase2: 0 objs/sec
real_time_speed_phase1: N/A
real_time_speed_phase2: 0 objs/sec
current_position: [0x0:0x0:0x0]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I cannot stop nor start LFSCK again. So, I&#8217;m not sure what state it is in.&lt;/p&gt;

&lt;p&gt;Trying to start LFSCK again fails:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl lfsck_start -A -M scratch-MDT0000 -c -C --type namespace 
Fail to start LFSCK: Operation already in progress
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Stopping LFSCK works on mds02:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /proc/fs/lustre/mdd/scratch-MDT0001/lfsck_namespace 
name: lfsck_namespace
magic: 0xa0629d03
version: 2
status: stopped
flags: scanned-once,inconsistent
param: all_targets,create_ostobj,create_mdtobj
time_since_last_completed: 2890 seconds
time_since_latest_start: 1762 seconds
time_since_last_checkpoint: 10 seconds
latest_start_position: 77, N/A, N/A
last_checkpoint_position: 260571137, [0x3c0000401:0x4:0x0], 0x5d2c710b01284458
first_failure_position: N/A, N/A, N/A
checked_phase1: 16
checked_phase2: 0
updated_phase1: 0
updated_phase2: 0
failed_phase1: 0
failed_phase2: 0
directories: 6
dirent_repaired: 0
linkea_repaired: 4
nlinks_repaired: 0
multiple_linked_checked: 0
multiple_linked_repaired: 0
unknown_inconsistency: 0
unmatched_pairs_repaired: 0
dangling_repaired: 0
multiple_referenced_repaired: 0
bad_file_type_repaired: 0
lost_dirent_repaired: 0
local_lost_found_scanned: 0
local_lost_found_moved: 0
local_lost_found_skipped: 0
local_lost_found_failed: 0
striped_dirs_scanned: 0
striped_dirs_repaired: 0
striped_dirs_failed: 0
striped_dirs_disabled: 0
striped_dirs_skipped: 0
striped_shards_scanned: 2
striped_shards_repaired: 0
striped_shards_failed: 0
striped_shards_skipped: 0
name_hash_repaired: 0
success_count: 1
run_time_phase1: 0 seconds
run_time_phase2: 1753 seconds
average_speed_phase1: 16 items/sec
average_speed_phase2: 0 objs/sec
real_time_speed_phase1: N/A
real_time_speed_phase2: N/A
current_position: N/A
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;but does not stop LFSCK on mds01&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# cat /proc/fs/lustre/mdd/scratch-MDT0000/lfsck_namespace 
name: lfsck_namespace
magic: 0xa0629d03
version: 2
status: scanning-phase1
flags: inconsistent
param: all_targets,create_ostobj,create_mdtobj
time_since_last_completed: 2953 seconds
time_since_latest_start: 1825 seconds
time_since_last_checkpoint: N/A
latest_start_position: 77, N/A, N/A
last_checkpoint_position: N/A, N/A, N/A
first_failure_position: N/A, N/A, N/A
checked_phase1: 461
checked_phase2: 0
updated_phase1: 208
updated_phase2: 0
failed_phase1: 0
failed_phase2: 0
directories: 27
dirent_repaired: 0
linkea_repaired: 208
nlinks_repaired: 0
multiple_linked_checked: 187
multiple_linked_repaired: 0
unknown_inconsistency: 0
unmatched_pairs_repaired: 0
dangling_repaired: 0
multiple_referenced_repaired: 0
bad_file_type_repaired: 0
lost_dirent_repaired: 0
local_lost_found_scanned: 0
local_lost_found_moved: 0
local_lost_found_skipped: 0
local_lost_found_failed: 0
striped_dirs_scanned: 0
striped_dirs_repaired: 0
striped_dirs_failed: 0
striped_dirs_disabled: 0
striped_dirs_skipped: 0
striped_shards_scanned: 2
striped_shards_repaired: 0
striped_shards_failed: 0
striped_shards_skipped: 0
name_hash_repaired: 0
success_count: 1
run_time_phase1: 1826 seconds
run_time_phase2: 0 seconds
average_speed_phase1: 0 items/sec
average_speed_phase2: N/A
real_time_speed_phase1: 0 items/sec
real_time_speed_phase2: N/A
current_position: 238551041, N/A, N/A
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>OpenSFS cluster with 2 MDSs with 1MDT each, 3 OSSs with two OSTs each and three clients running Lustre (master) build # 2812</environment>
        <key id="28149">LU-6109</key>
            <summary>LFSCK gets &quot;inconsistent&quot; flag and won&apos;t stop</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="jamesanunez">James Nunez</reporter>
                        <labels>
                            <label>HB</label>
                            <label>lfsck</label>
                    </labels>
                <created>Mon, 12 Jan 2015 04:58:53 +0000</created>
                <updated>Wed, 16 Mar 2016 04:47:35 +0000</updated>
                            <resolved>Sun, 8 Feb 2015 04:55:38 +0000</resolved>
                                    <version>Lustre 2.7.0</version>
                                    <fixVersion>Lustre 2.7.0</fixVersion>
                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="103162" author="jamesanunez" created="Mon, 12 Jan 2015 05:06:14 +0000"  >&lt;p&gt;Unfortunately, this issue is very reproducible.&lt;/p&gt;</comment>
                            <comment id="103212" author="jlevi" created="Mon, 12 Jan 2015 18:20:34 +0000"  >&lt;p&gt;Fan Yong,&lt;br/&gt;
Can you please have a look at this one and comment?&lt;br/&gt;
Thank you!&lt;/p&gt;</comment>
                            <comment id="103272" author="jamesanunez" created="Mon, 12 Jan 2015 22:02:16 +0000"  >&lt;p&gt;The Performance test 3.3.2 was run on the previous three lustre-master tags, but this problem was not discovered earlier due to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5914&quot; title=&quot;LFSCK: dt_lookup()) LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5914&quot;&gt;&lt;del&gt;LU-5914&lt;/del&gt;&lt;/a&gt;. &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5914&quot; title=&quot;LFSCK: dt_lookup()) LBUG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5914&quot;&gt;&lt;del&gt;LU-5914&lt;/del&gt;&lt;/a&gt; also made 3.3.2 fail.&lt;/p&gt;</comment>
                            <comment id="104309" author="gerrit" created="Thu, 22 Jan 2015 06:47:08 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13493&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13493&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6109&quot; title=&quot;LFSCK gets &amp;quot;inconsistent&amp;quot; flag and won&amp;#39;t stop&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6109&quot;&gt;&lt;del&gt;LU-6109&lt;/del&gt;&lt;/a&gt; lfsck: NOT purge object by OI scrub&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b6d3fd89a129e7aed971ed2e0ead614b8c7bb970&lt;/p&gt;</comment>
                            <comment id="104310" author="yong.fan" created="Thu, 22 Jan 2015 07:06:44 +0000"  >&lt;p&gt;James, would you please to verify this patch in your environment? Thanks!&lt;/p&gt;</comment>
                            <comment id="104462" author="jamesanunez" created="Fri, 23 Jan 2015 03:36:03 +0000"  >&lt;p&gt;The proposed patch, &lt;a href=&quot;http://review.whamcloud.com/#/c/13493/4&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13493/4&lt;/a&gt; ,  does not fix the problem reported in this ticket. I ran test 3.3.2 against&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl get_param version
version=
lustre: 2.6.92
kernel: patchless_client
build:  jenkins-arch=x86_64,build_type=server,distro=el6,ib_stack=inkernel-29887-g2fb04bd-PRISTINE-2.6.32-431.29.2.el6_lustre.gffd1fc2.x86_64
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The primary MDS, MDS01, is stuck in:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;status: scanning-phase1
flags: inconsistent
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;On MDS02, it&#8217;s showing as LFSCK status:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;status: scanning-phase2
flags: scanned-once,inconsistent
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;In dmesg on MDS01, &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;INFO: task lfsck_namespace:542 blocked for more than 120 seconds.
      Not tainted 2.6.32-431.29.2.el6_lustre.gffd1fc2.x86_64 #1
&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot; disables this message.
lfsck_namespa D 0000000000000001     0   542      2 0x00000080
 ffff880a556f3ba0 0000000000000046 0000000000000000 ffff880597ae2360
 ffff880597ae2360 ffff880832e27000 ffff880a556f3ba0 ffffffffa06d2bc9
 ffff880a56a285f8 ffff880a556f3fd8 000000000000fbc8 ffff880a56a285f8
Call Trace:
 [&amp;lt;ffffffffa06d2bc9&amp;gt;] ? lu_object_find_try+0x99/0x2b0 [obdclass]
 [&amp;lt;ffffffffa06d2e1d&amp;gt;] lu_object_find_at+0x3d/0xe0 [obdclass]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa06d1e69&amp;gt;] ? lu_object_put+0x1d9/0x350 [obdclass]
 [&amp;lt;ffffffffa06d2eff&amp;gt;] lu_object_find_slice+0x1f/0x80 [obdclass]
 [&amp;lt;ffffffffa0f2ad19&amp;gt;] lfsck_namespace_assistant_handler_p1+0x2d9/0x1f70 [lfsck]
 [&amp;lt;ffffffffa057b442&amp;gt;] ? cfs_hash_bd_from_key+0x42/0xd0 [libcfs]
 [&amp;lt;ffffffffa0f1f9f7&amp;gt;] lfsck_assistant_engine+0x497/0x1e00 [lfsck]
 [&amp;lt;ffffffff81061d00&amp;gt;] ? default_wake_function+0x0/0x20
 [&amp;lt;ffffffffa0f1f560&amp;gt;] ? lfsck_assistant_engine+0x0/0x1e00 [lfsck]
 [&amp;lt;ffffffff8109abf6&amp;gt;] kthread+0x96/0xa0
 [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
 [&amp;lt;ffffffff8109ab60&amp;gt;] ? kthread+0x0/0xa0
 [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;which differs from the stack trace in the description of this ticket in that it  has the added call &#8216;lu_object_put&#8217;.&lt;/p&gt;

&lt;p&gt;When I issue the following on MDS01:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lctl lfsck_stop -A -M scratch-MDT0000
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;LFSCK on MDS02 does stop, but LFSCK does not stop on MDS01. I can&#8217;t kill the lfsck_namespace process on MDS01.&lt;/p&gt;</comment>
                            <comment id="104464" author="yong.fan" created="Fri, 23 Jan 2015 04:23:41 +0000"  >&lt;p&gt;James, would you please to upload the MDS debug log to the Jira? Is there any file deleted during your test? Thanks!&lt;/p&gt;</comment>
                            <comment id="104465" author="jamesanunez" created="Fri, 23 Jan 2015 04:40:49 +0000"  >&lt;p&gt;There are no files deleted during this test. &lt;/p&gt;

&lt;p&gt;I did not capture kernel logs. So, let me run again and I&apos;ll upload logs when I have them.&lt;/p&gt;</comment>
                            <comment id="104466" author="yong.fan" created="Fri, 23 Jan 2015 04:42:03 +0000"  >&lt;p&gt;Thanks James! If possible, please upload the log to Jira directly, that is more easy for me to access.&lt;/p&gt;</comment>
                            <comment id="104468" author="yong.fan" created="Fri, 23 Jan 2015 05:00:08 +0000"  >&lt;p&gt;To be confirmed with you: before LFSCK run, you injected failure stub fail_loc=1603 on both MDTs, so all the files/directories/remote_directories/striped_directories have crashed linkEA, then run namespace LFSCK to repair them. Right?&lt;/p&gt;</comment>
                            <comment id="104493" author="gerrit" created="Fri, 23 Jan 2015 08:34:57 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13511&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13511&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6109&quot; title=&quot;LFSCK gets &amp;quot;inconsistent&amp;quot; flag and won&amp;#39;t stop&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6109&quot;&gt;&lt;del&gt;LU-6109&lt;/del&gt;&lt;/a&gt; lfsck: check FID validity before locating object&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e248ffbf7d27417b3b85e930b0266d4719cb75ce&lt;/p&gt;</comment>
                            <comment id="104494" author="yong.fan" created="Fri, 23 Jan 2015 08:39:12 +0000"  >&lt;p&gt;The 13511 is based on the patch 13493. It adds further fixes for the possible hang while locating an object.&lt;br/&gt;
James, would you please to try this one when you have time. Thanks!&lt;/p&gt;</comment>
                            <comment id="104496" author="jamesanunez" created="Fri, 23 Jan 2015 08:59:19 +0000"  >&lt;p&gt;Here are the logs from the MDSs with only the first patch, 13493, applied. I stopped collecting logs after a few minutes of MDS01 stuck in scanning-phase1.&lt;/p&gt;

&lt;p&gt;The script sets fail_loc to 1603 on the MDSs, writes/creates objects, sets fail_loc to 0 on the MDSs and then runs LFSCK namespace. &lt;/p&gt;

&lt;p&gt;I will test with the new patch next.&lt;/p&gt;</comment>
                            <comment id="104680" author="jamesanunez" created="Mon, 26 Jan 2015 05:12:50 +0000"  >&lt;p&gt;I&apos;ve attached logs from the MDSs with the build for patch 13511; lfsck_log_mds01_13511.txt and lfsck_log_mds02_13511.txt. &lt;/p&gt;

&lt;p&gt;The first time I ran test 3.3.2, it completed with no problems. I ran it a second time and LFSCK namespace on MDS01 is stuck in &apos;scanning-phase1&apos; as described in this ticket.&lt;/p&gt;</comment>
                            <comment id="105071" author="yong.fan" created="Thu, 29 Jan 2015 07:49:51 +0000"  >&lt;p&gt;James,&lt;/p&gt;

&lt;p&gt;Thanks for the verification. I found more clues and updated the patch (&lt;a href=&quot;http://review.whamcloud.com/#/c/13511/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13511/&lt;/a&gt;). Would you please retry the latest patch with -1 debug_log enabled? BTW, please tell me what files/subdirs you created under the striped directory for the test. Thanks!&lt;/p&gt;</comment>
                            <comment id="105099" author="jamesanunez" created="Thu, 29 Jan 2015 18:41:43 +0000"  >&lt;p&gt;I ran with your latest patch and uploaded a tar gzipped file at lfsck_logs.tgz. &lt;/p&gt;

&lt;p&gt;The directories/files that are created during this test are, in order:&lt;br/&gt;
create a top level directory called test_dir&lt;br/&gt;
create another directory test_dir/sdir-0&lt;br/&gt;
in sdir-0, create 87 files test_dir/sdir-0/z.N, where N=0.{0..28}, 1.{29..57}, 2.{58..86}&lt;br/&gt;
create one remote directory at test_dir/sdir-0/rdir-1 (left empty)&lt;br/&gt;
create 10 local directories test_dir/sdir-0/ldir-{1..10} (left empty)&lt;br/&gt;
create one striped directory at test_dir/sdir-0/rdir-1 (left empty)&lt;br/&gt;
create 1 hard link at test_dir/sdir-0/ln.1 to z.0.0&lt;/p&gt;

&lt;p&gt;Then the directory test_dir/sdir-1 is created with the same files and directories with the same names as above.&lt;/p&gt;</comment>
                            <comment id="105294" author="yong.fan" created="Sat, 31 Jan 2015 14:33:44 +0000"  >&lt;p&gt;There is a memory leak in the former LFSCK run, so the subsequent LFSCK was blocked by some stale data. The root cause of the memory leak is that LFSCK wrongly used the OSD device to handle OSP (remote MDT) inconsistency. Such issues have been fixed by &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-5791&quot; title=&quot;LFSCK 5: use bottom object for consistency verification&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-5791&quot;&gt;&lt;del&gt;LU-5791&lt;/del&gt;&lt;/a&gt; patch: &lt;a href=&quot;http://review.whamcloud.com/13392&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13392&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;So I enhanced the 13511 patch (&lt;a href=&quot;http://review.whamcloud.com/#/c/13511/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/13511/&lt;/a&gt;) against the 13392 patch.&lt;br/&gt;
James, would you please to retry the latest patch? Thanks!&lt;/p&gt;

&lt;p&gt;BTW, I have verified with your method, but cannot reproduce the failures any longer.&lt;/p&gt;</comment>
                            <comment id="105459" author="jamesanunez" created="Tue, 3 Feb 2015 00:27:35 +0000"  >&lt;p&gt;I&apos;ve run with the latest version of the 13511 patch and cannot reproduce this error any more. I&apos;ve run the test several times in a row and increased the number of objects written and LFSCK completes each time.&lt;/p&gt;</comment>
                            <comment id="105471" author="yong.fan" created="Tue, 3 Feb 2015 03:29:35 +0000"  >&lt;p&gt;Thanks James. I think the original issue should have been fixed via above two patches (13392, 13511).&lt;/p&gt;</comment>
                            <comment id="106179" author="gerrit" created="Sun, 8 Feb 2015 02:48:36 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13511/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13511/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6109&quot; title=&quot;LFSCK gets &amp;quot;inconsistent&amp;quot; flag and won&amp;#39;t stop&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6109&quot;&gt;&lt;del&gt;LU-6109&lt;/del&gt;&lt;/a&gt; lfsck: check FID validity before locating object&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4d408c9aed9adaf1f4e2ea87851728a1cf662594&lt;/p&gt;</comment>
                            <comment id="106192" author="pjones" created="Sun, 8 Feb 2015 04:55:38 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                            <comment id="108930" author="gerrit" created="Thu, 5 Mar 2015 19:08:25 +0000"  >&lt;p&gt;Yang Sheng (yang.sheng@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/13987&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13987&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6109&quot; title=&quot;LFSCK gets &amp;quot;inconsistent&amp;quot; flag and won&amp;#39;t stop&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6109&quot;&gt;&lt;del&gt;LU-6109&lt;/del&gt;&lt;/a&gt; osd-ldiskfs: fix previous patch issue&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: defae4053b7bbe1e013af1a9ec99090d6e7abe1c&lt;/p&gt;</comment>
                            <comment id="120722" author="gerrit" created="Wed, 8 Jul 2015 17:09:23 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/13987/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/13987/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6109&quot; title=&quot;LFSCK gets &amp;quot;inconsistent&amp;quot; flag and won&amp;#39;t stop&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6109&quot;&gt;&lt;del&gt;LU-6109&lt;/del&gt;&lt;/a&gt; osd-ldiskfs: handle no fid-in-dirent correctly&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: a27201d13828d4b9bae9261dcc552c1f767aff21&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="16738" name="lfsck_log_mds01.txt" size="412410" author="jamesanunez" created="Fri, 23 Jan 2015 08:59:19 +0000"/>
                            <attachment id="16759" name="lfsck_log_mds01_13511.txt" size="411330" author="jamesanunez" created="Mon, 26 Jan 2015 05:12:50 +0000"/>
                            <attachment id="16739" name="lfsck_log_mds02.txt" size="10088" author="jamesanunez" created="Fri, 23 Jan 2015 08:59:19 +0000"/>
                            <attachment id="16760" name="lfsck_log_mds02_13511.txt" size="11144" author="jamesanunez" created="Mon, 26 Jan 2015 05:12:50 +0000"/>
                            <attachment id="16795" name="lfsck_logs.tgz" size="234" author="jamesanunez" created="Thu, 29 Jan 2015 18:41:43 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzx3rj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>17016</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>