<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:18:42 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
<language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8569] Sharded DNE directory full of files that don&apos;t exist</title>
                <link>https://jira.whamcloud.com/browse/LU-8569</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;On our DNE testbed, one of our sharded directories seems to contain files that are all in a broken state.  Currently both servers and clients are running 2.8.0_0.0.llnlpreview.40 (see the lustre-release-fe-llnl repo).&lt;/p&gt;

&lt;p&gt;We can get a directory listing, but nothing listed is actually accessible.  Here is an excerpt from running &lt;em&gt;ls -l&lt;/em&gt;:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# pwd
/p/lquake/casses1/opal-jet/simul_2
# ls -l
ls: cannot access simul_link.2243: No such file or directory
ls: cannot access simul_link.3161: No such file or directory
ls: cannot access simul_link.3129: No such file or directory
ls: cannot access simul_link.3893: No such file or directory
ls: cannot access simul_link.691: No such file or directory
ls: cannot access simul_link.3233: No such file or directory
ls: cannot access simul_link.235: No such file or directory
ls: cannot access simul_link.1653: No such file or directory
ls: cannot access simul_link.3167: No such file or directory
ls: cannot access simul_link.681: No such file or directory
ls: cannot access simul_link.835: No such file or directory
ls: cannot access simul_link.3857: No such file or directory
ls: cannot access simul_link.1591: No such file or directory
ls: cannot access simul_link.1175: No such file or directory
[cut]
-????????? ? ? ? ?            ? simul_link.937
-????????? ? ? ? ?            ? simul_link.94
-????????? ? ? ? ?            ? simul_link.940
-????????? ? ? ? ?            ? simul_link.941
-????????? ? ? ? ?            ? simul_link.942
-????????? ? ? ? ?            ? simul_link.943
-????????? ? ? ? ?            ? simul_link.944
-????????? ? ? ? ?            ? simul_link.947
[cut]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Here is the striping information:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lfs getdirstripe .
.
lmv_stripe_count: 16 lmv_stripe_offset: 12
mdtidx           FID[seq:oid:ver]
    12           [0x50000996c:0x14fed:0x0]
    13           [0x54000919d:0x14fed:0x0]
    14           [0x58000a086:0x14fed:0x0]
    15           [0x5c000996b:0x14fed:0x0]
     0           [0x200006b03:0x14fed:0x0]
     1           [0x3000089cc:0x14fed:0x0]
     2           [0x38000996d:0x14fed:0x0]
     3           [0x4c000b0df:0x14fed:0x0]
     4           [0x2c000a142:0xec09:0x0]
     5           [0x3c000b8b2:0xec09:0x0]
     6           [0x34000a143:0xec09:0x0]
     7           [0x40000a143:0xec09:0x0]
     8           [0x44000a142:0xec09:0x0]
     9           [0x24000a143:0xec09:0x0]
    10           [0x2800091a4:0xec09:0x0]
    11           [0x4800091a3:0xec09:0x0]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I ran lfsck on all services (at least those started by the &quot;--all&quot; option), but that did not address this situation.&lt;/p&gt;

&lt;p&gt;The problem files cannot be unlinked:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# rm simul_link.999
rm: cannot remove &apos;simul_link.999&apos;: No such file or directory
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="39229">LU-8569</key>
            <summary>Sharded DNE directory full of files that don&apos;t exist</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="morrone">Christopher Morrone</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Tue, 30 Aug 2016 21:14:18 +0000</created>
                <updated>Thu, 10 Aug 2017 23:41:28 +0000</updated>
                            <resolved>Wed, 18 Jan 2017 19:08:36 +0000</resolved>
                                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="163690" author="adilger" created="Wed, 31 Aug 2016 07:48:07 +0000"  >&lt;p&gt;Can you check &quot;lfs getstripe&quot; on a few of the broken files, to see if the FIDs of the IST objects are unusual?  I suspect that the directory is OK, but the error is coming from the OST which does not have the objects in the MDT file&apos;s layout. That may still indicate a problem with the MDT or OST, but will give a starting point. &lt;/p&gt;</comment>
                            <comment id="163691" author="adilger" created="Wed, 31 Aug 2016 07:50:47 +0000"  >&lt;p&gt;Can you please check &quot;lfs getstripe&quot; on a few of the broken files. It may be that the error is coming from the OST and not the directory at all.&lt;/p&gt;</comment>
                            <comment id="163703" author="pjones" created="Wed, 31 Aug 2016 11:40:48 +0000"  >&lt;p&gt;Assigning to Fan Yong for further investigation&lt;/p&gt;</comment>
                            <comment id="163837" author="morrone" created="Wed, 31 Aug 2016 20:39:42 +0000"  >&lt;p&gt;Here is the result of lfs getstripe for files in that directory:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# lfs getstripe simul_link.2280
error opening simul_link.2280: Bad address (14)
llapi_semantic_traverse: Failed to open &apos;simul_link.2280&apos;: Bad address (14)
error: getstripe failed for simul_link.2280.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="164538" author="yong.fan" created="Thu, 1 Sep 2016 00:27:30 +0000"  >&lt;p&gt;Would you please to collect the -1 level Lustre debug log on both the client and MDT when you hit &quot;lfs getstripe simul_link.2280&quot; failure? Since we do NOT know (if you know, that is better) on which MDT the file &quot;lfs getstripe simul_link.2280&quot; resides, then have to collect the logs on all MDTs.&lt;/p&gt;

&lt;p&gt;Thanks! &lt;/p&gt;</comment>
                            <comment id="166472" author="dinatale2" created="Mon, 19 Sep 2016 23:37:19 +0000"  >&lt;p&gt;I collected -1 level Lustre logs on the client and for each MDT. They are in the tar file &apos;getstripelogs.tar.gz&apos; which I attached to this issue.&lt;/p&gt;

&lt;p&gt;The command I logged is: &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lfs getstripe simul_link.898&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The output of the command was: &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;error opening simul_link.898: Bad address (14)
llapi_semantic_traverse: Failed to open &apos;simul_link.898&apos;: Bad address (14)
error: getstripe failed for simul_link.898.
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;A grep seems to indicate that jet2 may be the log of interest, but I included all of them for completeness. Let me know if you need any other information.&lt;/p&gt;</comment>
                            <comment id="166477" author="yong.fan" created="Tue, 20 Sep 2016 02:02:26 +0000"  >&lt;p&gt;The log on the client (client-getstripe.log) shows that:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00800000:00000001:3.0:1474324139.597219:0:117923:0:(lmv_intent.c:276:lmv_intent_open()) Process entered
00800000:00000040:3.0:1474324139.597221:0:117923:0:(lustre_lmv.h:170:lmv_name_to_stripe_index()) name simul_link.898 hash_type 2 idx 1
00800000:00000040:3.0:1474324139.597223:0:117923:0:(lmv_obd.c:1715:lmv_locate_target_for_name()) locate on mds 1 [0x30000cf20:0x1:0x0]
00800000:00000002:3.0:1474324139.597224:0:117923:0:(lmv_intent.c:316:lmv_intent_open()) OPEN_INTENT with fid1=[0x30000cf20:0x1:0x0], fid2=[0x0:0x0:0x0], name=&apos;simul_link.898&apos; -&amp;gt; mds #1
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Means client intent open (&lt;span class=&quot;error&quot;&gt;&amp;#91;0x30000cf20:0x1:0x0&amp;#93;&lt;/span&gt;/simul_link.898) RPC to the mds#1&lt;br/&gt;
The log on the mds1 (jet2-getstripe.log) shows that:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000004:00000001:7.0:1474324139.598512:0:38638:0:(mdt_open.c:1198:mdt_reint_open()) Process entered
00000020:00000001:7.0:1474324139.598514:0:38638:0:(lprocfs_jobstats.c:272:lprocfs_job_stats_log()) Process entered
00000020:00000001:7.0:1474324139.598517:0:38638:0:(lprocfs_jobstats.c:323:lprocfs_job_stats_log()) Process leaving (rc=0 : 0 : 0)
00000004:00000002:7.0:1474324139.598518:0:38638:0:(mdt_open.c:1226:mdt_reint_open()) I am going to open [0x30000cf20:0x1:0x0]/(simul_link.898-&amp;gt;[0x0:0x0:0x0]) cr_flag=01 mode=0100000 msg_flag=0x0
...
00080000:00000001:7.0:1474324139.598600:0:38638:0:(osd_index.c:395:osd_dir_lookup()) Process entered
00080000:00000001:7.0:1474324139.598639:0:38638:0:(osd_index.c:415:osd_dir_lookup()) Process leaving (rc=1 : 1 : 1)
...
00000004:00000001:7.0:1474324139.599521:0:38638:0:(osp_trans.c:469:osp_remote_sync()) Process leaving (rc=18446744073709551614 : -2 : fffffffffffffffe)
00000004:00000001:7.0:1474324139.599522:0:38638:0:(osp_object.c:591:osp_attr_get()) Process leaving via out (rc=18446744073709551614 : -2 : 0xfffffffffffffffe)
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Means the mds#1 received the intent open RPC. It did lookup firstly and found the name entry &quot;simul_link.898&quot; existed on this MDT, but its FID is remote, then triggered osp_attr_get() to fetch the object&apos;s attribute when initialise the object. Unfortunately, the remote MDT returned -2 (-ENOENT) to this MDT. That the &quot;simul_link.898&quot; is dangling name entry. That is why the subsequent operation got -14 (-EFAULT) failure.&lt;/p&gt;

&lt;p&gt;Currently, I do not know what caused the dangling name entry. But I would suggest to run namespace LFSCK to fix related Lustre inconsistency. To be safe, you can run namespace LFSCK without &quot;-C&quot; option firstly, that will detect how many dangling name entries in the system but NOT auto repair them. Then you can check whether need to fix them. If you think it is necessary to re-create related lost MDT-objects, then re-run the namespace LFSCK with &quot;-C&quot; specified.&lt;/p&gt;</comment>
                            <comment id="166841" author="dinatale2" created="Wed, 21 Sep 2016 23:16:34 +0000"  >&lt;p&gt;We came up with an easier reproducer for this issue in case you need to collect more information. Details are below.&lt;/p&gt;

&lt;p&gt;Create a striped directory for this test. cd to that directory and create a simple file:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;echo &quot;hello world&quot; &amp;gt; afile&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;From there, create a script called &apos;linkme.sh&apos; with the following contents:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;#!/bin/bash
filename=$(hostname)_${RANDOM}
ln afile $filename
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Now, using srun, we can run the script across many nodes/cores w/ no timeout. Example below:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;srun -W 0 -N 47 -n $((47*36)) linkme.sh&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The script ran for a bit, but eventually we started seeing &quot;bad address&quot; errors. I&apos;ll continue to try and collect more information.&lt;/p&gt;</comment>
                            <comment id="166963" author="dinatale2" created="Thu, 22 Sep 2016 19:32:44 +0000"  >&lt;p&gt;Ran an lfsck namespace with -C and got the following LBUG on multiple MDTs.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2016-09-22 10:04:23 [493341.943717] LustreError: 127771:0:(lfsck_namespace.c:4452:lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed: 
2016-09-22 10:04:23 [493341.958848] LustreError: 127771:0:(lfsck_namespace.c:4452:lfsck_namespace_double_scan()) LBUG
2016-09-22 10:04:23 [493341.968781] Pid: 127771, comm: lfsck
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Have the following call stack on two MDTs.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;2016-09-22 10:03:52 Sep 22 10:03:52 [493315.464373] Kernel panic - not syncing: LBUG
2016-09-22 10:03:52 jet6 kernel: [49[493315.470430] CPU: 2 PID: 111809 Comm: lfsck Tainted: P           OE  ------------   3.10.0-327.28.2.1chaos.ch6.x86_64 #1
2016-09-22 10:03:52 3315.297027] Lus[493315.484175] Hardware name: Intel Corporation S2600WTTR/S2600WTTR, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016
2016-09-22 10:03:52 treError: 111809[493315.497715]  ffffffffa079be0f 0000000055805053 ffff882757e4fc78 ffffffff8164cae7
2016-09-22 10:03:52 :0:(lfsck_namesp[493315.507701]  ffff882757e4fcf8 ffffffff81645adf ffffffff00000008 ffff882757e4fd08
2016-09-22 10:03:52 ace.c:4452:lfsck[493315.517684]  ffff882757e4fca8 0000000055805053 ffffffffa1070e70 0000000000000246
2016-09-22 10:03:52 _namespace_doubl[493315.527666] Call Trace:
2016-09-22 10:03:52 e_scan()) ASSERT[493315.532060]  [&amp;lt;ffffffff8164cae7&amp;gt;] dump_stack+0x19/0x1b
2016-09-22 10:03:52 ION( list_empty([493315.539478]  [&amp;lt;ffffffff81645adf&amp;gt;] panic+0xd8/0x1e7
2016-09-22 10:03:52 &amp;amp;lad-&amp;gt;lad_req_li[493315.546501]  [&amp;lt;ffffffffa077fdeb&amp;gt;] lbug_with_loc+0xab/0xc0 [libcfs]
2016-09-22 10:03:52 st) ) failed: 
2016-09-22 10:03:52 [493315.555082]  [&amp;lt;ffffffffa102c2a6&amp;gt;] lfsck_namespace_double_scan+0x106/0x140 [lfsck]
2016-09-22 10:03:52 Sep 22 10:03:52 [493315.565122]  [&amp;lt;ffffffffa10234f9&amp;gt;] lfsck_double_scan+0x59/0x200 [lfsck]
2016-09-22 10:03:52 jet6 kernel: [49[493315.574086]  [&amp;lt;ffffffffa0d88fc4&amp;gt;] ? osd_zfs_otable_it_fini+0x64/0x110 [osd_zfs]
2016-09-22 10:03:52 3315.311863] Lus[493315.583931]  [&amp;lt;ffffffffa0d88fc4&amp;gt;] ? osd_zfs_otable_it_fini+0x64/0x110 [osd_zfs]
2016-09-22 10:03:52 treError: 111809[493315.593765]  [&amp;lt;ffffffff811c8bad&amp;gt;] ? kfree+0x12d/0x170
2016-09-22 10:03:52 :0:(lfsck_namesp[493315.601075]  [&amp;lt;ffffffffa1028044&amp;gt;] lfsck_master_engine+0x434/0x1310 [lfsck]
2016-09-22 10:03:52 ace.c:4452:lfsck[493315.610415]  [&amp;lt;ffffffff81015588&amp;gt;] ? __switch_to+0xf8/0x4d0
2016-09-22 10:03:52 _namespace_doubl[493315.618212]  [&amp;lt;ffffffff810bd4f0&amp;gt;] ? wake_up_state+0x20/0x20
2016-09-22 10:03:52 e_scan()) LBUG
2016-09-22 10:03:52 [493315.626108]  [&amp;lt;ffffffffa1027c10&amp;gt;] ? lfsck_master_oit_engine+0x1430/0x1430 [lfsck]
2016-09-22 10:03:52 [493315.636145]  [&amp;lt;ffffffff810a99bf&amp;gt;] kthread+0xcf/0xe0
2016-09-22 10:03:52 [493315.642238]  [&amp;lt;ffffffff810a98f0&amp;gt;] ? kthread_create_on_node+0x140/0x140
2016-09-22 10:03:52 [493315.650187]  [&amp;lt;ffffffff8165d9d8&amp;gt;] ret_from_fork+0x58/0x90
2016-09-22 10:03:52 [493315.656864]  [&amp;lt;ffffffff810a98f0&amp;gt;] ? kthread_create_on_node+0x140/0x140
2016-09-22 10:03:52 [493315.711916] drm_kms_helper: panic occurred, switching back to text console
2016-09-22 10:03:52 [493315.720378] ------------[ cut here ]------------
2016-09-22 10:03:52 [493315.726202] WARNING: at arch/x86/kernel/smp.c:124 native_smp_send_reschedule+0x5f/0x70()
2016-09-22 10:03:52 [493315.735902] Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgc(OE) osd_zfs(OE) lquota(OE) fid(OE) fld(OE) ptlrpc(OE) obdclass(OE) rpcsec_gss_krb5 ko2iblnd(OE) lnet(OE) sha512_generic crypto_null libcfs(OE) nfsv3 iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl kvm mlx5_ib pcspkr mlx5_core sb_edac lpc_ich edac_core mfd_core mei_me mei zfs(POE) zunicode(POE) zavl(POE) zcommon(POE) znvpair(POE) ses enclosure ipmi_devintf spl(OE) zlib_deflate sg i2c_i801 ioatdma shpchp ipmi_si ipmi_msghandler acpi_power_meter acpi_cpufreq binfmt_misc ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr nfsd nfs_acl ip_tables auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache dm_round_robin sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32_pclmul mgag200 crc32c_intel syscopyarea sysfillrect sysimgblt ghash_clmulni_intel i2c_algo_bit drm_kms_helper mxm_wmi ttm aesni_intel ixgbe lrw gf128mul ahci drm dca glue_helper mpt3sas libahci ptp i2c_core ablk_helper cryptd libata raid_class pps_core scsi_transport_sas mdio wmi sunrpc dm_mirror dm_region_hash dm_log scsi_transport_iscsi dm_multipath dm_mod
2016-09-22 10:03:52 [493315.859970] CPU: 2 PID: 0 Comm: swapper/2 Tainted: P           OE  ------------   3.10.0-327.28.2.1chaos.ch6.x86_64 #1
2016-09-22 10:03:52 [493315.872734] Hardware name: Intel Corporation S2600WTTR/S2600WTTR, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016
2016-09-22 10:03:53 [493315.885407]  0000000000000000 bcf7d7e5812e0014 ffff883f7e683d78 ffffffff8164cae7
2016-09-22 10:03:53 [493315.894536]  ffff883f7e683db0 ffffffff8107d6d0 0000000000000000 ffff883f7e6967c0
2016-09-22 10:03:53 [493315.903668]  000000011d5cacb8 ffff883f7e6167c0 0000000000000002 ffff883f7e683dc0
2016-09-22 10:03:53 [493315.912796] Call Trace:
2016-09-22 10:03:53 [493315.916347]  &amp;lt;IRQ&amp;gt;  [&amp;lt;ffffffff8164cae7&amp;gt;] dump_stack+0x19/0x1b
2016-09-22 10:03:53 [493315.923621]  [&amp;lt;ffffffff8107d6d0&amp;gt;] warn_slowpath_common+0x70/0xb0
2016-09-22 10:03:53 [493315.931168]  [&amp;lt;ffffffff8107d81a&amp;gt;] warn_slowpath_null+0x1a/0x20
2016-09-22 10:03:53 [493315.938512]  [&amp;lt;ffffffff81048fdf&amp;gt;] native_smp_send_reschedule+0x5f/0x70
2016-09-22 10:03:53 [493315.946646]  [&amp;lt;ffffffff810cb04d&amp;gt;] trigger_load_balance+0x18d/0x250
2016-09-22 10:03:53 [493315.954390]  [&amp;lt;ffffffff810bbdd3&amp;gt;] scheduler_tick+0x103/0x150
2016-09-22 10:03:53 [493315.961553]  [&amp;lt;ffffffff810e5800&amp;gt;] ? tick_sched_handle.isra.14+0x60/0x60
2016-09-22 10:03:53 [493315.969775]  [&amp;lt;ffffffff81091a06&amp;gt;] update_process_times+0x66/0x80
2016-09-22 10:03:53 [493315.977304]  [&amp;lt;ffffffff810e57c5&amp;gt;] tick_sched_handle.isra.14+0x25/0x60
2016-09-22 10:03:53 [493315.985310]  [&amp;lt;ffffffff810e5841&amp;gt;] tick_sched_timer+0x41/0x70
2016-09-22 10:03:53 [493315.992432]  [&amp;lt;ffffffff810adeda&amp;gt;] __hrtimer_run_queues+0xea/0x2c0
2016-09-22 10:03:53 [493316.000042]  [&amp;lt;ffffffff810ae4e0&amp;gt;] hrtimer_interrupt+0xb0/0x1e0
2016-09-22 10:03:53 [493316.007351]  [&amp;lt;ffffffff8104be47&amp;gt;] local_apic_timer_interrupt+0x37/0x60
2016-09-22 10:03:53 [493316.015442]  [&amp;lt;ffffffff8166000f&amp;gt;] smp_apic_timer_interrupt+0x3f/0x60
2016-09-22 10:03:53 [493316.023338]  [&amp;lt;ffffffff8165e6dd&amp;gt;] apic_timer_interrupt+0x6d/0x80
2016-09-22 10:03:53 [493316.030848]  &amp;lt;EOI&amp;gt;  [&amp;lt;ffffffff810dd69c&amp;gt;] ? ktime_get+0x4c/0xd0
2016-09-22 10:03:53 [493316.038194]  [&amp;lt;ffffffff810b8da6&amp;gt;] ? finish_task_switch+0x56/0x180
2016-09-22 10:03:53 [493316.045803]  [&amp;lt;ffffffff81651df0&amp;gt;] __schedule+0x2e0/0x940
2016-09-22 10:03:53 [493316.052533]  [&amp;lt;ffffffff81653709&amp;gt;] schedule_preempt_disabled+0x39/0x90
2016-09-22 10:03:53 [493316.060533]  [&amp;lt;ffffffff810db1f4&amp;gt;] cpu_startup_entry+0x184/0x2d0
2016-09-22 10:03:53 [493316.067949]  [&amp;lt;ffffffff81049eea&amp;gt;] start_secondary+0x1ca/0x240
2016-09-22 10:03:53 [493316.075162] ---[ end trace 28897805122ddeee ]---
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Filesystem info:&lt;br/&gt;
16 MDS, 4 OSS, running ZFS 0.7.0-0.3llnl and lustre 2.8.0 on a RHEL 7.2 based operating system (3.10.0-327.28.2.1chaos.ch6.x86_64).&lt;/p&gt;

&lt;p&gt;Also worth noting, once we have a directory with files that exhibit this &quot;bad address&quot; error, the directory cannot be removed.&lt;/p&gt;

&lt;p&gt;Let me know if you need more info.&lt;/p&gt;</comment>
                            <comment id="167062" author="dinatale2" created="Fri, 23 Sep 2016 15:37:40 +0000"  >&lt;p&gt;I&apos;m going to attempt to bring our filesystem back up this afternoon, if you could let me know if you have everything you need, that&apos;d be great! Thanks!&lt;/p&gt;</comment>
                            <comment id="167069" author="pjones" created="Fri, 23 Sep 2016 16:11:16 +0000"  >&lt;p&gt;Joe&lt;/p&gt;

&lt;p&gt;Fan Yong is based in China so may not see this question until Sunday evening by this time of day&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="167085" author="dinatale2" created="Fri, 23 Sep 2016 17:20:12 +0000"  >&lt;p&gt;Ah, thanks for letting me know, Peter. We are able to reproduce it if necessary, so I think it&apos;s safe to reboot our filesystem.&lt;/p&gt;</comment>
                            <comment id="167158" author="yong.fan" created="Sun, 25 Sep 2016 11:35:26 +0000"  >&lt;p&gt;I will make patch to fix the namespace LFSCK assertion.&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;Also worth noting, once we have a directory with files that exhibit this &quot;bad address&quot; error, the directory cannot be removed.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;That is because there are dangling name entries under the parent directory, the dangling name entries cannot be removed via normal unlink/rmdir command, as to the parent directory are not empty. That is why the parent directory cannot be removed under such case.&lt;/p&gt;

&lt;p&gt;To resolve such trouble, you have to use the namespace LFSCK with &quot;-C&quot; option to fix the dangling name entries firstly, then removed them.&lt;/p&gt;</comment>
                            <comment id="167159" author="gerrit" created="Sun, 25 Sep 2016 11:35:54 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/22723&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22723&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; lfsck: cleanup lfsck requests list before exit&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 46dd7d98cb262fbbe1285b447cd763bfc80b27d4&lt;/p&gt;</comment>
                            <comment id="167327" author="morrone" created="Mon, 26 Sep 2016 18:43:44 +0000"  >&lt;blockquote&gt;&lt;p&gt;But I would suggest to run namespace LFSCK to fix related Lustre inconsistency.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;I had done that, but it did not fix the problem.  lfsck ran to completion and did not assert when I ran it.  The assertion is a new thing.  I have no idea why it crashed this time around.&lt;/p&gt;

</comment>
                            <comment id="167417" author="yong.fan" created="Tue, 27 Sep 2016 02:48:15 +0000"  >&lt;p&gt;I think you have specified &quot;-C&quot; option when run the namespace LFSCK completely, right?&lt;br/&gt;
Do you have the Lustre debug log (with &quot;lfsck&quot; debug enabled) when LFSCK ran? That will record which inconsistency have been detected and repaired (or failure). If you have not collected related information, can you show the me the lfsck status lproc output?&lt;/p&gt;

&lt;p&gt;Thanks! &lt;/p&gt;</comment>
                            <comment id="167652" author="dinatale2" created="Wed, 28 Sep 2016 22:00:58 +0000"  >&lt;p&gt;I went ahead and attached a log file called &quot;lfsck_namespace_state-9-28-2016.log&quot; which was obtained by running the following on each MDS:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;pdsh -g mds &apos;lctl get_param -n mdd.$(ldev -l | grep lquake-MDT).lfsck_namespace&apos; | dshbak -c&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Worth noting, when I restarted the filesystem, I had to stop the lfsck namespace check because the kernel panics would continue to occur because lfsck tried picking up where it left off.&lt;/p&gt;

&lt;p&gt;Also, we are creating the name dangling issue at will at the moment with the reproduction steps I provided in my Sept 21, 2016 comment (the one with the linkme.sh). I think that still needs to be addressed.&lt;/p&gt;

&lt;p&gt;I&apos;m also going to break out the lfsck call stack issue to a separate ticket, it is unclear whether or not it is related.&lt;/p&gt;</comment>
                            <comment id="167657" author="dinatale2" created="Wed, 28 Sep 2016 22:19:01 +0000"  >&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt; for lfsck assertion which we started discussing in this ticket.&lt;/p&gt;</comment>
                            <comment id="167841" author="yong.fan" created="Fri, 30 Sep 2016 02:58:28 +0000"  >&lt;p&gt;According to namespace LFSCK status, some dangling name entry should have been fixed:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# grep dangling lfsck_namespace_state-9-28-2016.log 
33:dangling_repaired: 423
92:dangling_repaired: 442
151:dangling_repaired: 431
210:dangling_repaired: 437
269:dangling_repaired: 406
328:dangling_repaired: 437
387:dangling_repaired: 440
446:dangling_repaired: 403
505:dangling_repaired: 511
564:dangling_repaired: 434
623:dangling_repaired: 432
682:dangling_repaired: 434
741:dangling_repaired: 540
800:dangling_repaired: 429
859:dangling_repaired: 435
918:dangling_repaired: 411
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;But still some failures when try to repair the striped directories:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# grep failed lfsck_namespace_state-9-28-2016.log | grep -v 0
5:48:striped_shards_failed: 6
75:874:striped_shards_failed: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Unfortunately, if without the detailed LFSCK Lustre kernel debug logs, we cannot know what caused the LFSCK failure. If you can re-run the namespace LFSCK, then please enable &quot;lfsck&quot; debug on the MDTs, and collect the Lustre kernel debug logs.&lt;/p&gt;

&lt;p&gt;Currently, since the namespace LFSCK failed to fix some inconsistency, if you have to remove those dangling entries some soon, then one possible solution is that: mount the backend as &quot;ZFS&quot; and remove those entries under &quot;ZFS&quot; mode directly. That will leave some stale OI mappings in the system, but it is almost harmless but space waste.&lt;/p&gt;</comment>
                            <comment id="167844" author="adilger" created="Fri, 30 Sep 2016 03:38:26 +0000"  >&lt;p&gt;It may also be possible to use &quot;lfs rm&quot; to remove dangling remote directory entries without trying to unlink the remote inode. That is intended for use in case of an MDT becoming permanently unavailable, but should also work in this case. &lt;/p&gt;</comment>
                            <comment id="167848" author="yong.fan" created="Fri, 30 Sep 2016 08:20:32 +0000"  >&lt;blockquote&gt;
&lt;p&gt;#!/bin/bash&lt;br/&gt;
filename=$(hostname)_${RANDOM}&lt;br/&gt;
ln afile $filename&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;I have ever tried above scripts with multiple clients run in parallel, but cannot reproduce the trouble.&lt;br/&gt;
On the other hand, analysis your scripts: it creates a regular file &apos;afile&apos;, then repeatedly hardlink to it. If the hardlink triggers the issue finally, since nobody unlink the source object &apos;afile&apos;, then the unique possible case (for dangling name entry) is that the FID stored in the hardlink name entry is wrong. But I cannot imagine how this can happen.&lt;/p&gt;

&lt;p&gt;Giuseppe, would you please to reproduce the issue as the way you mentioned with &quot;-1&quot; level Lustre kernel debug logs collected on the MDTs? Thanks!&lt;/p&gt;</comment>
                            <comment id="168388" author="dinatale2" created="Wed, 5 Oct 2016 17:48:35 +0000"  >&lt;p&gt;I can get you some logs soon. Our test system isn&apos;t happy right now. Working on getting it back up so I can reproduce this to get those logs. Stay tuned.&lt;/p&gt;</comment>
                            <comment id="169373" author="bhoagland" created="Wed, 12 Oct 2016 22:16:58 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://jira.whamcloud.com/secure/ViewProfile.jspa?name=dinatale2&quot; class=&quot;user-hover&quot; rel=&quot;dinatale2&quot;&gt;dinatale2&lt;/a&gt;,&lt;br/&gt;
Were you able to get the test system up and reproduce?&lt;/p&gt;</comment>
                            <comment id="169522" author="dinatale2" created="Thu, 13 Oct 2016 19:28:15 +0000"  >&lt;p&gt;Still having issues with it. Will attempt to reproduce this ASAP.&lt;/p&gt;</comment>
                            <comment id="169585" author="dinatale2" created="Fri, 14 Oct 2016 01:00:26 +0000"  >&lt;p&gt;Logs are now attached to this incident. The file names are jet-link-logs-part&lt;span class=&quot;error&quot;&gt;&amp;#91;1-4&amp;#93;&lt;/span&gt;.tar.gz. The part 1 gzip has errors.log in it which has a sampling of what shows up in the console so you can use that to track down a specific file in the logs. Let me know if you need anything else.&lt;/p&gt;</comment>
                            <comment id="170395" author="gerrit" created="Thu, 20 Oct 2016 10:35:47 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/22723/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22723/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; lfsck: cleanup lfsck requests list before exit&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 445da16c2ac0475b1c1077c822800b68cdbb7ce3&lt;/p&gt;</comment>
                            <comment id="170423" author="pjones" created="Thu, 20 Oct 2016 12:34:19 +0000"  >&lt;p&gt;Landed for 2.9&lt;/p&gt;</comment>
                            <comment id="170424" author="pjones" created="Thu, 20 Oct 2016 12:35:36 +0000"  >&lt;p&gt;Actually perhaps I was premature to mark as resolved here. Fan Yong, what did the patch tracked under this ticket that just landed to master address? Is there still work to be tracked under this ticket?&lt;/p&gt;</comment>
                            <comment id="170547" author="dinatale2" created="Fri, 21 Oct 2016 00:32:35 +0000"  >&lt;p&gt;Peter,&lt;/p&gt;

&lt;p&gt;There is still work being tracked under this ticket. The logs I posted last week are to help find a resolution to this issue.&lt;/p&gt;

&lt;p&gt;The patch that landed was for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                            <comment id="170669" author="pjones" created="Sat, 22 Oct 2016 01:33:19 +0000"  >&lt;p&gt;So &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt; was fixed by &lt;a href=&quot;http://git.whamcloud.com/fs/lustre-release.git/commit/445da16c2ac0475b1c1077c822800b68cdbb7ce3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.whamcloud.com/fs/lustre-release.git/commit/445da16c2ac0475b1c1077c822800b68cdbb7ce3&lt;/a&gt; even though it used the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; JIRA reference in the commit message?&lt;/p&gt;</comment>
                            <comment id="170680" author="yong.fan" created="Sun, 23 Oct 2016 02:19:13 +0000"  >&lt;p&gt;Peter,&lt;/p&gt;

&lt;p&gt;As you can see in the comment history, to make &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; original issues to be clear, the new test failure about the LFSCK was split from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; description with new ticket &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt;. The patch &lt;a href=&quot;http://review.whamcloud.com/22723/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22723/&lt;/a&gt; was used for resolving &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt; issue, but because the patch &lt;a href=&quot;http://review.whamcloud.com/22723/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/22723/&lt;/a&gt; was pushed to Gerrit before &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; 
data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt; created, then such patch still used the old ticket number.&lt;/p&gt;

&lt;p&gt;So we can close the ticket &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8647&quot; title=&quot;lfsck_namespace_double_scan()) ASSERTION( list_empty(&amp;amp;lad-&amp;gt;lad_req_list) ) failed&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8647&quot;&gt;&lt;del&gt;LU-8647&lt;/del&gt;&lt;/a&gt; as resolved. There is still some work to be done for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt;. I am investigating the huge logs.&lt;/p&gt;</comment>
                            <comment id="170731" author="pjones" created="Mon, 24 Oct 2016 12:59:41 +0000"  >&lt;p&gt;Got it. For future reference it is possible to make adjustments to git commit messages when landing, so it would have been possible to use the correct JIRA reference without delaying things.&lt;/p&gt;</comment>
                            <comment id="171317" author="di.wang" created="Thu, 27 Oct 2016 06:48:10 +0000"  >&lt;p&gt;Just looked the debug log, it looks like update log is too long, which seems not right.  &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;.............
0x23:47025: 200000020:00000040:9.0:1476399235.972447:0:154190:0:(update_trans.c:93:top_multiple_thandle_dump())  cookie 0x23:47025: 1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;too many log cookies ( &amp;gt; 1k) for this transaction, each cookie can hold 32k update records. So I do not understand why link can generate such a big record size. Hmm, even though the linkea size might be big in your test.  (Do we limit linkea size for zfs?)  the problem might be in &lt;br/&gt;
sub_updates_write.  and related with this patch &lt;a href=&quot;http://review.whamcloud.com/21334&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/21334&lt;/a&gt; , I will check.&lt;/p&gt;

&lt;p&gt;I suspect this test might reproduce the problem, sigh, I do not have zfs environment here,&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index c61e3bc..0a3a82c 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -15196,6 +15196,29 @@ test_300q() {
 }
 run_test 300q &quot;create remote directory under orphan directory&quot;
 
+test_300r() {
+       [ $PARALLEL == &quot;yes&quot; ] &amp;amp;&amp;amp; skip &quot;skip parallel run&quot; &amp;amp;&amp;amp; return
+       [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] &amp;amp;&amp;amp;
+               skip &quot;Need MDS version at least 2.7.55&quot; &amp;amp;&amp;amp; return
+       [ $MDSCOUNT -lt 2 ] &amp;amp;&amp;amp; skip &quot;needs &amp;gt;= 2 MDTs&quot; &amp;amp;&amp;amp; return
+       local stripe_count
+       local file
+
+       mkdir $DIR/$tdir
+
+       $LFS setdirstripe -i1 -c3 $DIR/$tdir/remote_dir ||
+               error &quot;set striped dir error&quot;
+
+       touch $DIR/$tdir/$tfile
+       for ((i = 0; i &amp;lt; 50000; i++)); do
+               ln $DIR/$tdir/$tfile $DIR/$tdir/remote_dir/fffffffffffffffffffffffffffffffffffffffff-$i ||
+                       error &quot;ln remote file fails&quot;
+       done
+
+       return 0
+}
+run_test 300r &quot;test remote ln under striped directory&quot;
+
 prepare_remote_file() {
        mkdir $DIR/$tdir/src_dir ||
                error &quot;create remote source failed&quot;

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;





</comment>
                            <comment id="171450" author="di.wang" created="Thu, 27 Oct 2016 21:42:55 +0000"  >&lt;p&gt;Just did some tests on ZFS and it looks like the problem is because the linkEA on ZFS reach above  the llog chunk size (32768), which our current update llog system can not handle.  i.e. one update operation (update op + its parameter) size can not &amp;gt; llog chunk size (32KB).&lt;/p&gt;

&lt;p&gt;So is it ok to limit the linkea size here?&lt;/p&gt;</comment>
                            <comment id="171460" author="adilger" created="Thu, 27 Oct 2016 22:39:05 +0000"  >&lt;p&gt;Yes, I think it is reasonable to limit linkEA size in this case.  The Linux kernel xattr API is also similarly limited by the size of individual xattrs, and ldiskfs has a 4KB limit for xattrs, so the Lustre code is already expecting that not all links will be stored for a given file.&lt;/p&gt;</comment>
                            <comment id="171863" author="gerrit" created="Tue, 1 Nov 2016 04:32:56 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/23500&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/23500&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; linkea: linkEA size limitation&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 0d8fe108f7b7f267fa790320954fc55e996af964&lt;/p&gt;</comment>
                            <comment id="173440" author="gerrit" created="Mon, 14 Nov 2016 12:25:30 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/23741&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/23741&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; lfsck: handle linkEA overflow&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 94f5d2fec9edb6e1e5359ceebea9882cb5bb2719&lt;/p&gt;</comment>
                            <comment id="179280" author="gerrit" created="Sun, 1 Jan 2017 01:57:49 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/23500/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/23500/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; linkea: linkEA size limitation&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: e760042016bb5b12f9b21568304c02711930720f&lt;/p&gt;</comment>
                            <comment id="180480" author="dinatale2" created="Wed, 11 Jan 2017 18:06:27 +0000"  >&lt;p&gt;Before this closes, can these patches also be ported to the 2.8FE branch?&lt;/p&gt;</comment>
                            <comment id="180488" author="pjones" created="Wed, 11 Jan 2017 18:25:34 +0000"  >&lt;p&gt;Giuseppe&lt;/p&gt;

&lt;p&gt;The ticket will be marked resolved when the patches land to master but the ticket will remain on the LLNL prority list until the equivalent patches have been ported and landed to the 2.8 FE branch&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="181201" author="gerrit" created="Wed, 18 Jan 2017 18:59:08 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/23741/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/23741/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8569&quot; title=&quot;Sharded DNE directory full of files that don&amp;#39;t exist&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8569&quot;&gt;&lt;del&gt;LU-8569&lt;/del&gt;&lt;/a&gt; lfsck: handle linkEA overflow&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 048a8740ae26e3406a7eab3bca383a90490cef93&lt;/p&gt;</comment>
                            <comment id="181210" author="pjones" created="Wed, 18 Jan 2017 19:08:36 +0000"  >&lt;p&gt;All patches landed to master for 2.10. Ports to 2.8 and 2.9 FE branches will be tracked separately.&lt;/p&gt;</comment>
                            <comment id="181277" author="dinatale2" created="Thu, 19 Jan 2017 00:13:07 +0000"  >&lt;p&gt;Peter,&lt;/p&gt;

&lt;p&gt;Are there tasks created so I can keep track of the 2.8 FE port?&lt;/p&gt;

&lt;p&gt;Joe&lt;/p&gt;</comment>
                            <comment id="181284" author="pjones" created="Thu, 19 Jan 2017 00:33:59 +0000"  >&lt;p&gt;We&apos;ll post the links on the ticket and mark with llnlfixready when it&apos;s ready for you to pick up&lt;/p&gt;</comment>
                            <comment id="181537" author="dinatale2" created="Fri, 20 Jan 2017 01:37:28 +0000"  >&lt;p&gt;Apologies Peter, I went ahead and created &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9037&quot; title=&quot;Port LU-8569 to 2.8 FE Branch&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9037&quot;&gt;&lt;del&gt;LU-9037&lt;/del&gt;&lt;/a&gt; to keep track of the porting so those who are interested can keep track of its progress.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="40166">LU-8647</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="27290">LU-5802</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="23152" name="getstripelogs.tar.gz" size="234" author="dinatale2" created="Mon, 19 Sep 2016 23:34:58 +0000"/>
                            <attachment id="23426" name="jet-link-logs-part1.tar.gz" size="246" author="dinatale2" created="Fri, 14 Oct 2016 00:56:23 +0000"/>
                            <attachment id="23427" name="jet-link-logs-part2.tar.gz" size="246" author="dinatale2" created="Fri, 14 Oct 2016 00:57:43 +0000"/>
                            <attachment id="23428" name="jet-link-logs-part3.tar.gz" size="246" author="dinatale2" created="Fri, 14 Oct 2016 00:58:32 +0000"/>
                            <attachment id="23429" name="jet-link-logs-part4.tar.gz" size="246" author="dinatale2" created="Fri, 14 Oct 2016 00:59:01 +0000"/>
                            <attachment id="23262" name="lfsck_namespace_state-9-28-2016.log" size="24647" author="dinatale2" created="Wed, 28 Sep 2016 21:43:11 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzymnj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>