<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:50:14 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5294] mdd_unlink() returning -7</title>
                <link>https://jira.whamcloud.com/browse/LU-5294</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Any client on this filesystem will get back -7 from an unlink or rm. Creates, reads, writes work fine. We tried multiple users, and found no difference. Upcall is successful:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;/usr/sbin/l_getidentity linkfarm-MDT0000 0&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;+trace and +rpctrace debugs were captured on the MDT while performing the unlink on a client 10.36.226.85@o2ib&lt;/p&gt;

&lt;p&gt;I can upload a full debug log. This is easy to recreate and capture, so just let me know which debug flags would be useful.&lt;/p&gt;

&lt;p&gt;00000040:00000001:19.0:1404405371.756302:0:13436:0:(llog_osd.c:317:llog_osd_declare_write_rec()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000040:00000001:19.0:1404405371.756303:0:13436:0:(llog.c:714:llog_declare_write_rec()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000040:00000001:19.0:1404405371.756303:0:13436:0:(llog_cat.c:443:llog_cat_declare_add_rec()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000040:00000001:19.0:1404405371.756304:0:13436:0:(llog.c:790:llog_declare_add()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756305:0:13436:0:(osp_sync.c:206:osp_sync_declare_add()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756306:0:13436:0:(osp_object.c:322:osp_declare_object_destroy()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756307:0:13436:0:(lod_object.c:1044:lod_declare_object_destroy()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756309:0:13436:0:(lod_object.c:434:lod_declare_xattr_set()) Process entered&lt;br/&gt;
00000004:00000001:19.0:1404405371.756310:0:13436:0:(lod_object.c:464:lod_declare_xattr_set()) Process leaving (rc=18446744073709551609 : -7 : fffffffffffffff9)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756311:0:13436:0:(mdd_dir.c:1422:mdd_unlink()) Process leaving via stop (rc=18446744073709551609 : -7 : 0xfffffffffffffff9)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756313:0:13436:0:(osd_handler.c:915:osd_trans_stop()) Process entered&lt;br/&gt;
00040000:00000001:19.0:1404405371.756314:0:13436:0:(qsd_handler.c:1074:qsd_op_end()) Process entered&lt;br/&gt;
00040000:00000001:19.0:1404405371.756315:0:13436:0:(qsd_handler.c:1102:qsd_op_end()) Process leaving&lt;br/&gt;
00000004:00000001:19.0:1404405371.756316:0:13436:0:(osd_handler.c:968:osd_trans_stop()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756317:0:13436:0:(mdt_reint.c:868:mdt_reint_unlink()) Process leaving&lt;br/&gt;
00000004:00000001:19.0:1404405371.756317:0:13436:0:(mdt_handler.c:2791:mdt_object_unlock()) Process entered&lt;br/&gt;
00000004:00000001:19.0:1404405371.756318:0:13436:0:(mdt_handler.c:2739:mdt_save_lock()) Process entered&lt;/p&gt;

&lt;p&gt;...&lt;br/&gt;
00000004:00000001:19.0:1404405371.756396:0:13436:0:(mdt_internal.h:584:mdt_object_put()) Process entered&lt;br/&gt;
00000020:00000001:19.0:1404405371.756397:0:13436:0:(lustre_fid.h:715:fid_flatten32()) Process leaving (rc=4196255 : 4196255 : 40079f)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756398:0:13436:0:(mdt_internal.h:586:mdt_object_put()) Process leaving&lt;br/&gt;
00000004:00000001:19.0:1404405371.756398:0:13436:0:(mdt_reint.c:1375:mdt_reint_rec()) Process leaving (rc=18446744073709551609 : -7 : fffffffffffffff9)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756399:0:13436:0:(mdt_handler.c:1832:mdt_reint_internal()) Process leaving&lt;br/&gt;
02000000:00000001:19.0:1404405371.756400:0:13436:0:(upcall_cache.c:276:upcall_cache_put_entry()) Process entered&lt;br/&gt;
02000000:00000001:19.0:1404405371.756400:0:13436:0:(upcall_cache.c:287:upcall_cache_put_entry()) Process leaving&lt;br/&gt;
00000004:00000001:19.0:1404405371.756401:0:13436:0:(mdt_handler.c:429:mdt_client_compatibility()) Process entered&lt;br/&gt;
00000004:00000001:19.0:1404405371.756401:0:13436:0:(mdt_handler.c:433:mdt_client_compatibility()) Process leaving&lt;br/&gt;
00000004:00000001:19.0:1404405371.756402:0:13436:0:(mdt_lib.c:572:mdt_fix_reply()) Process entered&lt;br/&gt;
00000004:00000001:19.0:1404405371.756403:0:13436:0:(mdt_lib.c:671:mdt_fix_reply()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756404:0:13436:0:(mdt_handler.c:1898:mdt_reint()) Process leaving (rc=18446744073709551609 : -7 : fffffffffffffff9)&lt;br/&gt;
00010000:00000001:19.0:1404405371.756405:0:13436:0:(ldlm_lib.c:2440:target_send_reply()) Process entered&lt;br/&gt;
00010000:00000001:19.0:1404405371.756406:0:13436:0:(ldlm_lib.c:2393:target_pack_pool_reply()) Process entered&lt;br/&gt;
00010000:00000001:19.0:1404405371.756407:0:13436:0:(ldlm_lib.c:2412:target_pack_pool_reply()) Process leaving (rc=0 : 0 : 0)&lt;/p&gt;

&lt;p&gt;...&lt;br/&gt;
00010000:00000001:19.0:1404405371.756416:0:13436:0:(ldlm_lib.c:2452:target_send_reply()) Process leaving&lt;br/&gt;
00000004:00000001:19.0:1404405371.756417:0:13436:0:(mdt_handler.c:3103:mdt_req_handle()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756417:0:13436:0:(mdt_handler.c:3429:mdt_handle0()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000004:00000001:19.0:1404405371.756418:0:13436:0:(mdt_handler.c:3463:mdt_handle_common()) Process leaving (rc=0 : 0 : 0)&lt;br/&gt;
00000100:00100000:19.0:1404405371.756421:0:13436:0:(service.c:2055:ptlrpc_server_handle_request()) Handled RPC pname:cluuid+ref:pid:xid:nid:opc mdt03_035:12cfc1a2-8c1a-ebe6-ebdb-eb076741d9d3+16:27222:x1472624384278960:12345-10.36.226.85@o2ib:36 Request procesed in 395us (422us total) trans 0 rc -7/-7&lt;br/&gt;
00000100:00100000:19.0:1404405371.756424:0:13436:0:(nrs_fifo.c:244:nrs_fifo_req_stop()) NRS stop fifo request from 12345-10.36.226.85@o2ib, seq: 2243836&lt;/p&gt;</description>
                <environment>RHEL 6.4, kernel 2.6.32_358.23.2.el6,</environment>
        <key id="25428">LU-5294</key>
            <summary>mdd_unlink() returning -7</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="green">Oleg Drokin</assignee>
                                    <reporter username="blakecaldwell">Blake Caldwell</reporter>
                        <labels>
                    </labels>
                <created>Thu, 3 Jul 2014 16:55:48 +0000</created>
                <updated>Mon, 14 Jul 2014 15:55:21 +0000</updated>
                            <resolved>Mon, 14 Jul 2014 15:55:21 +0000</resolved>
                                    <version>Lustre 2.4.3</version>
                                                        <due></due>
                            <votes>1</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="88123" author="jamesanunez" created="Thu, 3 Jul 2014 17:52:38 +0000"  >&lt;p&gt;Mike, &lt;/p&gt;

&lt;p&gt;Would you please take a look at this issue and comment?&lt;/p&gt;

&lt;p&gt;Thank you,&lt;br/&gt;
James&lt;/p&gt;</comment>
                            <comment id="88125" author="green" created="Thu, 3 Jul 2014 18:08:14 +0000"  >&lt;p&gt;Is this vanilla 2.4.3 (servers)?&lt;br/&gt;
Is this for any file, or a specific set of files, wide striping in use?&lt;br/&gt;
1.8 clients or any version clients&lt;/p&gt;</comment>
                            <comment id="88130" author="blakecaldwell" created="Thu, 3 Jul 2014 18:24:03 +0000"  >&lt;p&gt;This is the same 2.4.3 as the rest of our production servers with several patches listed below. It is for any file on the filesystem. Wide striping is not in use (only 1 OST) on this filesystem. Any client (including 1.8) can reproduce this error.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4006&quot; title=&quot;LNET Messages staying in Queue&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4006&quot;&gt;&lt;del&gt;LU-4006&lt;/del&gt;&lt;/a&gt; (landed to 2.6, not sure if we&apos;ve thoroughly tested)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4008&quot; title=&quot;Reduce vmalloc usage on MDS&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4008&quot;&gt;&lt;del&gt;LU-4008&lt;/del&gt;&lt;/a&gt; (3 older patches in our version, 2 landed to master, working&lt;br/&gt;
toward landing in 2.5)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4019&quot; title=&quot;today&amp;#39;s master stick on shutdown on test == sanity test 132: on lu_object_find_at&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4019&quot;&gt;&lt;del&gt;LU-4019&lt;/del&gt;&lt;/a&gt; (in 2.5)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4403&quot; title=&quot;ASSERTION( lock-&amp;gt;l_readers &amp;gt; 0 )&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4403&quot;&gt;&lt;del&gt;LU-4403&lt;/del&gt;&lt;/a&gt; (landed to 2.6)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4570&quot; title=&quot;Metadata slowdowns on production filesystem at ORNL&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4570&quot;&gt;&lt;del&gt;LU-4570&lt;/del&gt;&lt;/a&gt; (debug-only patch)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4719&quot; title=&quot;mdt_dump_lmm crashes for directories created with a  large stripe count.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4719&quot;&gt;&lt;del&gt;LU-4719&lt;/del&gt;&lt;/a&gt; (landed to 2.6)&lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; (marked as merged in Gerrit, but no &quot;Fix Version&quot; Jira)&lt;/p&gt;</comment>
                            <comment id="88144" author="green" created="Thu, 3 Jul 2014 21:09:51 +0000"  >&lt;p&gt;Hm, kind of weird in that apparently whatever was the function that returned -7, it&apos;s not in your trace.&lt;br/&gt;
It looks like one possible candidate would be osp_insert_update called from osp_md_declare_xattr_set(), but it should print a CERROR and it also has entry/exits inside of it so it should be visible.&lt;/p&gt;

&lt;p&gt;I assume you do not get any messages in dmesg on mds as well?&lt;/p&gt;

&lt;p&gt;Did this appear all of a sudden after you applied some patches, or did this combination of patches work until it stopped working?&lt;/p&gt;</comment>
                            <comment id="88148" author="blakecaldwell" created="Thu, 3 Jul 2014 21:40:16 +0000"  >&lt;p&gt;Nope, no dmesg output on mds. I&apos;m attaching a complete log. This didn&apos;t correlate to any patch changes (we&apos;ve been on this one for a couple months maybe now). We had strange things going on with DNS (outage this week) and syslog (was blocking on misconfigured server), but they have been resolved. I was initially looking for a correlation to an upcall result, but that part seems fine.&lt;/p&gt;</comment>
                            <comment id="88152" author="green" created="Thu, 3 Jul 2014 22:16:56 +0000"  >&lt;p&gt;Well, I assume you already tried to reboot the MDS and this did not clear the condition, so perhaps we should add a small patch as a first step that would print the ops address that returned the failure and then we&apos;ll trace where it came from and perhaps add more debug in there if it&apos;s still unclear of what&apos;s going on?&lt;br/&gt;
Would this be workable for you?&lt;/p&gt;</comment>
                            <comment id="88154" author="blakecaldwell" created="Thu, 3 Jul 2014 22:25:40 +0000"  >&lt;p&gt;We haven&apos;t rebooted the MDS yet, but since that sounds like the next step, we will plan on doing that. Probably Tuesday if we can hobble along in this mode for a bit.&lt;/p&gt;</comment>
                            <comment id="88160" author="green" created="Thu, 3 Jul 2014 23:06:05 +0000"  >&lt;p&gt;Is there anything else you will be able to do on the system before then?&lt;/p&gt;

&lt;p&gt;Also can you grab an mds log at -1 debug level while reproducing the error on the off chance it might catch more messages at some other level and shine some more light at what&apos;s going on?&lt;/p&gt;</comment>
                            <comment id="88161" author="jfc" created="Thu, 3 Jul 2014 23:28:59 +0000"  >&lt;p&gt;Thank you for picking this up Oleg.&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="88169" author="adilger" created="Fri, 4 Jul 2014 02:42:45 +0000"  >&lt;p&gt;It may be that this is caused by the file having a too-large ACL xattr?  I recall a similar problem in the past, maybe the patch is not landed on b2_4?&lt;/p&gt;

&lt;p&gt;Alternately, does this filesystem have wide striping enabled (for more than 160 OSTs)?&lt;/p&gt;</comment>
                            <comment id="88171" author="ezell" created="Fri, 4 Jul 2014 03:26:53 +0000"  >&lt;blockquote&gt;&lt;p&gt;It may be that this is caused by the file having a too-large ACL xattr? I recall a similar problem in the past, maybe the patch is not landed on b2_4?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;We don&apos;t use and typically disable ACLs on our file systems.  This seems to be happening for all files.&lt;/p&gt;

&lt;blockquote&gt;&lt;p&gt;Alternately, does this filesystem have wide striping enabled (for more than 160 OSTs)?&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;This is our &quot;link farm&quot; file system.  It houses symlinks for all users that point to one of the two large file systems.  It&apos;s just 1 OST, so no wide striping.&lt;/p&gt;</comment>
                            <comment id="88176" author="adilger" created="Fri, 4 Jul 2014 05:21:05 +0000"  >&lt;p&gt;Can you please post the output of &lt;tt&gt;dumpe2fs -h /dev/mdtdev&lt;/tt&gt; on the MDS. In particular, I&apos;d like to check the features that are enabled on the filesystem.&lt;/p&gt;

&lt;p&gt;Also, is this problem specific to some files, or is this happening for all files?  If it is specific to certain files, could you please run &lt;tt&gt;debugfs -c -R &quot;stat /ROOT/path/to/file&quot; /dev/mdtdev&lt;/tt&gt;, where &lt;tt&gt;/path/to/file&lt;/tt&gt; is just the part below the mountpoint (i.e. excluding the &quot;/mnt/linkfarm&quot; part, or whatever). &lt;/p&gt;</comment>
                            <comment id="88298" author="blakecaldwell" created="Mon, 7 Jul 2014 15:34:09 +0000"  >&lt;p&gt;Some updates after trying the above:&lt;/p&gt;

&lt;p&gt;The issue is still present after rebooting the mds. After setting lnet.printk to -1 it overwhelmed rsyslog, and necessitated a reboot.  We need to fix rsyslog to drop messages.&lt;/p&gt;

&lt;p&gt;Attached is a debug log with all options turned on in /proc/sys/lnet/debug&lt;/p&gt;

&lt;p&gt;Also, as requested, I&apos;m attaching the output from debugfs &quot;stat $FILE&quot; /dev/mdt and dumpe2fs -h /dev/mdtdev. The version of e2fsprogs is a little out of date, so I noticed that no &quot;parent&quot; fid was given (e2fsprogs-1.42.9.wc1-7.el6). Ideally it&apos;d be best to wait until tomorrow if we need to update that version on the running nodes.&lt;/p&gt;
</comment>
                            <comment id="88565" author="green" created="Wed, 9 Jul 2014 02:53:18 +0000"  >&lt;p&gt;hm, still nothing additionally useful in the full debug log.&lt;/p&gt;

&lt;p&gt;I inspected the code again. lod_object.c:464 looks like this in my 2.4.3 tree:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        rc = dt_declare_xattr_set(env, next, buf, name, fl, th);

        RETURN(rc);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So the error must have come from that is declared as:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;static inline int dt_declare_xattr_set(const struct lu_env *env,
                                      struct dt_object *dt,
                                      const struct lu_buf *buf,
                                      const char *name, int fl,
                                      struct thandle *th)
{
        LASSERT(dt);
        LASSERT(dt-&amp;gt;do_ops);
        LASSERT(dt-&amp;gt;do_ops-&amp;gt;do_declare_xattr_set);
        return dt-&amp;gt;do_ops-&amp;gt;do_declare_xattr_set(env, dt, buf, name, fl, th);
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Now, I checked the code for all definitions of do_declare_xattr_set and there are only four:&lt;br/&gt;
lod/lod_object.c:	.do_declare_xattr_set	= lod_declare_xattr_set, &amp;#8211; This one we are called from already. also it has entry/exit, so if there was a recursion, we&apos;d see it in the log.&lt;br/&gt;
osd-zfs/osd_object.c:	.do_declare_xattr_set	= osd_declare_xattr_set, &amp;#8211; This is zfs-only and you don&apos;t use ZFS, additionally there&apos;s ENTRY at the start of that function that we would see but don&apos;t&lt;br/&gt;
osp/osp_md_object.c:	.do_declare_xattr_set = osp_md_declare_xattr_set, &amp;#8211; This should only happen for DNE case that you don&apos;t seem to be having. Additionally while this function does not have ENTRY/EXIT, it calls into functions that do. Additionally there&apos;s CERROR in case of error there.&lt;br/&gt;
osd-ldiskfs/osd_handler.c:        .do_declare_xattr_set = osd_declare_xattr_set, &amp;#8211; This one would be the one that failed I would guess, except it&apos;s always returning 0:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;static int osd_declare_xattr_set(const struct lu_env *env,
                                 struct dt_object *dt,
                                 const struct lu_buf *buf, const char *name,
                                 int fl, struct thandle *handle)
{
        struct osd_thandle *oh;

        LASSERT(handle != NULL);

        oh = container_of0(handle, struct osd_thandle, ot_super);
        LASSERT(oh-&amp;gt;ot_handle == NULL);

        osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET,
                             strcmp(name, XATTR_NAME_VERSION) == 0 ?
                             osd_dto_credits_noquota[DTO_ATTR_SET_BASE] :
                             osd_dto_credits_noquota[DTO_XATTR_SET]);

        return 0;
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Master version of osd_declare_xattr_set is defined a bit differently, but still always returns 0.&lt;br/&gt;
Does your version of osd_declare_xattr_set in osd-ldiskfs/ look the same?&lt;br/&gt;
If so, I am totally out of ideas about how ANY error could emerge from lod_declare_xattr_set.&lt;/p&gt;</comment>
                            <comment id="88667" author="blakecaldwell" created="Thu, 10 Jul 2014 00:34:33 +0000"  >&lt;p&gt;Thanks Oleg. From that I found that we have patch set 2 of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; rather than patch set 3 that was merged&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/9837/2..3/lustre/osd-ldiskfs/osd_handler.c,cm&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9837/2..3/lustre/osd-ldiskfs/osd_handler.c,cm&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The conditional that returns -E2BIG was marked &quot;unlikely&quot;. Any idea what&apos;s causing the comparison to fail now?&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/9837/1/lustre/osd-ldiskfs/osd_handler.c,cm&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9837/1/lustre/osd-ldiskfs/osd_handler.c,cm&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;The relevant pieces of our &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt;.patch:&lt;br/&gt;
@@ -2713,10 +2722,30 @@ static int osd_declare_xattr_set(const struct lu_env *env,&lt;br/&gt;
                                  const struct lu_buf *buf, const char *name,&lt;br/&gt;
                                  int fl, struct thandle *handle)&lt;br/&gt;
 {&lt;br/&gt;
+   struct osd_device  *osd = osd_dev(dt-&amp;gt;do_lu.lo_dev);&lt;br/&gt;
+   struct super_block *sb = osd_sb(osd);&lt;br/&gt;
    struct osd_thandle *oh;&lt;br/&gt;
+   int max_ea_size;&lt;br/&gt;
+   int ea_overhead;&lt;br/&gt;
+   &lt;/p&gt;

&lt;p&gt;    LASSERT(handle != NULL);&lt;/p&gt;

&lt;p&gt;+#if defined(LDISKFS_FEATURE_INCOMPAT_EA_INODE)&lt;br/&gt;
+   if (LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EA_INODE))&lt;br/&gt;
+       max_ea_size = LDISKFS_XATTR_MAX_LARGE_EA_SIZE;&lt;br/&gt;
+   else&lt;br/&gt;
+#endif&lt;br/&gt;
+       max_ea_size = sb-&amp;gt;s_blocksize;&lt;br/&gt;
+&lt;br/&gt;
+   /* we also need take account in the overhead,&lt;br/&gt;
+    * xattr_header + magic + xattr_entry_head */&lt;br/&gt;
+   ea_overhead = sizeof(struct ldiskfs_xattr_header) + sizeof(__u32) +&lt;br/&gt;
+             LDISKFS_XATTR_LEN(XATTR_NAME_MAX_LEN);&lt;br/&gt;
+&lt;br/&gt;
+   if (buf-&amp;gt;lb_len &amp;gt; max_ea_size - ea_overhead)&lt;br/&gt;
+       return -E2BIG;&lt;br/&gt;
+&lt;br/&gt;
    oh = container_of0(handle, struct osd_thandle, ot_super);&lt;br/&gt;
    LASSERT(oh-&amp;gt;ot_handle == NULL);&lt;/p&gt;</comment>
                            <comment id="88673" author="green" created="Thu, 10 Jul 2014 03:47:19 +0000"  >&lt;p&gt;So, according to comments in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt;, Bull hit the exact same problem you did with patchset 2 and Wangdi explains that he failed to take xattr overhead into account, that&apos;s why the patchset 3 was created.&lt;/p&gt;

&lt;p&gt;In the end I think you just need to drop patchset 2 from your tree and add patchset 3.&lt;/p&gt;</comment>
                            <comment id="88766" author="simmonsja" created="Thu, 10 Jul 2014 20:21:51 +0000"  >&lt;p&gt;I merged the latest version of the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; patch as well as include the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4260&quot; title=&quot;ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4260&quot;&gt;&lt;del&gt;LU-4260&lt;/del&gt;&lt;/a&gt; fix. I have tested on the small scale with no problems. Once it is production and we have no problems it will be posted here.&lt;/p&gt;</comment>
                            <comment id="88767" author="simmonsja" created="Thu, 10 Jul 2014 20:22:41 +0000"  >&lt;p&gt;One more note. Lustre 2.5 already has all the correct needed fixes.&lt;/p&gt;</comment>
                            <comment id="88932" author="blakecaldwell" created="Mon, 14 Jul 2014 13:32:29 +0000"  >&lt;p&gt;Our new build with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; (patch set 3) and also &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4260&quot; title=&quot;ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4260&quot;&gt;&lt;del&gt;LU-4260&lt;/del&gt;&lt;/a&gt; resolved this issue. The file in question could be deleted. Thanks Oleg/James!&lt;/p&gt;</comment>
                            <comment id="88938" author="simmonsja" created="Mon, 14 Jul 2014 15:09:08 +0000"  >&lt;p&gt;This ticket can be closed.&lt;/p&gt;</comment>
                            <comment id="88943" author="jfc" created="Mon, 14 Jul 2014 15:55:21 +0000"  >&lt;p&gt;Thanks James!&lt;br/&gt;
~ jfc&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="15300" name="dumpe2fs.atlas-linkfarm.mdt0" size="1975" author="blakecaldwell" created="Mon, 7 Jul 2014 15:34:35 +0000"/>
                            <attachment id="15287" name="lustre.log.atlas-linkfarm" size="782715" author="blakecaldwell" created="Thu, 3 Jul 2014 21:40:32 +0000"/>
                            <attachment id="15301" name="lustre.log.atlas-linkfarm.all" size="3696185" author="blakecaldwell" created="Mon, 7 Jul 2014 15:36:21 +0000"/>
                            <attachment id="15303" name="test_file.debugfs_stat" size="1176" author="blakecaldwell" created="Mon, 7 Jul 2014 15:36:30 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwqk7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14769</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>