<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:50:42 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5346] lod_ah_init()) ASSERTION( lc-&gt;ldo_stripenr == 0 ) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-5346</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;If mdd_create_data() fails after calling mdd_object_make_hint() then the lod object will have ldo_stripenr set. A second call to mdd_create_data() will then trigger the failed assertion in lod_ah_init().&lt;/p&gt;

&lt;p&gt;This was found by memory allocation fault injection.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;llmount.sh
cd /mnt/lustre
sys_mknod f0
echo /root/lustre-release/lustre/osd-ldiskfs/osd_handler.c:945 &amp;gt; /proc/fs/lustre/alloc_fail # fail to allocate the transaction in osd_trans_create()
sys_open f0 w # open(&quot;f0&quot;, O_WRONLY)
sys_open f0 w # open(&quot;f0&quot;, O_WRONLY)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  338.633638] LustreError: 5233:0:(lod_object.c:2606:lod_ah_init()) ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:
[  338.635476] LustreError: 5233:0:(lod_object.c:2606:lod_ah_init()) LBUG
[  338.636652] Pid: 5233, comm: mdt00_005
[  338.637354]
[  338.637355] Call Trace:
[  338.638094]  [&amp;lt;ffffffffa02be8c5&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[  338.639359]  [&amp;lt;ffffffffa02beec7&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[  338.640490]  [&amp;lt;ffffffffa0d5e43c&amp;gt;] lod_ah_init+0xadc/0xb40 [lod]
[  338.641591]  [&amp;lt;ffffffffa0817339&amp;gt;] mdd_object_make_hint+0x139/0x180 [mdd]
[  338.642804]  [&amp;lt;ffffffffa08062a9&amp;gt;] mdd_create_data+0x359/0x7f0 [mdd]
[  338.644047]  [&amp;lt;ffffffffa0cc67dd&amp;gt;] mdt_mfd_open+0xc1d/0xf40 [mdt]
[  338.645182]  [&amp;lt;ffffffffa0cc6d00&amp;gt;] mdt_finish_open+0x200/0xc50 [mdt]
[  338.646351]  [&amp;lt;ffffffffa0cc1255&amp;gt;] ? mdt_object_open_lock+0x345/0x9d0 [mdt]
[  338.647603]  [&amp;lt;ffffffffa0cc7cce&amp;gt;] mdt_open_by_fid_lock+0x57e/0x900 [mdt]
[  338.648821]  [&amp;lt;ffffffffa0cc8c28&amp;gt;] mdt_reint_open+0x8c8/0x20b0 [mdt]
[  338.649974]  [&amp;lt;ffffffff815547cb&amp;gt;] ? _spin_unlock+0x2b/0x40
[  338.650985]  [&amp;lt;ffffffffa02dc60c&amp;gt;] ? upcall_cache_get_entry+0x3dc/0x8a0 [libcfs]
[  338.652347]  [&amp;lt;ffffffffa0475890&amp;gt;] ? lu_ucred+0x20/0x30 [obdclass]
[  338.653490]  [&amp;lt;ffffffffa0c93b85&amp;gt;] ? mdt_ucred+0x15/0x20 [mdt]
[  338.654580]  [&amp;lt;ffffffffa0cac90c&amp;gt;] ? mdt_root_squash+0x2c/0x3f0 [mdt]
[  338.655786]  [&amp;lt;ffffffffa06b4236&amp;gt;] ? __req_capsule_get+0x166/0x6e0 [ptlrpc]
[  338.657037]  [&amp;lt;ffffffffa0cb07a1&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
[  338.658132]  [&amp;lt;ffffffffa0c9baf3&amp;gt;] mdt_reint_internal+0x4c3/0x7c0 [mdt]
[  338.659314]  [&amp;lt;ffffffffa0c9bfe6&amp;gt;] mdt_intent_reint+0x1f6/0x520 [mdt]
[  338.660473]  [&amp;lt;ffffffffa0c9a6c9&amp;gt;] mdt_intent_policy+0x499/0xca0 [mdt]
[  338.661691]  [&amp;lt;ffffffffa0645422&amp;gt;] ldlm_lock_enqueue+0x302/0x920 [ptlrpc]
[  338.662931]  [&amp;lt;ffffffffa066d651&amp;gt;] ldlm_handle_enqueue0+0x341/0x11e0 [ptlrpc]
[  338.664249]  [&amp;lt;ffffffffa06ec9a2&amp;gt;] tgt_enqueue+0x62/0x1d0 [ptlrpc]
[  338.665403]  [&amp;lt;ffffffffa06ebc35&amp;gt;] tgt_request_handle+0x245/0xad0 [ptlrpc]
[  338.666663]  [&amp;lt;ffffffffa069ed91&amp;gt;] ptlrpc_main+0xcf1/0x1880 [ptlrpc]
[  338.667841]  [&amp;lt;ffffffffa069e0a0&amp;gt;] ? ptlrpc_main+0x0/0x1880 [ptlrpc]
[  338.668982]  [&amp;lt;ffffffff8109eab6&amp;gt;] kthread+0x96/0xa0
[  338.669902]  [&amp;lt;ffffffff8100c30a&amp;gt;] child_rip+0xa/0x20
[  338.670806]  [&amp;lt;ffffffff81554710&amp;gt;] ? _spin_unlock_irq+0x30/0x40
[  338.671868]  [&amp;lt;ffffffff8100bb10&amp;gt;] ? restore_args+0x0/0x30
[  338.672848]  [&amp;lt;ffffffff8109ea20&amp;gt;] ? kthread+0x0/0xa0
[  338.673761]  [&amp;lt;ffffffff8100c300&amp;gt;] ? child_rip+0x0/0x20
[  338.674690]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="25578">LU-5346</key>
            <summary>lod_ah_init()) ASSERTION( lc-&gt;ldo_stripenr == 0 ) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="jhammond">John Hammond</reporter>
                        <labels>
                            <label>lod</label>
                            <label>malloc</label>
                    </labels>
                <created>Mon, 14 Jul 2014 23:36:58 +0000</created>
                <updated>Tue, 23 Feb 2016 08:08:21 +0000</updated>
                                            <version>Lustre 2.6.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="90250" author="jhammond" created="Mon, 28 Jul 2014 23:00:30 +0000"  >&lt;p&gt;Alex, can you suggest the best way to clean up the partially set striping data (ldo_stripenr, ldo_stripe) that is created by a failure in mdd_create_data()? Should we add a function to the DT API (dt_create_data_fini()) to handle this case?&lt;/p&gt;</comment>
                            <comment id="90259" author="bzzz" created="Tue, 29 Jul 2014 03:17:51 +0000"  >&lt;p&gt;Given that all of the data created by lod_ah_init() is in-core only, I&apos;d suggest either: 1) recognizing that it is already set and returning immediately, or 2) just resetting everything from the beginning. Would that be possible?&lt;/p&gt;</comment>
                            <comment id="90344" author="jhammond" created="Tue, 29 Jul 2014 18:06:37 +0000"  >&lt;p&gt;I&apos;m OK with that but it&apos;s not enough. If mdd_create_data() fails after lod_ah_init() has returned then we have an object with ldo_stripenr != 0 but ldo_stripe == NULL. Operations on this object will trigger several assertions on lod. If it fails after lod_declare_xattr_set() then we need to call lod_object_free_striping() but the API does not offer us a convenient way to do so.&lt;/p&gt;

&lt;p&gt;The first problem is not so hard to fix. I think that lod_ah_init() should not be setting ldo_stripenr. We have some holes in lod_object where lod_ah_init() could put a ldo_stripe_count_hint that we pick up later.&lt;/p&gt;

&lt;p&gt;The second problem of failure after declare is trickier.&lt;/p&gt;</comment>
                            <comment id="90355" author="bzzz" created="Tue, 29 Jul 2014 18:57:02 +0000"  >&lt;p&gt;IIRC, we discussed a related issue in the past: currently, locks are supposed to be taken &quot;inside&quot; the transaction (it is still an open question how good that is, but the benefits are known). This means that the intermediate state between &quot;declare&quot; and &quot;execute&quot; is exposed to other threads, which isn&apos;t good, of course. Ideally, &quot;declare&quot; should work with its own copy of the data (say, stored somewhere in the transaction, which is private), while &quot;execute&quot; should use that private data and apply the changes atomically. This would solve this issue as well, because that private data would be released automatically with -&amp;gt;do_trans_stop().  I&apos;ll think about that (again, sorry).&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwrfj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>14912</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>