<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:22:23 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2102] changelog_user_init_cb())  ASSERTION( rec-&gt;cur_hdr.lrh_type == CHANGELOG_USER_REC ) </title>
                <link>https://jira.whamcloud.com/browse/LU-2102</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Two of my nodes running sanity in a loop crashed with this overnight.&lt;br/&gt;
I have a crashdump from one of the occurrences if somebody needs something from there, but tell me soon while I still have the modules and vmlinux for it.&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[ 4343.790166] LDISKFS-fs (loop0): mounted filesystem with ordered data mode. quota=on. Opts: 
[ 4343.832370] Lustre: MGC192.168.10.210@tcp: Reactivating &lt;span class=&quot;code-keyword&quot;&gt;import&lt;/span&gt;
[ 4343.834877] Lustre: Found index 0 &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; lustre-MDT0000, updating log
[ 4343.841331] Lustre: Modifying parameter lustre-MDT0000-mdtlov.lov.stripesize in log lustre-MDT0000
[ 4343.847226] LustreError: 20504:0:(mgc_request.c:248:do_config_log_add()) failed processing sptlrpc log: -2
[ 4343.874879] Lustre: lustre-MDT0000: used disk, loading
[ 4343.875534] LustreError: 20552:0:(sec_config.c:1024:sptlrpc_target_local_copy_conf()) missing llog context
[ 4344.061652] Lustre: 20552:0:(mdt_lproc.c:418:lprocfs_wr_identity_upcall()) lustre-MDT0000: identity upcall set to /home/green/git/lustre-release/lustre/utils/l_getidentity
[ 4344.072933] Lustre: lustre-MDT0000: Temporarily refusing client connection from 0@lo
[ 4344.073894] LustreError: 11-0: an error occurred &lt;span class=&quot;code-keyword&quot;&gt;while&lt;/span&gt; communicating with 0@lo. The mds_connect operation failed with -11
[ 4344.080580] LustreError: 20504:0:(mdd_device.c:219:changelog_user_init_cb()) ASSERTION( rec-&amp;gt;cur_hdr.lrh_type == CHANGELOG_USER_REC ) failed: 
[ 4344.081572] LustreError: 20504:0:(mdd_device.c:219:changelog_user_init_cb()) LBUG
[ 4344.082363] Pid: 20504, comm: mount.lustre
[ 4344.082776] 
[ 4344.082776] Call Trace:
[ 4344.083461]  [&amp;lt;ffffffffa0b24915&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[ 4344.083980]  [&amp;lt;ffffffffa0b24f27&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[ 4344.084564]  [&amp;lt;ffffffffa05a9d57&amp;gt;] changelog_user_init_cb+0x127/0x170 [mdd]
[ 4344.086120]  [&amp;lt;ffffffffa044b568&amp;gt;] llog_reverse_process+0x5d8/0x9c0 [obdclass]
[ 4344.086707]  [&amp;lt;ffffffffa05a9c30&amp;gt;] ? changelog_user_init_cb+0x0/0x170 [mdd]
[ 4344.087253]  [&amp;lt;ffffffffa044e18e&amp;gt;] llog_cat_reverse_process_cb+0x17e/0x260 [obdclass]
[ 4344.088137]  [&amp;lt;ffffffffa044b568&amp;gt;] llog_reverse_process+0x5d8/0x9c0 [obdclass]
[ 4344.088699]  [&amp;lt;ffffffffa044e010&amp;gt;] ? llog_cat_reverse_process_cb+0x0/0x260 [obdclass]
[ 4344.089540]  [&amp;lt;ffffffffa044da30&amp;gt;] ? cat_cancel_cb+0x0/0x5e0 [obdclass]
[ 4344.090117]  [&amp;lt;ffffffffa044cdd8&amp;gt;] llog_cat_reverse_process+0x78/0x260 [obdclass]
[ 4344.090802]  [&amp;lt;ffffffffa05a9c30&amp;gt;] ? changelog_user_init_cb+0x0/0x170 [mdd]
[ 4344.091060]  [&amp;lt;ffffffffa044ca54&amp;gt;] ? llog_process+0x14/0x20 [obdclass]
[ 4344.091301]  [&amp;lt;ffffffffa05af69a&amp;gt;] mdd_prepare+0xe2a/0x1140 [mdd]
[ 4344.091948]  [&amp;lt;ffffffffa0c147da&amp;gt;] mdt_prepare+0x5a/0x14a0 [mdt]
[ 4344.092209]  [&amp;lt;ffffffffa04a2ade&amp;gt;] server_start_targets+0x147e/0x1d90 [obdclass]
[ 4344.092599]  [&amp;lt;ffffffffa048e050&amp;gt;] ? class_config_llog_handler+0x0/0x1800 [obdclass]
[ 4344.092967]  [&amp;lt;ffffffffa04a4798&amp;gt;] lustre_fill_super+0x13a8/0x1af0 [obdclass]
[ 4344.093195]  [&amp;lt;ffffffff8117d060&amp;gt;] ? set_anon_super+0x0/0x110
[ 4344.093411]  [&amp;lt;ffffffffa04a33f0&amp;gt;] ? lustre_fill_super+0x0/0x1af0 [obdclass]
[ 4344.093660]  [&amp;lt;ffffffff8117e4cf&amp;gt;] get_sb_nodev+0x5f/0xa0
[ 4344.093874]  [&amp;lt;ffffffffa048f955&amp;gt;] lustre_get_sb+0x25/0x30 [obdclass]
[ 4344.094090]  [&amp;lt;ffffffff8117e12b&amp;gt;] vfs_kern_mount+0x7b/0x1b0
[ 4344.094293]  [&amp;lt;ffffffff8117e2d2&amp;gt;] do_kern_mount+0x52/0x130
[ 4344.094504]  [&amp;lt;ffffffff8119c992&amp;gt;] do_mount+0x2d2/0x8c0
[ 4344.094714]  [&amp;lt;ffffffff8119d010&amp;gt;] sys_mount+0x90/0xe0
[ 4344.094911]  [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
[ 4344.095118] 
[ 4344.096049] Kernel panic - not syncing: LBUG
[ 4344.096051] Pid: 20504, comm: mount.lustre Not tainted 2.6.32-debug #6
[ 4344.096053] Call Trace:
[ 4344.096059]  [&amp;lt;ffffffff814f75e4&amp;gt;] ? panic+0xa0/0x168
[ 4344.096069]  [&amp;lt;ffffffffa0b24f7b&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
[ 4344.096076]  [&amp;lt;ffffffffa05a9d57&amp;gt;] ? changelog_user_init_cb+0x127/0x170 [mdd]
[ 4344.096092]  [&amp;lt;ffffffffa044b568&amp;gt;] ? llog_reverse_process+0x5d8/0x9c0 [obdclass]
[ 4344.096097]  [&amp;lt;ffffffffa05a9c30&amp;gt;] ? changelog_user_init_cb+0x0/0x170 [mdd]
[ 4344.096112]  [&amp;lt;ffffffffa044e18e&amp;gt;] ? llog_cat_reverse_process_cb+0x17e/0x260 [obdclass]
[ 4344.096127]  [&amp;lt;ffffffffa044b568&amp;gt;] ? llog_reverse_process+0x5d8/0x9c0 [obdclass]
[ 4344.096142]  [&amp;lt;ffffffffa044e010&amp;gt;] ? llog_cat_reverse_process_cb+0x0/0x260 [obdclass]
[ 4344.096157]  [&amp;lt;ffffffffa044da30&amp;gt;] ? cat_cancel_cb+0x0/0x5e0 [obdclass]
[ 4344.096171]  [&amp;lt;ffffffffa044cdd8&amp;gt;] ? llog_cat_reverse_process+0x78/0x260 [obdclass]
[ 4344.096176]  [&amp;lt;ffffffffa05a9c30&amp;gt;] ? changelog_user_init_cb+0x0/0x170 [mdd]
[ 4344.096191]  [&amp;lt;ffffffffa044ca54&amp;gt;] ? llog_process+0x14/0x20 [obdclass]
[ 4344.096196]  [&amp;lt;ffffffffa05af69a&amp;gt;] ? mdd_prepare+0xe2a/0x1140 [mdd]
[ 4344.096205]  [&amp;lt;ffffffffa0c147da&amp;gt;] ? mdt_prepare+0x5a/0x14a0 [mdt]
[ 4344.096223]  [&amp;lt;ffffffffa04a2ade&amp;gt;] ? server_start_targets+0x147e/0x1d90 [obdclass]
[ 4344.096241]  [&amp;lt;ffffffffa048e050&amp;gt;] ? class_config_llog_handler+0x0/0x1800 [obdclass]
[ 4344.096258]  [&amp;lt;ffffffffa04a4798&amp;gt;] ? lustre_fill_super+0x13a8/0x1af0 [obdclass]
[ 4344.096260]  [&amp;lt;ffffffff8117d060&amp;gt;] ? set_anon_super+0x0/0x110
[ 4344.096276]  [&amp;lt;ffffffffa04a33f0&amp;gt;] ? lustre_fill_super+0x0/0x1af0 [obdclass]
[ 4344.096278]  [&amp;lt;ffffffff8117e4cf&amp;gt;] ? get_sb_nodev+0x5f/0xa0
[ 4344.096295]  [&amp;lt;ffffffffa048f955&amp;gt;] ? lustre_get_sb+0x25/0x30 [obdclass]
[ 4344.096297]  [&amp;lt;ffffffff8117e12b&amp;gt;] ? vfs_kern_mount+0x7b/0x1b0
[ 4344.096299]  [&amp;lt;ffffffff8117e2d2&amp;gt;] ? do_kern_mount+0x52/0x130
[ 4344.096301]  [&amp;lt;ffffffff8119c992&amp;gt;] ? do_mount+0x2d2/0x8c0
[ 4344.096303]  [&amp;lt;ffffffff8119d010&amp;gt;] ? sys_mount+0x90/0xe0
[ 4344.096306]  [&amp;lt;ffffffff8100b0f2&amp;gt;] ? system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>single node with 4 or 8 cores</environment>
        <key id="16270">LU-2102</key>
            <summary>changelog_user_init_cb())  ASSERTION( rec-&gt;cur_hdr.lrh_type == CHANGELOG_USER_REC ) </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                    </labels>
                <created>Sun, 7 Oct 2012 13:35:30 +0000</created>
                <updated>Fri, 19 Apr 2013 16:22:32 +0000</updated>
                            <resolved>Fri, 19 Apr 2013 16:22:32 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="46135" author="tappro" created="Mon, 8 Oct 2012 06:49:43 +0000"  >&lt;p&gt;first though was this is padding record but such records are not setting bits in llog header and not processed as result. So I am going to replace ASSERT with ASSERTF() and put more debug info there at first&lt;/p&gt;</comment>
                            <comment id="46185" author="green" created="Mon, 8 Oct 2012 13:11:29 +0000"  >&lt;p&gt;Ok, just had another crash that Mike advises is related to this one:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;[21857.010264] Lustre: DEBUG MARKER: == replay-single test 52: time out lock replay (3764) == 09:25:53 (1349702753)
[21867.957058] LDISKFS-fs (loop0): recovery complete
[21867.958976] LDISKFS-fs (loop0): mounted filesystem with ordered data mode. quota=on. Opts: 
[21867.985231] LustreError: 29518:0:(client.c:1125:ptlrpc_import_delay_req()) @@@ IMP_INVALID  req@ffff88005ce79bf0 x1415264023371217/t0(0) o101-&amp;gt;MGC192.168.10.210@tcp@0@lo:26/25 lens 328/384 e 0 to 0 dl 0 ref 2 fl Rpc:/0/ffffffff rc 0/-1
[21867.986762] LustreError: 29518:0:(client.c:1125:ptlrpc_import_delay_req()) Skipped 5 previous similar messages
[21867.993177] LustreError: 18458:0:(llog_osd.c:579:llog_osd_next_block()) ASSERTION( last_rec-&amp;gt;lrh_index == tail-&amp;gt;lrt_index ) failed: 
[21867.994035] LustreError: 18458:0:(llog_osd.c:579:llog_osd_next_block()) LBUG
[21867.994525] Pid: 18458, comm: ll_mgs_0001
[21867.994842] 
[21867.994843] Call Trace:
[21867.995536]  [&amp;lt;ffffffffa0c6d915&amp;gt;] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[21867.996108]  [&amp;lt;ffffffffa0c6df27&amp;gt;] lbug_with_loc+0x47/0xb0 [libcfs]
[21867.996653]  [&amp;lt;ffffffffa0507ac5&amp;gt;] llog_osd_next_block+0xa55/0xa60 [obdclass]
[21867.996708]  [&amp;lt;ffffffffa066bdf6&amp;gt;] ? lustre_pack_reply_flags+0xb6/0x210 [ptlrpc]
[21867.996754]  [&amp;lt;ffffffffa068845c&amp;gt;] llog_origin_handle_next_block+0x55c/0x780 [ptlrpc]
[21867.996766]  [&amp;lt;ffffffffa0a6df73&amp;gt;] mgs_handle+0xb13/0x11e0 [mgs]
[21867.996782]  [&amp;lt;ffffffffa0c7e6d1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
[21867.996826]  [&amp;lt;ffffffffa067a883&amp;gt;] ptlrpc_server_handle_request+0x463/0xe70 [ptlrpc]
[21867.996840]  [&amp;lt;ffffffffa0c6e66e&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
[21867.996882]  [&amp;lt;ffffffffa0673571&amp;gt;] ? ptlrpc_wait_event+0xb1/0x2a0 [ptlrpc]
[21867.996887]  [&amp;lt;ffffffff81051f73&amp;gt;] ? __wake_up+0x53/0x70
[21867.996932]  [&amp;lt;ffffffffa067d41a&amp;gt;] ptlrpc_main+0xb9a/0x1960 [ptlrpc]
[21867.996965]  [&amp;lt;ffffffffa067c880&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[21867.996968]  [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
[21867.997000]  [&amp;lt;ffffffffa067c880&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[21867.997031]  [&amp;lt;ffffffffa067c880&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[21867.997034]  [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
[21867.997035] 
[21867.998239] Kernel panic - not syncing: LBUG
[21867.998242] Pid: 18458, comm: ll_mgs_0001 Not tainted 2.6.32-debug #6
[21867.998244] Call Trace:
[21867.998252]  [&amp;lt;ffffffff814f75e4&amp;gt;] ? panic+0xa0/0x168
[21867.998267]  [&amp;lt;ffffffffa0c6df7b&amp;gt;] ? lbug_with_loc+0x9b/0xb0 [libcfs]
[21867.998305]  [&amp;lt;ffffffffa0507ac5&amp;gt;] ? llog_osd_next_block+0xa55/0xa60 [obdclass]
[21867.998349]  [&amp;lt;ffffffffa066bdf6&amp;gt;] ? lustre_pack_reply_flags+0xb6/0x210 [ptlrpc]
[21867.998393]  [&amp;lt;ffffffffa068845c&amp;gt;] ? llog_origin_handle_next_block+0x55c/0x780 [ptlrpc]
[21867.998404]  [&amp;lt;ffffffffa0a6df73&amp;gt;] ? mgs_handle+0xb13/0x11e0 [mgs]
[21867.998420]  [&amp;lt;ffffffffa0c7e6d1&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
[21867.998467]  [&amp;lt;ffffffffa067a883&amp;gt;] ? ptlrpc_server_handle_request+0x463/0xe70 [ptlrpc]
[21867.998478]  [&amp;lt;ffffffffa0c6e66e&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
[21867.998511]  [&amp;lt;ffffffffa0673571&amp;gt;] ? ptlrpc_wait_event+0xb1/0x2a0 [ptlrpc]
[21867.998514]  [&amp;lt;ffffffff81051f73&amp;gt;] ? __wake_up+0x53/0x70
[21867.998546]  [&amp;lt;ffffffffa067d41a&amp;gt;] ? ptlrpc_main+0xb9a/0x1960 [ptlrpc]
[21867.998577]  [&amp;lt;ffffffffa067c880&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[21867.998580]  [&amp;lt;ffffffff8100c14a&amp;gt;] ? child_rip+0xa/0x20
[21867.998611]  [&amp;lt;ffffffffa067c880&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[21867.998642]  [&amp;lt;ffffffffa067c880&amp;gt;] ? ptlrpc_main+0x0/0x1960 [ptlrpc]
[21867.998645]  [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="46215" author="green" created="Mon, 8 Oct 2012 18:02:30 +0000"  >&lt;p&gt;Ok, I think I have a solid reproducer for the original issue.&lt;/p&gt;

&lt;p&gt;It seems it has to do with a full OST.&lt;br/&gt;
Some test (Still tracking) seems to not remove a big file after itself on error which leads to a bunch of tests to fail on the way (from around test 65) to the final test 900 remount where it finally dies.&lt;/p&gt;</comment>
                            <comment id="46216" author="green" created="Mon, 8 Oct 2012 18:03:42 +0000"  >&lt;p&gt;Ok, here&apos;s an even easier case for you.&lt;br/&gt;
unbzip this file and try to mount it as lustre (it&apos;s an MDS fs image), you&apos;ll get the assertion failure right away.&lt;/p&gt;</comment>
                            <comment id="46219" author="green" created="Mon, 8 Oct 2012 18:38:37 +0000"  >&lt;p&gt;I just confirmed that when this bad MDT image was produced, OST0 had 0 space:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;/tmp/lustre-ost1      184M  184M     0 100% /mnt/ost1
/tmp/lustre-ost2      184M   55M  119M  32% /mnt/ost2
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;This is mounted as ldiskfs.&lt;/p&gt;

&lt;p&gt;Apparently there&apos;s some bad interaction between test 64 and test 65j or something, which I filed a separate bug for, but the importance of this info here is that a full OST seems to cause the MDS to enter this state.&lt;/p&gt;</comment>
                            <comment id="46239" author="tappro" created="Tue, 9 Oct 2012 02:43:46 +0000"  >&lt;p&gt;Fix for changelog issue:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/4229&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4229&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="46305" author="tappro" created="Wed, 10 Oct 2012 02:10:39 +0000"  >&lt;p&gt;merged&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11957" name="lustre-mdt1-bad.bz2" size="2724146" author="green" created="Mon, 8 Oct 2012 18:03:42 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv5af:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4390</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>