<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:15:18 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8177] osp-syn threads in D state</title>
                <link>https://jira.whamcloud.com/browse/LU-8177</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;When trying to mount the mdt, all osp-syn threads get stuck in &apos;D&apos; state. &lt;/p&gt;

&lt;p&gt;Debug logs are filled with these messages &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;00000004:00080000:8.0:1463850740.016156:0:14081:0:(osp_sync.c:317:osp_sync_request_commit_cb()) commit req ffff883ebf799800, transno 0
00000004:00080000:8.0:1463850740.016164:0:14081:0:(osp_sync.c:351:osp_sync_interpret()) reply req ffff883ebf799800/1, rc -2, transno 0
00000100:00100000:8.0:1463850740.016176:0:14081:0:(client.c:1872:ptlrpc_check_set()) Completed RPC pname:cluuid:pid:xid:nid:opc ptlrpcd_3:nbp2-MDT0000-mdtlov_UUID:14081:1534957896521600:10.151.26.98@o2ib:6
00000004:00080000:9.0:1463850740.016219:0:14087:0:(osp_sync.c:317:osp_sync_request_commit_cb()) commit req ffff883ebed48800, transno 0
00000004:00080000:9.0:1463850740.016226:0:14087:0:(osp_sync.c:351:osp_sync_interpret()) reply req ffff883ebed48800/1, rc -2, transno 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt; 

&lt;p&gt;I will upload full debug logs to ftp site.&lt;/p&gt;</description>
                <environment></environment>
        <key id="37117">LU-8177</key>
            <summary>osp-syn threads in D state</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="4">Incomplete</resolution>
                                        <assignee username="jfc">John Fuchs-Chesney</assignee>
                                    <reporter username="mhanafi">Mahmoud Hanafi</reporter>
                        <labels>
                    </labels>
                <created>Sat, 21 May 2016 17:28:28 +0000</created>
                <updated>Fri, 17 Jun 2016 23:15:17 +0000</updated>
                            <resolved>Fri, 17 Jun 2016 23:15:17 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>9</watches>
                                                                            <comments>
                            <comment id="153123" author="mhanafi" created="Sat, 21 May 2016 17:29:51 +0000"  >&lt;p&gt;Uploaded file to ftp:/uploads/LU8177/s600.debug.out.gz&lt;/p&gt;</comment>
                            <comment id="153124" author="jfc" created="Sat, 21 May 2016 18:05:47 +0000"  >&lt;p&gt;Mahmoud,&lt;/p&gt;

&lt;p&gt;Can you please clarify if you have a production site down emergency please? We rate that as a SEV-1 event, and you have selected SEV-4.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="153125" author="mhanafi" created="Sat, 21 May 2016 18:07:59 +0000"  >&lt;p&gt;Sorry it should be level 1. &lt;/p&gt;</comment>
                            <comment id="153126" author="jfc" created="Sat, 21 May 2016 18:08:09 +0000"  >&lt;p&gt;Email from Mahmoud: &quot;Sorry this should be severity1. The production site is down and unusable.&quot;&lt;/p&gt;

&lt;p&gt;~ jfc.&lt;/p&gt;</comment>
                            <comment id="153127" author="jfc" created="Sat, 21 May 2016 18:15:15 +0000"  >&lt;p&gt;Assigning to me &amp;#8211; Oleg is looking.&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="153128" author="green" created="Sat, 21 May 2016 18:19:40 +0000"  >&lt;p&gt;Are there any messages in dmesg on mds or osts?&lt;br/&gt;
Is this a normal mount after a normal shutdown? a failover after something else?&lt;br/&gt;
are the OSTs up?&lt;/p&gt;</comment>
                            <comment id="153129" author="bzzz" created="Sat, 21 May 2016 18:41:23 +0000"  >&lt;p&gt;I think this can be a dup of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7079&quot; title=&quot;OSP shouldn&amp;#39;t discard requests due to imp_peer_committed_transno&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7079&quot;&gt;&lt;del&gt;LU-7079&lt;/del&gt;&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="153130" author="bzzz" created="Sat, 21 May 2016 18:42:15 +0000"  >&lt;p&gt;basically some llog cancels got lost by mistake causing lots of IO to rescan llogs at startup.&lt;/p&gt;</comment>
                            <comment id="153131" author="mhanafi" created="Sat, 21 May 2016 18:58:31 +0000"  >&lt;p&gt;This was a remount after power down. &lt;br/&gt;
The OST are mounted. &lt;br/&gt;
The shutdown was normal.&lt;/p&gt;

&lt;p&gt;I unmounted all the OSTs and then the mdt got mounted. Then I remounted the OSTs and the mdt went back to osp-sync in &apos;D&apos; state, but at least I am able to mount it on the client.&lt;/p&gt;

&lt;p&gt;So do we need to apply the patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7079&quot; title=&quot;OSP shouldn&amp;#39;t discard requests due to imp_peer_committed_transno&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7079&quot;&gt;&lt;del&gt;LU-7079&lt;/del&gt;&lt;/a&gt; and remount? Or can we somehow stop the osp-sync?&lt;/p&gt;</comment>
                            <comment id="153132" author="green" created="Sat, 21 May 2016 18:59:40 +0000"  >&lt;p&gt;Alex advises that the condition will clear on its own after all llogs are reprocessed. The duration of that is hard to tell, as it depends on the number of those llogs.&lt;/p&gt;</comment>
                            <comment id="153133" author="green" created="Sat, 21 May 2016 19:01:58 +0000"  >&lt;p&gt;if you really need to clear the condition immediately, it&apos;s possible to unmount the MDT, mount it as ldiskfs, remove the stale llogs, unmount ldiskfs and remount mdt as lustre.&lt;br/&gt;
Perhaps not all of them need removing, but just the really old ones (you can tell by the date).&lt;/p&gt;</comment>
                            <comment id="153134" author="mhanafi" created="Sat, 21 May 2016 19:04:10 +0000"  >&lt;p&gt;I looked in /O/1/d* and there were files going back to 2015. &lt;/p&gt;

&lt;p&gt;Should I just delete everything in /O/1/* and remount?&lt;/p&gt;
</comment>
                            <comment id="153135" author="green" created="Sat, 21 May 2016 19:09:00 +0000"  >&lt;p&gt;do you use changelogs too?&lt;/p&gt;</comment>
                            <comment id="153136" author="mhanafi" created="Sat, 21 May 2016 19:15:09 +0000"  >&lt;p&gt;no we don&apos;t&lt;/p&gt;</comment>
                            <comment id="153137" author="green" created="Sat, 21 May 2016 19:15:22 +0000"  >&lt;p&gt;Generally, since I believe your system is now mountable, it&apos;s safer to just let the sync threads run their course. It would put additional load on the system, but should not be too bad.&lt;/p&gt;

&lt;p&gt;Once you apply lu7079 patch it should kill those records for good next time you reboot.&lt;br/&gt;
Without the patch some of the records would still be killed, but not all of them (and more might be amassed until next reboot) so you are looking at a similar situation next time you remount anyway.&lt;/p&gt;</comment>
                            <comment id="153138" author="mhanafi" created="Sat, 21 May 2016 19:26:42 +0000"  >&lt;p&gt;Ok thanks. You may lower the priority of the case. It did finish. &lt;/p&gt;
</comment>
                            <comment id="153139" author="jfc" created="Sat, 21 May 2016 19:30:13 +0000"  >&lt;p&gt;Thank you for the update Mahmoud.&lt;/p&gt;

&lt;p&gt;I think we&apos;ll keep the priority as it is for the time being (for recording purposes).&lt;/p&gt;

&lt;p&gt;Do you want us to keep the ticket open for a while longer? Or do you think this event is now resolved?&lt;/p&gt;

&lt;p&gt;Best regards,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="153140" author="mhanafi" created="Sat, 21 May 2016 19:45:17 +0000"  >&lt;p&gt;Please leave the case open for now.&lt;/p&gt;</comment>
                            <comment id="153145" author="jaylan" created="Sun, 22 May 2016 00:05:14 +0000"  >&lt;p&gt;We have a b2_7_fe version of the patch, but need a back port to b2_5_fe. Thanks!&lt;/p&gt;</comment>
                            <comment id="153295" author="yujian" created="Tue, 24 May 2016 05:18:57 +0000"  >&lt;p&gt;Hello Jay,&lt;/p&gt;

&lt;p&gt;Here is the back-ported patch for Lustre b2_5_fe branch: &lt;a href=&quot;http://review.whamcloud.com/20392&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/20392&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="153585" author="jaylan" created="Wed, 25 May 2016 23:33:06 +0000"  >&lt;p&gt;Thanks!&lt;/p&gt;</comment>
                            <comment id="154646" author="jfc" created="Fri, 3 Jun 2016 23:38:00 +0000"  >&lt;p&gt;Hello Mahmoud,&lt;/p&gt;

&lt;p&gt;Do you want us to continue to keep this ticket open?&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                            <comment id="156104" author="jfc" created="Fri, 17 Jun 2016 23:15:17 +0000"  >&lt;p&gt;Resolving as incomplete.&lt;/p&gt;

&lt;p&gt;Please let us know if any further work is required on this ticket.&lt;/p&gt;

&lt;p&gt;Thanks,&lt;br/&gt;
~ jfc.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="31840">LU-7079</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10490" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>End date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Fri, 3 Jun 2016 17:28:28 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzycev:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10020"><![CDATA[1]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                        <customfield id="customfield_10493" key="com.atlassian.jira.plugin.system.customfieldtypes:datepicker">
                        <customfieldname>Start date</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Mon, 23 May 2016 17:28:28 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                    </customfields>
    </item>
</channel>
</rss>