<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:12:27 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-990]  (lov_request.c:690:lov_update_create_set()) error creating fid xxx rc=-107</title>
                <link>https://jira.whamcloud.com/browse/LU-990</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;The customer saw the following error messages on the MDS while 128 clients were creating many small files (mostly 1 million files). They then also saw Input/output errors on some of the files when they opened them.&lt;/p&gt;

&lt;p&gt;(lov_request.c:690:lov_update_create_set()) error creating fid xxx rc=-107&lt;/p&gt;

&lt;p&gt;As far as we can see in the log files on the OSSs, there are many slow IO messages at the same time.&lt;/p&gt;

&lt;p&gt;-107 = -ENOTCONN; does this mean a connection was lost between server and client? (MDS to OSS as well?)&lt;/p&gt;

&lt;p&gt;Please advise.&lt;/p&gt;</description>
                <environment>lustre-1.8.3</environment>
        <key id="12888">LU-990</key>
            <summary> (lov_request.c:690:lov_update_create_set()) error creating fid xxx rc=-107</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                    </labels>
                <created>Fri, 13 Jan 2012 09:37:05 +0000</created>
                <updated>Thu, 6 Feb 2014 19:06:06 +0000</updated>
                            <resolved>Thu, 6 Feb 2014 19:06:06 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="26481" author="pjones" created="Fri, 13 Jan 2012 09:38:46 +0000"  >&lt;p&gt;Bobijam&lt;/p&gt;

&lt;p&gt;Could you please look into this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="26489" author="bobijam" created="Fri, 13 Jan 2012 10:44:32 +0000"  >&lt;p&gt;I saw several different kinds of error messages in the logs:&lt;/p&gt;

&lt;p&gt;in OS1, there are many network error messages.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Dec 21 00:45:02 os1 kernel: LustreError: 11686:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.23@o2ib: -101
Dec 21 00:45:02 os1 kernel: LustreError: 11683:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.28@o2ib: -101
Dec 21 00:46:44 os1 kernel: LustreError: 11639:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.26@o2ib: -101
Dec 21 00:46:45 os1 kernel: LustreError: 11608:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.25@o2ib: -101
Dec 21 00:48:52 os1 kernel: LustreError: 11876:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.22@o2ib: -101
Dec 21 00:48:56 os1 kernel: LustreError: 11855:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.24@o2ib: -101
Dec 21 02:44:43 os1 kernel: LustreError: 11909:0:(o2iblnd_cb.c:1232:kiblnd_connect_peer()) Can&apos;t resolve addr for 172.20.1.1@o2ib: -101
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;#define	ENETUNREACH	101	/* Network is unreachable */&lt;/p&gt;

&lt;p&gt;on OS2&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Dec 22 00:54:22 os2 kernel: LustreError: 11568:0:(lib-move.c:2441:LNetPut()) Error sending PUT to 12345-172.20.1.61@o2ib: -113
Dec 22 00:54:22 os2 kernel: LustreError: 8091:0:(lib-move.c:2441:LNetPut()) Error sending PUT to 12345-172.20.1.59@o2ib: -113
Dec 22 16:19:55 os2 kernel: LustreError: 11667:0:(lib-move.c:2441:LNetPut()) Error sending PUT to 12345-172.20.1.56@o2ib: -113
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;#define	EHOSTUNREACH	113	/* No route to host */&lt;/p&gt;

&lt;p&gt;Are there network problems in the site?&lt;/p&gt;</comment>
                            <comment id="26495" author="ihara" created="Fri, 13 Jan 2012 11:19:06 +0000"  >&lt;p&gt;Thanks. We asked about the network issue, but they said there are no error messages on the InfiniBand side.&lt;br/&gt;
But let me ask again. They did run a large job which uses a lot of CPU resources. The clients might have been slow to respond or taken a long time to do anything.&lt;br/&gt;
Even if there were network issues, we can&apos;t see any eviction messages. So, the clients were not killed completely. &lt;/p&gt;

&lt;p&gt;Any advice on how to prevent this issue if they run the large job again?&lt;/p&gt;</comment>
                            <comment id="26566" author="bobijam" created="Fri, 13 Jan 2012 21:06:39 +0000"  >&lt;p&gt;Without a debug log I don&apos;t know what the exact problem is, though I do see many heavy IO load warning messages in the syslogs.&lt;/p&gt;

&lt;p&gt;Please check out &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-952&quot; title=&quot;Hung thread with HIGH OSS load&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-952&quot;&gt;&lt;del&gt;LU-952&lt;/del&gt;&lt;/a&gt;, where there is discussion and a patch for the high load issue. Disabling the read-only cache and write-through cache will help with this issue, and if not, please collect a Lustre debug log and upload it.&lt;/p&gt;</comment>
                            <comment id="76373" author="ihara" created="Thu, 6 Feb 2014 19:02:35 +0000"  >&lt;p&gt;We haven&apos;t seen same issue again very much. Please close this issue.&lt;/p&gt;</comment>
                            <comment id="76374" author="pjones" created="Thu, 6 Feb 2014 19:06:06 +0000"  >&lt;p&gt;ok - thanks Ihara!&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvja7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6838</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>