<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:27:11 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16457] sanity-pcc test_101a: Error: &apos;could not map uid 500 to root in namespace&apos;
</title>
                <link>https://jira.whamcloud.com/browse/LU-16457</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Minh Diep &amp;lt;mdiep@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/b93b43b5-a8f2-4ae5-895a-e04966fbb5dd&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/b93b43b5-a8f2-4ae5-895a-e04966fbb5dd&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_101a failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;could not map uid 500 to root in namespace
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;== sanity-pcc test 101a: Test auto attach in mount namespace (simulated container) ========================================================== 17:56:00 (1672854960)&lt;br/&gt;
CMD: trevis-48vm4 cat /proc/sys/user/max_user_namespaces&lt;br/&gt;
CMD: trevis-48vm4 echo 10 &amp;gt; /proc/sys/user/max_user_namespaces&lt;br/&gt;
creating user namespace for 500&lt;br/&gt;
CMD: trevis-48vm4 runas -u 500 -g 500 unshare -Um sleep 600&lt;br/&gt;
trevis-48vm4: running as uid/gid/euid/egid 500/500/500/500, groups:&lt;br/&gt;
trevis-48vm4:  &lt;span class=&quot;error&quot;&gt;&amp;#91;unshare&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;-Um&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;sleep&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;600&amp;#93;&lt;/span&gt;&lt;br/&gt;
CMD: trevis-48vm4 pgrep sleep&lt;br/&gt;
pdsh@trevis-48vm3: trevis-48vm4: ssh exited with exit code 1&lt;br/&gt;
Created NS: child (sleep) pid &lt;br/&gt;
CMD: trevis-48vm4 runas -u 500 -g 500 newuidmap 0 500 1&lt;br/&gt;
trevis-48vm4: running as uid/gid/euid/egid 500/500/500/500, groups:&lt;br/&gt;
trevis-48vm4:  &lt;span class=&quot;error&quot;&gt;&amp;#91;newuidmap&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;0&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;500&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;1&amp;#93;&lt;/span&gt;&lt;br/&gt;
trevis-48vm4: newuidmap: Could not open proc directory for target 0&lt;br/&gt;
pdsh@trevis-48vm3: trevis-48vm4: ssh exited with exit code 1&lt;br/&gt;
 sanity-pcc test_101a: @@@@@@ FAIL: could not map uid 500 to root in namespace &lt;br/&gt;
  Trace dump:&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:6406:error()&lt;br/&gt;
  = /usr/lib64/lustre/tests/sanity-pcc.sh:1523:test_101a()&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:6723:run_one()&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:6770:run_one_logged()&lt;br/&gt;
  = /usr/lib64/lustre/tests/test-framework.sh:6611:run_test()&lt;br/&gt;
  = /usr/lib64/lustre/tests/sanity-pcc.sh:1611:main()&lt;br/&gt;
Dumping lctl log to /autotest/autotest-1/2023-01-04/lustre-b2_15_full-part-2_47_38_91d6a73b-aa04-4158-9b08-8e31c70d3c23//sanity-pcc.test_101a.*.1672854968.log&lt;br/&gt;
CMD: trevis-48vm3.trevis.whamcloud.com,trevis-48vm4,trevis-48vm5,trevis-48vm6 /usr/sbin/lctl dk &amp;gt; /autotest/autotest-1/2023-01-04/lustre-b2_15_full-part-2_47_38_91d6a73b-aa04-4158-9b08-8e31c70d3c23//sanity-pcc.test_101a.debug_log.\$(hostname -s).1672854968.log;&lt;br/&gt;
		dmesg &amp;gt; /autotest/autotest-1/2023-01-04/lustre-b2_15_full-part-2_47_38_91d6a73b-aa04-4158-9b08-8e31c70d3c23//sanity-pcc.test_101a.dmesg.\$(hostname -s).1672854968.log&lt;br/&gt;
CMD: trevis-48vm4 kill -9&lt;br/&gt;
trevis-48vm4: kill: usage: kill &lt;span class=&quot;error&quot;&gt;&amp;#91;-s sigspec | -n signum | -sigspec&amp;#93;&lt;/span&gt; pid | jobspec ... or kill -l &lt;span class=&quot;error&quot;&gt;&amp;#91;sigspec&amp;#93;&lt;/span&gt;&lt;br/&gt;
pdsh@trevis-48vm3: trevis-48vm4: ssh exited with exit code 2&lt;/p&gt;

&lt;p&gt;Notice that the same test passed on 2.15.2.RC1 but failed in RC2.&lt;/p&gt;





&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
sanity-pcc test_101a - could not map uid 500 to root in namespace&lt;/p&gt;</description>
                <environment></environment>
        <key id="73905">LU-16457</key>
            <summary>sanity-pcc test_101a: Error: &apos;could not map uid 500 to root in namespace&apos;
</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="sebastien">Sebastien Buisson</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 9 Jan 2023 22:12:28 +0000</created>
                <updated>Wed, 8 Feb 2023 05:47:26 +0000</updated>
                            <resolved>Fri, 3 Feb 2023 14:07:25 +0000</resolved>
                                    <version>Lustre 2.15.1</version>
                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="358349" author="adilger" created="Tue, 10 Jan 2023 00:53:55 +0000"  >&lt;p&gt;Looks like this may be a continuation of DCO-9004?&lt;/p&gt;</comment>
                            <comment id="358351" author="adilger" created="Tue, 10 Jan 2023 01:09:04 +0000"  >&lt;p&gt;It looks like the original error is &quot;&lt;tt&gt;newuidmap: Could not open proc directory for target 0&lt;/tt&gt;&quot; but I don&apos;t know much about what this test is doing. &lt;/p&gt;

&lt;p&gt;There was one failure on 2022-12-07 with &quot;&lt;tt&gt;execvp fails running newuidmap (2): No such file or directory&lt;/tt&gt;&quot; from DCO-9004, and then with the &quot;&lt;tt&gt;target 0&lt;/tt&gt;&quot; error on 2022-12-19 and 2022-12-21 on master (2/374 runs) and the one reported here on b2_15 (1/56 runs).&lt;/p&gt;</comment>
                            <comment id="358352" author="adilger" created="Tue, 10 Jan 2023 01:11:44 +0000"  >&lt;p&gt;Sebastien, could you please provide some analysis of what this failure means, how serious the impact of this failure is, and the likelihood of hitting it in production?  Is it a testing environment issue, or a race in the code during configuration or during runtime, and does it break security or is it just an inconvenience?&lt;/p&gt;</comment>
                            <comment id="358394" author="sebastien" created="Tue, 10 Jan 2023 10:49:37 +0000"  >&lt;p&gt;&lt;tt&gt;newuidmap&lt;/tt&gt; is a system command not related to Lustre. I do not know much about what sanity-pcc test_101a is trying to do, but as far as I can see it starts by creating a user+mount namespace on the agent node. Then it maps user &lt;tt&gt;$RUNAS_ID&lt;/tt&gt; to root inside the namespace, via the &lt;tt&gt;newuidmap&lt;/tt&gt; command. This is where it fails in the various cases reported above, and it has not even started using PCC or Lustre.&lt;/p&gt;

&lt;p&gt;I checked recent test results, every time sanity-pcc test_101a fails with such an error, this is because the PID of the &lt;tt&gt;sleep&lt;/tt&gt; process launched inside the namespace cannot be found. This can be seen with the message:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Created NS: child (sleep) pid 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;which shows an empty &lt;tt&gt;$PID&lt;/tt&gt; variable. As a consequence, the subsequent &lt;tt&gt;newuidmap&lt;/tt&gt; call is incorrect, as it misses its first argument, the PID:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;trevis-48vm4: [newuidmap] [0] [500] [1]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;(this command requires at least 4 args).&lt;/p&gt;

&lt;p&gt;My advice would be to use a longer sleep in the namespace, and to retry creating the namespace if the PID of &lt;tt&gt;sleep&lt;/tt&gt; cannot be found.&lt;/p&gt;</comment>
                            <comment id="358428" author="adilger" created="Tue, 10 Jan 2023 15:49:04 +0000"  >&lt;p&gt;Another oddity I just noticed is that the failure cases all take just over 600s, which is the duration of the remote sleep command, while a pass takes about 30-40s (one pass took 130s, but none took longer). This makes me wonder if the problem is in the remote ssh to the agent node and not the &quot;&lt;tt&gt;sleep 2&lt;/tt&gt;&quot; that is waiting for it?&lt;/p&gt;</comment>
                            <comment id="358431" author="gerrit" created="Tue, 10 Jan 2023 15:57:44 +0000"  >&lt;p&gt;&quot;Andreas Dilger &amp;lt;adilger@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/49587&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/49587&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16457&quot; title=&quot;sanity-pcc test_101a: Error: &amp;#39;could not map uid 500 to root in namespace&amp;#39;
&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16457&quot;&gt;&lt;del&gt;LU-16457&lt;/del&gt;&lt;/a&gt; tests: wait for remote sleep in sanity-pcc/101a&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9927aad023e4bf7447823c34cc344090078af82b&lt;/p&gt;</comment>
                            <comment id="361483" author="gerrit" created="Fri, 3 Feb 2023 06:51:31 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/49587/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/49587/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16457&quot; title=&quot;sanity-pcc test_101a: Error: &amp;#39;could not map uid 500 to root in namespace&amp;#39;
&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16457&quot;&gt;&lt;del&gt;LU-16457&lt;/del&gt;&lt;/a&gt; tests: wait for remote sleep in sanity-pcc/101a&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 4b47c233b308dcfefe77a6a493c01d3b4fc59bbe&lt;/p&gt;</comment>
                            <comment id="361521" author="pjones" created="Fri, 3 Feb 2023 14:07:25 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i039fz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>