<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:25:21 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16251] Fill jobid in an atomic way</title>
                <link>https://jira.whamcloud.com/browse/LU-16251</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;As per DDN-3114:&lt;/p&gt;

&lt;p&gt;jobid_interpret_string() is not an atomic operation. It is possible that two processes hit the same file+offset (so the same request) but with different jobid schemes. So that jobid_interpret_string() is re-entered and generate a &quot;combined&quot; jobid.&lt;/p&gt;</description>
                <environment></environment>
        <key id="72855">LU-16251</key>
            <summary>Fill jobid in an atomic way</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="flei">Feng Lei </assignee>
                                    <reporter username="flei">Feng Lei </reporter>
                        <labels>
                    </labels>
                <created>Wed, 19 Oct 2022 03:05:28 +0000</created>
                <updated>Thu, 5 Jan 2023 00:32:44 +0000</updated>
                            <resolved>Tue, 8 Nov 2022 13:58:23 +0000</resolved>
                                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="350090" author="gerrit" created="Wed, 19 Oct 2022 04:18:13 +0000"  >&lt;p&gt;&quot;Feng Lei &amp;lt;flei@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/48915&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/48915&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16251&quot; title=&quot;Fill jobid in an atomic way&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16251&quot;&gt;&lt;del&gt;LU-16251&lt;/del&gt;&lt;/a&gt; obdclass: fill jobid in an atomic way&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: d4c2887f4b0c170b918e65fff72ae4e76bd82d4a&lt;/p&gt;</comment>
                            <comment id="350920" author="flei" created="Thu, 27 Oct 2022 01:34:57 +0000"  >&lt;p&gt;The bug is reproduced after inserting a msleep(1) in jobid_interpret_string(). Here is the patch to reproduce the bug:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c
index 1b4bdd3168..b9b33044f8 100644
--- a/lustre/obdclass/jobid.c
+++ b/lustre/obdclass/jobid.c
@@ -32,6 +32,7 @@
&#160; */

&#160;#define DEBUG_SUBSYSTEM S_RPC
+#include &amp;lt;linux/delay.h&amp;gt;
&#160;#include &amp;lt;linux/user_namespace.h&amp;gt;
&#160;#include &amp;lt;linux/uidgid.h&amp;gt;
&#160;#include &amp;lt;linux/utsname.h&amp;gt;
@@ -696,6 +697,7 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid,
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; l = snprintf(jobid, joblen, &quot;%u&quot;, current-&amp;gt;pid);
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; break;
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; case &apos;u&apos;: /* user ID */
+ &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; msleep(1);
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; l = snprintf(jobid, joblen, &quot;%u&quot;,
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;from_kuid(&amp;amp;init_user_ns, current_fsuid()));
&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; break;
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index 437d86aa96..bf47268373 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -28908,6 +28908,27 @@ test_906() {
&#160;}
&#160;run_test 906 &quot;Simple test for io_uring I/O engine via fio&quot;

+
+test_1001() {
+ &#160; &#160; &#160; local file=$DIR/$tdir/$tfile
+
+ &#160; &#160; &#160; test_mkdir $DIR/$tdir
+
+ &#160; &#160; &#160; for i in \{1..2}; do
+ &#160; &#160; &#160; &#160; &#160; &#160; &#160; echo &quot;start worker $i...&quot;
+ &#160; &#160; &#160; &#160; &#160; &#160; &#160; dd if=/dev/zero of=$file bs=1M count=100 &amp;amp;
+ &#160; &#160; &#160; done
+
+ &#160; &#160; &#160; echo &quot;wait for all workers done...&quot;
+ &#160; &#160; &#160; wait
+
+ &#160; &#160; &#160; $LCTL get_param *.*.job_stats | grep job_id | grep &apos;dd\.$&apos;
+ &#160; &#160; &#160; if (( $? == 0)); then
+ &#160; &#160; &#160; &#160; &#160; &#160; &#160; error &quot;found corrupted jobid&quot;
+ &#160; &#160; &#160; fi
+}
+run_test 1001 &quot;test corrupted jobid&quot;
+
&#160;complete $SECONDS
&#160;[ -f $EXT2_DEV ] &amp;amp;&amp;amp; rm $EXT2_DEV || true
&#160;check_and_cleanup_lustre
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The test result:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-bash&quot;&gt;
== sanity test 1001: test corrupted jobid ================ 09:20:20 (1666833620)
start worker 1...
start worker 2...
wait &lt;span class=&quot;code-object&quot;&gt;for&lt;/span&gt; all workers &lt;span class=&quot;code-object&quot;&gt;done&lt;/span&gt;...
100+0 records &lt;span class=&quot;code-object&quot;&gt;in&lt;/span&gt;
100+0 records out
104857600 bytes (105 MB, 100 MiB) copied, 1.63228 s, 64.2 MB/s
100+0 records &lt;span class=&quot;code-object&quot;&gt;in&lt;/span&gt;
100+0 records out
104857600 bytes (105 MB, 100 MiB) copied, 1.65471 s, 63.4 MB/s
- job_id: &#160; &#160; &#160; &#160; &#160;dd.
&#160;sanity test_1001: @@@@@@ FAIL: found corrupted jobid&#160;
&#160; Trace dump:
&#160; = ./../tests/test-framework.sh:6524:error()
&#160; = sanity.sh:29016:test_1001()
&#160; = ./../tests/test-framework.sh:6860:run_one()
&#160; = ./../tests/test-framework.sh:6910:run_one_logged()
&#160; = ./../tests/test-framework.sh:6732:run_test()
&#160; = sanity.sh:29019:main()
Dumping lctl log to /tmp/test_logs/1666833512/sanity.test_1001.*.1666833622.log
FAIL 1001 (3s)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;In this case we generate a corrupted jobid &quot;dd.&quot; without the uid because there is a delay before appending uid to jobid. It should also be possible to generate other kinds of corruption easily if we insert the delay at other points and trigger it with other kinds of setting.&lt;/p&gt;</comment>
                            <comment id="352107" author="gerrit" created="Tue, 8 Nov 2022 08:53:13 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/48915/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/48915/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16251&quot; title=&quot;Fill jobid in an atomic way&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16251&quot;&gt;&lt;del&gt;LU-16251&lt;/del&gt;&lt;/a&gt; obdclass: fill jobid in a safe way&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9a0a89520e8b57bd63a9343fe3cdc56c61c41f6d&lt;/p&gt;</comment>
                            <comment id="352147" author="pjones" created="Tue, 8 Nov 2022 13:58:23 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                            <comment id="352517" author="james beal" created="Thu, 10 Nov 2022 10:29:41 +0000"  >&lt;p&gt;I have just had a quick look at this patch given I am talking around this in a few weeks time.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;Given:&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&#160;
- job_id:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; kworker/u518:6eamtrynka
&#160; snapshot_time:&#160;&#160; 1668009798
&#160; read_bytes:&#160;&#160;&#160;&#160;&#160; { samples:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 1, unit: bytes, min:&#160; 663552, max:&#160; 663552, sum:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 663552 }
&#160; write_bytes:&#160;&#160;&#160;&#160; { samples:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 0, unit: bytes, min:&#160;&#160;&#160;&#160;&#160;&#160; 0, max:&#160;&#160;&#160;&#160;&#160;&#160; 0, sum:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 0 }
&#160;
- job_id:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; kworker/u130:0
&#160; snapshot_time:&#160;&#160; 1668009796
&#160; read_bytes:&#160;&#160;&#160;&#160;&#160; { samples:&#160;&#160;&#160;&#160;&#160;&#160; 21459, unit: bytes, min:&#160;&#160;&#160; 4096, max: 2297856, sum:&#160;&#160;&#160;&#160;&#160;&#160; 160104448 }
&#160; write_bytes:&#160;&#160;&#160;&#160; { samples:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 0, unit: bytes, min:&#160;&#160;&#160;&#160;&#160;&#160; 0, max:&#160;&#160;&#160;&#160;&#160;&#160; 0, sum:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 0 }
&#160;
- job_id:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;thread-pool-14113912
&#160; snapshot_time:&#160;&#160; 1668009775
&#160; read_bytes:&#160;&#160;&#160;&#160;&#160; { samples:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 2, unit: bytes, min:&#160;&#160; 36864, max:&#160; 983040, sum:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 1019904 }
&#160; write_bytes:&#160;&#160;&#160;&#160; { samples:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; 0, unit: bytes, min:&#160;&#160;&#160;&#160;&#160;&#160; 0, max:&#160;&#160;&#160;&#160;&#160;&#160; 0, sum:&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160; &#160;&#160;&#160;&#160;&#160;&#160;0 }
&#160;
[root@lus23-oss1 ~]# lctl get_param obdfilter.lus23-OST0000.job_stats | grep job_id | grep thread-pool | wc -l
126
[root@lus23-oss1 ~]# lctl get_param obdfilter.lus23-OST0000.job_stats | grep job_id | grep kworker | wc -l
155
[root@lus23-oss1 ~]# lctl get_param obdfilter.lus23-OST0000.job_stats | grep job_id | wc -l
330
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;And &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/48915/4/lustre/obdclass/jobid.c#478&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/48915/4/lustre/obdclass/jobid.c#478&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;should thread-pool be on the internal list ( I don&apos;t know the answer, it&apos;s an honest question ).&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;( and for the user community, is this a server-side fix or a client fix ).&lt;/p&gt;
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="72719">LU-16228</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0338f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>