<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:02:26 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-6695] Jobstats breaks when &quot;Too long env variable.&quot; errors occur</title>
                <link>https://jira.whamcloud.com/browse/LU-6695</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We have &lt;b&gt;&quot;Too long env variable&quot;&lt;/b&gt; errors on a Lustre cluster at Stanford leading to broken JobStats report (using SLURM_JOB_ID). Jobids associated with processes reporting these errors are just ignored:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LNetError: 15288:0:(linux-curproc.c:241:cfs_get_environ()) Too long env variable.
LNetError: 15288:0:(linux-curproc.c:241:cfs_get_environ()) Skipped 2097 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;In our case, user process environ size is a bit more than 32K.&lt;br/&gt;
It seems the problem comes from lustre_get_jobid() which uses the process environ variable to store some info when jobstats is enabled, but cfs_get_environ() is not able to handle a large environ (which may be wise). However, we think a user shouldn&apos;t be able to disable jobstats like that. A change to cfs_get_environ() might not be enough. Please advise.&lt;/p&gt;

&lt;p&gt;Please find below the commands used to track the issue:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@gpu-13-1 ~]# ps uw -q 15288
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
suuser   15288 98.4  6.2 108826468 4144960 ?   Sl   13:55 235:46 terachem run.in

[root@gpu-13-1 ~]# cat /proc/15288/environ | wc -c
32936

[root@gpu-13-1 ~]# scontrol pidinfo 15288
Slurm job id 2376464 ends at Sun Jun 07 13:55:09 2015
slurm_get_rem_time is 159433

[root@gpu-13-1 ~]# squeue -j 2376464
             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           2376464      slac temp800_   suuser  R    3:43:25      1 gpu-13-1

[root@gpu-13-1 ~]# lsof -p 15288 | grep /scratch
terachem 15288 suuser    1w   REG 2395,496332    386348 144116383972642817 /scratch/users/suuser/FeC2_catalyst/temp800_noFeECP_nanoFeC2/chunk_0080/run.out
terachem 15288 suuser    2w   REG 2395,496332        43 144116383972642818 /scratch/users/suuser/FeC2_catalyst/temp800_noFeECP_nanoFeC2/chunk_0080/run.err

[root@gpu-13-1 ~]# ls -l /scratch/users/suuser/FeC2_catalyst/temp800_noFeECP_nanoFeC2/chunk_0080/run.out
-rw-r--r-- 1 suuser sugrp 386636 Jun  5 17:40 /scratch/users/suuser/FeC2_catalyst/temp800_noFeECP_nanoFeC2/chunk_0080/run.out
[root@gpu-13-1 ~]# date
Fri Jun  5 17:40:12 PDT 2015
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;fsname is regal mounted on /scratch.&lt;br/&gt;
No jobstats report seen from this job:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@rcf-mgnt ~]# clush -w regal-oss[00-07] lctl get_param obdfilter.*.job_stats \| grep 2376464
clush: regal-oss07: exited with exit code 1
clush: regal-oss06: exited with exit code 1
clush: regal-oss00: exited with exit code 1
clush: regal-oss01: exited with exit code 1
clush: regal-oss04: exited with exit code 1
clush: regal-oss03: exited with exit code 1
clush: regal-oss02: exited with exit code 1
clush: regal-oss05: exited with exit code 1

[root@regal-mds1 ~]# lctl get_param mdt.regal-MDT0000.job_stats | grep 2376464
[root@regal-mds1 ~]# 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="30534">LU-6695</key>
            <summary>Jobstats breaks when &quot;Too long env variable.&quot; errors occur</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="niu">Niu Yawei</assignee>
                                    <reporter username="sthiell">Stephane Thiell</reporter>
                        <labels>
                    </labels>
                <created>Sat, 6 Jun 2015 01:37:41 +0000</created>
                <updated>Mon, 31 Aug 2015 16:02:59 +0000</updated>
                            <resolved>Tue, 16 Jun 2015 13:30:17 +0000</resolved>
                                    <version>Lustre 2.5.3</version>
                                    <fixVersion>Lustre 2.8.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="117664" author="ezell" created="Sat, 6 Jun 2015 03:43:44 +0000"  >&lt;p&gt;We have also seen this during our recent testing of jobstats at ORNL.&lt;/p&gt;</comment>
                            <comment id="117702" author="niu" created="Mon, 8 Jun 2015 02:09:15 +0000"  >&lt;blockquote&gt;
&lt;p&gt;In our case, user process environ size is a bit more than 32K.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;Do you mean a single env variable is larger than 32k or the whole environ? cfs_get_environ() can&apos;t handle a variable that is larger than the page size.&lt;/p&gt;</comment>
                            <comment id="117704" author="sthiell" created="Mon, 8 Jun 2015 03:17:03 +0000"  >&lt;p&gt;Hi Niu,&lt;br/&gt;
Oh, I meant the whole environ. I&apos;ve just checked and the two largest variables are PATH and LD_LIBRARY_PATH with 17559 and 6979 bytes, respectively, each one containing a large set of paths.&lt;/p&gt;</comment>
                            <comment id="117709" author="niu" created="Mon, 8 Jun 2015 06:00:03 +0000"  >&lt;p&gt;I see, we didn&apos;t expect such long env variables. It looks like we should just skip these long variables in cfs_get_environ().&lt;/p&gt;</comment>
                            <comment id="117712" author="gerrit" created="Mon, 8 Jun 2015 06:41:30 +0000"  >&lt;p&gt;Niu Yawei (yawei.niu@intel.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/15177&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15177&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6695&quot; title=&quot;Jobstats breaks when &amp;quot;Too long env variable.&amp;quot; errors occur&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6695&quot;&gt;&lt;del&gt;LU-6695&lt;/del&gt;&lt;/a&gt; jobstats: skip too long env variables&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 6463d85889467ce564c9fdcf0a792562d2c1aae6&lt;/p&gt;</comment>
                            <comment id="118651" author="gerrit" created="Tue, 16 Jun 2015 08:46:28 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/15177/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/15177/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-6695&quot; title=&quot;Jobstats breaks when &amp;quot;Too long env variable.&amp;quot; errors occur&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-6695&quot;&gt;&lt;del&gt;LU-6695&lt;/del&gt;&lt;/a&gt; jobstats: skip too long env variables&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 3c8a2d49ef4a17aad2973475178aea794b669f38&lt;/p&gt;</comment>
                            <comment id="118671" author="pjones" created="Tue, 16 Jun 2015 13:30:17 +0000"  >&lt;p&gt;Landed for 2.8&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzxf3b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>