<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:02:39 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13601] page allocation failure  during mount </title>
                <link>https://jira.whamcloud.com/browse/LU-13601</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;A customer mounts several lustre fs , 10th fs is failed to mount due to a memory allocation failure.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[7335638.554981] mount.lustre: page allocation failure: order:4, mode:0xc050
[7335638.554990] CPU: 0 PID: 303506 Comm: mount.lustre Kdump: loaded Tainted: G           OE  ------------ T 3.10.0-957.41.1.el7.x86
_64 #1
[7335638.554992] Hardware name: Penguin Computing XE2142e-OEM/S2600BPS, BIOS SE5C620.86B.00.01.0016.020120190930 02/01/2019
[7335638.554994] Call Trace:
[7335638.555012]  [&amp;lt;ffffffffae565ac0&amp;gt;] dump_stack+0x19/0x1b
[7335638.555022]  [&amp;lt;ffffffffadfbe200&amp;gt;] warn_alloc_failed+0x110/0x180
[7335638.555025]  [&amp;lt;ffffffffadfc2cbf&amp;gt;] __alloc_pages_nodemask+0x9df/0xbe0
[7335638.555029]  [&amp;lt;ffffffffae00fce8&amp;gt;] alloc_pages_current+0x98/0x110
[7335638.555035]  [&amp;lt;ffffffffadfddb68&amp;gt;] kmalloc_order+0x18/0x40
[7335638.555042]  [&amp;lt;ffffffffae01b066&amp;gt;] kmalloc_order_trace+0x26/0xa0
[7335638.555095]  [&amp;lt;ffffffffc0f94eec&amp;gt;] ll_init_sbi+0x4c/0x660 [lustre]
[7335638.555154]  [&amp;lt;ffffffffc0ae7be2&amp;gt;] ? lustre_start_mgc+0x4d2/0x2b00 [obdclass]
[7335638.555157]  [&amp;lt;ffffffffae01df36&amp;gt;] ? kmem_cache_alloc_trace+0x1d6/0x200
[7335638.555169]  [&amp;lt;ffffffffc0fa145a&amp;gt;] ? ll_fill_super+0x7a/0x14e0 [lustre]
[7335638.555179]  [&amp;lt;ffffffffc0fa14b5&amp;gt;] ll_fill_super+0xd5/0x14e0 [lustre]
[7335638.555196]  [&amp;lt;ffffffffc0aeac04&amp;gt;] lustre_fill_super+0x264/0xb70 [obdclass]
[7335638.555210]  [&amp;lt;ffffffffc0aea9a0&amp;gt;] ? lustre_common_put_super+0x270/0x270 [obdclass]
[7335638.555216]  [&amp;lt;ffffffffae046e3f&amp;gt;] mount_nodev+0x4f/0xb0
[7335638.555230]  [&amp;lt;ffffffffc0ae07c8&amp;gt;] lustre_mount+0x38/0x60 [obdclass]
[7335638.555232]  [&amp;lt;ffffffffae0479be&amp;gt;] mount_fs+0x3e/0x1b0
[7335638.555237]  [&amp;lt;ffffffffae065607&amp;gt;] vfs_kern_mount+0x67/0x110
[7335638.555238]  [&amp;lt;ffffffffae067c2f&amp;gt;] do_mount+0x1ef/0xce0
[7335638.555242]  [&amp;lt;ffffffffae03fe1a&amp;gt;] ? __check_object_size+0x1ca/0x250
[7335638.555244]  [&amp;lt;ffffffffae01dd9c&amp;gt;] ? kmem_cache_alloc_trace+0x3c/0x200
[7335638.555246]  [&amp;lt;ffffffffae068a63&amp;gt;] SyS_mount+0x83/0xd0
[7335638.555249]  [&amp;lt;ffffffffae578ddb&amp;gt;] system_call_fastpath+0x22/0x27
[7335638.555251] Mem-Info:
[7335638.555264] active_anon:85113660 inactive_anon:4989547 isolated_anon:0
 active_file:1793179 inactive_file:2080266 isolated_file:0
 unevictable:0 dirty:26440 writeback:34 unstable:0
 slab_reclaimable:836114 slab_unreclaimable:1111382
 mapped:810319 shmem:25487119 pagetables:149686 bounce:0
 free:243878 free_pcp:287 free_cma:0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="59320">LU-13601</key>
            <summary>page allocation failure  during mount </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="adilger">Andreas Dilger</assignee>
                                    <reporter username="zam">Alexander Zarochentsev</reporter>
                        <labels>
                    </labels>
                <created>Mon, 25 May 2020 19:31:22 +0000</created>
                <updated>Fri, 3 Dec 2021 21:38:32 +0000</updated>
                            <resolved>Fri, 3 Dec 2021 21:38:32 +0000</resolved>
                                    <version>Upstream</version>
                    <version>Lustre 2.12.4</version>
                                    <fixVersion>Lustre 2.15.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="271087" author="zam" created="Mon, 25 May 2020 19:36:30 +0000"  >&lt;p&gt;Allocation of struct ll_sb_info fails in ll_init_sbi() b/c memory is fragmented and ll_sb_info is more than 50kB in size, requires order 4 allocation.&lt;br/&gt;
A quick trace of page allocation during mount/unmount and a simple file copy test shows no other page allocation with order &amp;gt; 3 not protected by OBD_ALLOC_LARGE except&lt;br/&gt;
this ll_init_sbi()&lt;/p&gt;</comment>
                            <comment id="271088" author="gerrit" created="Mon, 25 May 2020 19:37:56 +0000"  >&lt;p&gt;Alexander Zarochentsev (alexander.zarochentsev@hpe.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/38713&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/38713&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13601&quot; title=&quot;page allocation failure  during mount &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13601&quot;&gt;&lt;del&gt;LU-13601&lt;/del&gt;&lt;/a&gt; llite: OBD_ALLOC_LARGE for ll_sb_info&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 541cc5cf186d5de2a4fb624219acad7f35a19f1b&lt;/p&gt;</comment>
                            <comment id="271091" author="adilger" created="Mon, 25 May 2020 22:36:30 +0000"  >&lt;p&gt;It looks like the root cause of this very large allocation is because of &lt;tt&gt;ll_rw_extents_info&lt;/tt&gt; and &lt;tt&gt;ll_rw_process_info&lt;/tt&gt; stats that are almost never used:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;$ pahole lustre/llite/lustre.ko | grep -A 80 &apos;^struct ll_sb_info&apos;
struct ll_sb_info {
        spinlock_t                 ll_lock;              /*     0     4 */
        spinlock_t                 ll_pp_extent_lock;    /*     4     4 */
        spinlock_t                 ll_process_lock;      /*     8     4 */
        struct obd_uuid            ll_sb_uuid;           /*    12    40 */

        /* XXX 4 bytes hole, try to pack */

        struct obd_export *        ll_md_exp;            /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct obd_export *        ll_dt_exp;            /*    64     8 */
        struct obd_device *        ll_md_obd;            /*    72     8 */
        struct obd_device *        ll_dt_obd;            /*    80     8 */
        struct dentry *            ll_debugfs_entry;     /*    88     8 */
        struct lu_fid              ll_root_fid;          /*    96    16 */
        int                        ll_flags;             /*   112     4 */
        unsigned int               ll_umounting:1;       /*   116:31  4 */
        unsigned int               ll_xattr_cache_enabled:1; /*   116:30  4 */
        unsigned int               ll_xattr_cache_set:1; /*   116:29  4 */
        unsigned int               ll_client_common_fill_super_succeeded:1; /*   116:28  4 */
        unsigned int               ll_checksum_set:1;    /*   116:27  4 */

        /* XXX 27 bits hole, try to pack */

        struct lustre_client_ocd   ll_lco;               /*   120    56 */
        /* --- cacheline 2 boundary (128 bytes) was 48 bytes ago --- */
        struct lprocfs_stats *     ll_stats;             /*   176     8 */
        struct cl_client_cache *   ll_cache;             /*   184     8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        struct lprocfs_stats *     ll_ra_stats;          /*   192     8 */
        struct ll_ra_info          ll_ra_info;           /*   200    32 */
        unsigned int               ll_namelen;           /*   232     4 */

        /* XXX 4 bytes hole, try to pack */

        struct file_operations *   ll_fop;               /*   240     8 */
        struct lu_site *           ll_site;              /*   248     8 */
        /* --- cacheline 4 boundary (256 bytes) --- */
        struct cl_device *         ll_cl;                /*   256     8 */
        struct ll_rw_extents_info  ll_rw_extents_info;   /*   264  5896 */
        /* --- cacheline 96 boundary (6144 bytes) was 16 bytes ago --- */
        int                        ll_extent_process_count; /*  6160     4 */

        /* XXX 4 bytes hole, try to pack */

        struct ll_rw_process_info  ll_rw_process_info[10]; /*  6168   640 */
        /* --- cacheline 106 boundary (6784 bytes) was 24 bytes ago --- */
        unsigned int               ll_offset_process_count; /*  6808     4 */
        enum stats_track_type      ll_stats_track_type;  /* 13224     4 */
        int                        ll_rw_stats_on;       /* 13228     4 */
        unsigned int               ll_sa_running_max;    /* 13232     4 */
        unsigned int               ll_sa_max;            /* 13236     4 */
        atomic_t                   ll_sa_total;          /* 13240     4 */
        atomic_t                   ll_sa_wrong;          /* 13244     4 */
        /* --- cacheline 207 boundary (13248 bytes) --- */
        atomic_t                   ll_sa_running;        /* 13248     4 */
        atomic_t                   ll_agl_total;         /* 13252     4 */
        dev_t                      ll_sdev_orig;         /* 13256     4 */

        /* XXX 4 bytes hole, try to pack */

        struct root_squash_info    ll_squash;            /* 13264    56 */
        /* --- cacheline 208 boundary (13312 bytes) was 8 bytes ago --- */
        struct path                ll_mnt;               /* 13320    16 */
        unsigned int               ll_stat_blksize;      /* 13336     4 */
        unsigned int               ll_statfs_max_age;    /* 13340     4 */
        struct kset                ll_kset;              /* 13344    96 */
        /* --- cacheline 210 boundary (13440 bytes) --- */
        struct completion          ll_kobj_unregister;   /* 13440    32 */

        /* size: 13472, cachelines: 211, members: 47 */
        /* sum members: 13452, holes: 5, sum holes: 20 */
        /* bit holes: 1, sum bit holes: 27 bits */
        /* last cacheline: 32 bytes */
};
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;What about dynamically allocating those stats structures when they are first used, and freeing them at unmount time?  That would bring the &lt;tt&gt;ll_sb_info&lt;/tt&gt; allocation size down to 1176 bytes for almost all uses, and it would never fail allocation at that size.&lt;/p&gt;</comment>
                            <comment id="271156" author="zam" created="Tue, 26 May 2020 15:38:19 +0000"  >&lt;p&gt;Andreas, we have &lt;a href=&quot;https://review.whamcloud.com/#/c/31236/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/31236/&lt;/a&gt; landed to our branch and it increases size of ll_sb_info up to 52kB, while master&apos;s version has only &lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) p sizeof(struct ll_sb_info)
$1 = 13568
(gdb)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I am going to abandon the patch.&lt;/p&gt;</comment>
                            <comment id="271187" author="adilger" created="Tue, 26 May 2020 19:16:16 +0000"  >&lt;p&gt;I think especially in this case it would make sense to allocate the stats structs in the superblock dynamically when these stats are first enabled. &lt;/p&gt;</comment>
                            <comment id="282518" author="pjones" created="Sun, 18 Oct 2020 15:14:54 +0000"  >&lt;p&gt;So should this ticket be closed as Will Not Fix?&lt;/p&gt;</comment>
                            <comment id="286960" author="adilger" created="Tue, 8 Dec 2020 06:53:51 +0000"  >&lt;p&gt;I&apos;d rather fix the problem than close the ticket.  Allocating &lt;tt&gt;ll_rw_extents_info&lt;/tt&gt; (5896 bytes), &lt;tt&gt;ll_rw_offset_info&lt;/tt&gt; (6400 bytes), and &lt;tt&gt;ll_rw_process_info&lt;/tt&gt; (640 bytes) only when these stats are enabled via &lt;tt&gt;ll_rw_extents_stats_pp_seq_write()&lt;/tt&gt;, &lt;tt&gt;ll_rw_extents_stats_seq_write()&lt;/tt&gt;, or &lt;tt&gt;ll_rw_offset_stats_seq_write()&lt;/tt&gt; is straight forward to implement, and should really be done before patch &lt;a href=&quot;https://review.whamcloud.com/31236&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/31236&lt;/a&gt; lands.&lt;/p&gt;

&lt;p&gt;The &lt;tt&gt;struct obd_histogram&lt;/tt&gt; in &lt;tt&gt;struct client_obd&lt;/tt&gt; and &lt;tt&gt;struct lmv_obd&lt;/tt&gt; are also very large. I&apos;ll push a prototype patch which allocates the histograms on demand.&lt;/p&gt;</comment>
                            <comment id="286964" author="gerrit" created="Tue, 8 Dec 2020 08:35:45 +0000"  >&lt;p&gt;Andreas Dilger (adilger@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/40901&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40901&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13601&quot; title=&quot;page allocation failure  during mount &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13601&quot;&gt;&lt;del&gt;LU-13601&lt;/del&gt;&lt;/a&gt; llite: avoid needless large allocations&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f1cf5c17ddd59a32bd6b0f0de1bb4e65f1009424&lt;/p&gt;</comment>
                            <comment id="320030" author="gerrit" created="Fri, 3 Dec 2021 19:54:29 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/40901/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40901/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13601&quot; title=&quot;page allocation failure  during mount &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13601&quot;&gt;&lt;del&gt;LU-13601&lt;/del&gt;&lt;/a&gt; llite: avoid needless large stats alloc&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9490fd9bb84dc277bd103bf16286fc26882e5b5e&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="22860">LU-4533</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="61287">LU-14055</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i01173:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>