<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:35:55 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3671] why are permission changes synchronous?</title>
                <link>https://jira.whamcloud.com/browse/LU-3671</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;&lt;tt&gt;mdd_acl_set()&lt;/tt&gt; sets the ACL xattr synchronously if &lt;tt&gt;/proc/fs/lustre/mdd/lustre-MDT0000/sync_permission&lt;/tt&gt; is set (it is set by default).  This can have a large performance impact for operations like &lt;tt&gt;cp -a&lt;/tt&gt; or &lt;tt&gt;tar --xattr&lt;/tt&gt;.  Should this be the default behavior?  How does this relate to other permissions-related changes like chown, chmod, etc.? It would be good to explain the security implications of this setting in code comments and in the Lustre manual.&lt;/p&gt;

&lt;p&gt;Some background information:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://wiki.lustre.org/index.php/Architecture_-_Version_Based_Recovery#Permissions&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://wiki.lustre.org/index.php/Architecture_-_Version_Based_Recovery#Permissions&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;https://projectlava.xyratex.com/show_bug.cgi?id=15390&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://projectlava.xyratex.com/show_bug.cgi?id=15390&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://git.whamcloud.com/gitweb?p=lustre.git;a=commit;h=0509770778396825ba1d7f1b42be5993aac1d6f3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.whamcloud.com/gitweb?p=lustre.git;a=commit;h=0509770778396825ba1d7f1b42be5993aac1d6f3&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;LLNL-bug-ID: TOSS-2207&lt;/p&gt;</description>
                <environment></environment>
        <key id="20098">LU-3671</key>
            <summary>why are permission changes synchronous?</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="laisiyao">Lai Siyao</assignee>
                                    <reporter username="nedbass">Ned Bass</reporter>
                        <labels>
                            <label>zfs</label>
                    </labels>
                <created>Tue, 30 Jul 2013 18:39:57 +0000</created>
                <updated>Tue, 6 Nov 2018 10:53:57 +0000</updated>
                            <resolved>Wed, 4 Sep 2013 18:56:01 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                    <fixVersion>Lustre 2.5.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="63291" author="morrone" created="Tue, 30 Jul 2013 18:47:24 +0000"  >&lt;p&gt;This (and/or &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3660&quot; title=&quot;Can&amp;#39;t disable ACL support with ZFS MDT&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3660&quot;&gt;&lt;del&gt;LU-3660&lt;/del&gt;&lt;/a&gt;) is LLNL&apos;s top support priority.&lt;/p&gt;</comment>
                            <comment id="63298" author="nedbass" created="Tue, 30 Jul 2013 19:25:21 +0000"  >&lt;p&gt;This setting does apply to chmod, chown, etc.  In a VM environment with ZFS backend, chmod or chown of 60 files takes about 15s with sync_permission=1, versus 100ms with sync_permission=0.&lt;/p&gt;</comment>
                            <comment id="63302" author="pjones" created="Tue, 30 Jul 2013 19:52:52 +0000"  >&lt;p&gt;Lai&lt;/p&gt;

&lt;p&gt;This seems related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3660&quot; title=&quot;Can&amp;#39;t disable ACL support with ZFS MDT&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3660&quot;&gt;&lt;del&gt;LU-3660&lt;/del&gt;&lt;/a&gt; which you are already working on&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="63316" author="nedbass" created="Tue, 30 Jul 2013 22:13:07 +0000"  >&lt;p&gt;To clarify our immediate priority here, how risky would it be to turn off &lt;tt&gt;sync_permission&lt;/tt&gt;?  We&apos;re feeling significant pain due to this issue on our production filesystems, so we&apos;d like to take prompt action if it&apos;s reasonably safe to do so.&lt;/p&gt;

&lt;p&gt;My impression is that we&apos;re trying to prevent something like an &lt;tt&gt;open()&lt;/tt&gt; getting incorrectly replayed before a &lt;tt&gt;chmod()&lt;/tt&gt; during recovery, allowing a client to circumvent permissions.   This seems fairly low risk, but I&apos;m not sure I understand this issue completely.&lt;/p&gt;</comment>
                            <comment id="63361" author="tappro" created="Wed, 31 Jul 2013 12:22:10 +0000"  >&lt;p&gt;IIRC, there is no harm or bug related to sync_permission, that is just an attempt to make any permission changes persistent and be not lost due to recovery. The sync_permission guarantees that changes are persistently stored when reply is got. With sync_permission == 0 we might have lost changes due to recovery if client node isn&apos;t alive, e.g. powered off right after command was executed and return success but changes weren&apos;t committed yet on server.&lt;/p&gt;</comment>
                            <comment id="63379" author="nedbass" created="Wed, 31 Jul 2013 15:35:51 +0000"  >&lt;p&gt;Mikhail, thanks, but in that case I don&apos;t understand the optimizations discussed by Robert and Andreas in bz15390.  They claim the sync is only needed on directories, and only when the permissions are being reduced.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://bugzilla.lustre.org/show_bug.cgi?id=15390&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugzilla.lustre.org/show_bug.cgi?id=15390&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="63763" author="laisiyao" created="Wed, 7 Aug 2013 10:49:32 +0000"  >&lt;p&gt;I made a patch according to the optimization proposal mentioned above: &lt;a href=&quot;http://review.whamcloud.com/7257&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7257&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="65644" author="jlevi" created="Tue, 3 Sep 2013 19:49:09 +0000"  >&lt;p&gt;Did the patch that landed fix this issue?&lt;/p&gt;</comment>
                            <comment id="65647" author="nedbass" created="Tue, 3 Sep 2013 20:13:15 +0000"  >&lt;p&gt;The patch addresses the performance issue, but not the lack of documentation.  I would like to see a step-by-step example scenario in which an asynchronous permission update leads to an inconsistent or insecure state.  The example should explain why the problem is only relevant for directories.  In other words, explain in precise terms the risks of disabling &lt;tt&gt;sync_permission&lt;/tt&gt;.&lt;/p&gt;</comment>
                            <comment id="65664" author="adilger" created="Tue, 3 Sep 2013 22:43:05 +0000"  >&lt;p&gt;Ned,&lt;br/&gt;
the specific problem that is being avoided here relates to Version Based Recovery (VBR), which is a mechanism to allow some clients to recover from an MDS failure even in the (not so uncommon) case that one or more other active clients do not reconnect to the MDS and do their own recovery.  Consider basic version-based recovery in the following situation:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# umask is 022, so dir1 is created with rwxr-xr-x permission
client1$ mkdir /lustre/dir1
client2$ mkdir /lustre/dir2
client1$ chmod 700 /lustre/dir1
client3$ touch /lustre/dir1/file3
client4$ touch /lustre/dir1/file4
:
:
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;If client2 fails at the same time as the MDS (right after dir1 is created) and does not participate during MDS recovery, old Lustre recovery would prevent client&lt;span class=&quot;error&quot;&gt;&amp;#91;34&amp;#93;&lt;/span&gt; from creating file&lt;span class=&quot;error&quot;&gt;&amp;#91;34&amp;#93;&lt;/span&gt; because there would be a gap in the MDS transaction sequence, even though there is no dependency between these files and dir2.  Similarly, if client1 failed, then file3 and file4 would not be able to recover, even if the dir1 creation was committed on the MDT before it crashed.&lt;/p&gt;

&lt;p&gt;With VBR, the replay for file3 and file4 would be dependent on the version of dir1 (transaction number in which dir1 was created/last modified), and not on each other.  That would allow the files to be recreated from any running client, and only files created by the failing node would be lost.&lt;/p&gt;

&lt;p&gt;The &lt;tt&gt;sync_permission&lt;/tt&gt; flag is concerned with avoiding the case where client1 fails after creating dir1 and running chmod, but the MDS only committed the mkdir and not the chmod before it fails.  That would potentially allow the file creations to be replayed in a directory that does not have the correct permissions.&lt;/p&gt;

&lt;p&gt;Mike, thinking about this further, is the version of dir1 changed by the chmod so that the later file creates are dependent upon the new version of dir1 and not the old one?  That would also prevent the later files from being created without any sync at all, though in most cases where permission changes are not being done this would increase the number of unreplayable RPCs in case of MDS failure.  Could you please further clarify what specific problem the &lt;tt&gt;sync_permission&lt;/tt&gt; behaviour is avoiding?&lt;/p&gt;</comment>
                            <comment id="65669" author="rread" created="Tue, 3 Sep 2013 23:56:00 +0000"  >&lt;p&gt;Suggestion: That example (once the details have been confirmed) would make an excellent addition to the manual.&lt;/p&gt;</comment>
                            <comment id="65673" author="nedbass" created="Wed, 4 Sep 2013 00:58:46 +0000"  >&lt;p&gt;Andreas, thanks for the explanation, but the pieces still aren&apos;t quite fitting together for me.  In your example, dir1 perms change from 755 to 700, so non-owners lack write permission before and after the chmod. I&apos;m not sure if that&apos;s what you intended.&lt;/p&gt;

&lt;p&gt;Given the optimization recently landed, we&apos;re only interested in the case where permissions are reduced.  In that case, I imagine the spirit of the example is that &lt;tt&gt;touch&lt;/tt&gt; would normally succeed &lt;em&gt;before&lt;/em&gt; the &lt;tt&gt;chmod&lt;/tt&gt;, but fail &lt;em&gt;after&lt;/em&gt;.  But if the &lt;tt&gt;chmod&lt;/tt&gt; isn&apos;t replayed in recovery, client3 cannot distinguish that from the case where client1 never ran chmod (because it crashed), and therefore it is perfectly consistent to allow the touch to succeed.  The directory would still have the old permissions which permitted file creation.&lt;/p&gt;</comment>
                            <comment id="65693" author="nedbass" created="Wed, 4 Sep 2013 06:22:24 +0000"  >&lt;p&gt;I think I understand the point of the example now. The touch is predicated on the knowledge that chmod succeeded and the directory has secure permissions.  If the chmod is lost, the directory contents become exposed to unauthorized users.  I was assuming in my last comment that client3 couldn&apos;t see the results of the chmod until it was committed to disk, but now I suspect that it could.  Do I have that right?&lt;/p&gt;</comment>
                            <comment id="65711" author="tappro" created="Wed, 4 Sep 2013 10:16:12 +0000"  >&lt;p&gt;Andreas, yes, chmod changed version of directory, so dependent replays will be denied. But nothing prevents just regular creations in that directory after recovery, it will stay without permission for a while. So sync_permission flag is not just about recovery but more like policy where the server guarantees all such changes to be done for sure, without sync_permission all works as before but permission might be lost, note this problem exists in all Lustre versions and is not about VBR. All further optimizations are just attempts to find some balance between security and performance. &lt;/p&gt;</comment>
                            <comment id="65763" author="adilger" created="Wed, 4 Sep 2013 18:35:39 +0000"  >&lt;p&gt;Ned, your second assessment is correct, and I should have made that more clear in my comment.  The danger is that the user thinks the chmod (or chown by root) succeeded, but it was lost during recovery, and this exposes files in the directory that shouldn&apos;t be visible to other users.  Mike is correct that if file3/4 are created before MDS recovery (while the chmod/chown is still in the MDS cache) then they will be refused during replay because the dir1 permission change was lost when client1 failed and the creates will depend on the now-missing dir1 version.  However, if file3/4 are created after MDS recovery (with chmod/chown lost) they will succeed.&lt;/p&gt;

&lt;p&gt;I guess &lt;tt&gt;sync_permission&lt;/tt&gt; is over &amp;amp; above what POSIX requires even of a local filesystem, since a local filesystem would lose the chmod after a crash and the user might forget to re-do it, but at least there was a visible interruption to the user when the local node crashed.  With a distributed filesystem, the failing node might be out of sight of the user.&lt;/p&gt;</comment>
                            <comment id="65765" author="adilger" created="Wed, 4 Sep 2013 18:56:01 +0000"  >&lt;p&gt;I filed &lt;a href=&quot;https://jira.whamcloud.com/browse/LUDOC-180&quot; title=&quot;documentation for the MDS sync_permission /proc tunable&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LUDOC-180&quot;&gt;LUDOC-180&lt;/a&gt; to track the documentation for this /proc tunable, and this one can be closed since the patch to avoid sync operations for regular files and non-permission setattrs has landed for 2.5.0.&lt;/p&gt;

&lt;p&gt;It could potentially also be cherry-picked for 2.4.x and 2.1.x.&lt;/p&gt;
</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="20775">LUDOC-180</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="20083">LU-3660</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="32416">LU-7239</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvwpz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9459</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>