summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c64
-rw-r--r--fs/afs/dir_silly.c38
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/fs_operation.c10
-rw-r--r--fs/afs/fs_probe.c11
-rw-r--r--fs/afs/inode.c91
-rw-r--r--fs/afs/internal.h37
-rw-r--r--fs/afs/main.c3
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/server.c3
-rw-r--r--fs/afs/write.c12
-rw-r--r--fs/afs/yfsclient.c93
-rw-r--r--fs/aio.c2
-rw-r--r--fs/block_dev.c17
-rw-r--r--fs/ext4/Makefile3
-rw-r--r--fs/ext4/dir.c16
-rw-r--r--fs/ext4/ext4.h27
-rw-r--r--fs/ext4/extents.c2
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c26
-rw-r--r--fs/ext4/ioctl.c65
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/super.c124
-rw-r--r--fs/ext4/verity.c5
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/ext4/xattr.h1
-rw-r--r--fs/ext4/xattr_hurd.c51
-rw-r--r--fs/io-wq.c108
-rw-r--r--fs/io-wq.h4
-rw-r--r--fs/io_uring.c177
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/summary.h4
-rw-r--r--fs/proc/bootconfig.c15
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/squashfs/squashfs_fs.h16
37 files changed, 726 insertions, 334 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index aa1d34141ea3..96757f3abd74 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -648,7 +648,7 @@ static void afs_do_lookup_success(struct afs_operation *op)
vp = &op->file[0];
abort_code = vp->scb.status.abort_code;
if (abort_code != 0) {
- op->abort_code = abort_code;
+ op->ac.abort_code = abort_code;
op->error = afs_abort_to_error(abort_code);
}
break;
@@ -696,10 +696,11 @@ static const struct afs_operation_ops afs_inline_bulk_status_operation = {
.success = afs_do_lookup_success,
};
-static const struct afs_operation_ops afs_fetch_status_operation = {
+static const struct afs_operation_ops afs_lookup_fetch_status_operation = {
.issue_afs_rpc = afs_fs_fetch_status,
.issue_yfs_rpc = yfs_fs_fetch_status,
.success = afs_do_lookup_success,
+ .aborted = afs_check_for_remote_deletion,
};
/*
@@ -844,7 +845,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
* to FS.FetchStatus for op->file[1].
*/
op->fetch_status.which = 1;
- op->ops = &afs_fetch_status_operation;
+ op->ops = &afs_lookup_fetch_status_operation;
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
}
@@ -1236,6 +1237,17 @@ void afs_d_release(struct dentry *dentry)
_enter("%pd", dentry);
}
+void afs_check_for_remote_deletion(struct afs_operation *op)
+{
+ struct afs_vnode *vnode = op->file[0].vnode;
+
+ switch (op->ac.abort_code) {
+ case VNOVNODE:
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ afs_break_callback(vnode, afs_cb_break_for_deleted);
+ }
+}
+
/*
* Create a new inode for create/mkdir/symlink
*/
@@ -1268,7 +1280,7 @@ static void afs_vnode_new_inode(struct afs_operation *op)
static void afs_create_success(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- afs_check_for_remote_deletion(op, op->file[0].vnode);
+ op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
afs_update_dentry_version(op, &op->file[0], op->dentry);
afs_vnode_new_inode(op);
@@ -1302,6 +1314,7 @@ static const struct afs_operation_ops afs_mkdir_operation = {
.issue_afs_rpc = afs_fs_make_dir,
.issue_yfs_rpc = yfs_fs_make_dir,
.success = afs_create_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_create_edit_dir,
.put = afs_create_put,
};
@@ -1325,6 +1338,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].update_ctime = true;
op->dentry = dentry;
op->create.mode = S_IFDIR | mode;
op->create.reason = afs_edit_dir_for_mkdir;
@@ -1350,7 +1364,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
static void afs_rmdir_success(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- afs_check_for_remote_deletion(op, op->file[0].vnode);
+ op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
afs_update_dentry_version(op, &op->file[0], op->dentry);
}
@@ -1382,6 +1396,7 @@ static const struct afs_operation_ops afs_rmdir_operation = {
.issue_afs_rpc = afs_fs_remove_dir,
.issue_yfs_rpc = yfs_fs_remove_dir,
.success = afs_rmdir_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_rmdir_edit_dir,
.put = afs_rmdir_put,
};
@@ -1404,6 +1419,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].update_ctime = true;
op->dentry = dentry;
op->ops = &afs_rmdir_operation;
@@ -1479,7 +1495,8 @@ static void afs_dir_remove_link(struct afs_operation *op)
static void afs_unlink_success(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
- afs_check_for_remote_deletion(op, op->file[0].vnode);
+ op->ctime = op->file[0].scb.status.mtime_client;
+ afs_check_dir_conflict(op, &op->file[0]);
afs_vnode_commit_status(op, &op->file[0]);
afs_vnode_commit_status(op, &op->file[1]);
afs_update_dentry_version(op, &op->file[0], op->dentry);
@@ -1511,6 +1528,7 @@ static const struct afs_operation_ops afs_unlink_operation = {
.issue_afs_rpc = afs_fs_remove_file,
.issue_yfs_rpc = yfs_fs_remove_file,
.success = afs_unlink_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_unlink_edit_dir,
.put = afs_unlink_put,
};
@@ -1537,6 +1555,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].update_ctime = true;
/* Try to make sure we have a callback promise on the victim. */
ret = afs_validate(vnode, op->key);
@@ -1561,9 +1580,25 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
op->file[1].vnode = vnode;
+ op->file[1].update_ctime = true;
+ op->file[1].op_unlinked = true;
op->dentry = dentry;
op->ops = &afs_unlink_operation;
- return afs_do_sync_operation(op);
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+
+ /* If there was a conflict with a third party, check the status of the
+ * unlinked vnode.
+ */
+ if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ op->file[1].update_ctime = false;
+ op->fetch_status.which = 1;
+ op->ops = &afs_fetch_status_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ }
+
+ return afs_put_operation(op);
error:
return afs_put_operation(op);
@@ -1573,6 +1608,7 @@ static const struct afs_operation_ops afs_create_operation = {
.issue_afs_rpc = afs_fs_create_file,
.issue_yfs_rpc = yfs_fs_create_file,
.success = afs_create_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_create_edit_dir,
.put = afs_create_put,
};
@@ -1601,6 +1637,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].update_ctime = true;
op->dentry = dentry;
op->create.mode = S_IFREG | mode;
@@ -1620,6 +1657,7 @@ static void afs_link_success(struct afs_operation *op)
struct afs_vnode_param *vp = &op->file[1];
_enter("op=%08x", op->debug_id);
+ op->ctime = dvp->scb.status.mtime_client;
afs_vnode_commit_status(op, dvp);
afs_vnode_commit_status(op, vp);
afs_update_dentry_version(op, dvp, op->dentry);
@@ -1640,6 +1678,7 @@ static const struct afs_operation_ops afs_link_operation = {
.issue_afs_rpc = afs_fs_link,
.issue_yfs_rpc = yfs_fs_link,
.success = afs_link_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_create_edit_dir,
.put = afs_link_put,
};
@@ -1672,6 +1711,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
afs_op_set_vnode(op, 0, dvnode);
afs_op_set_vnode(op, 1, vnode);
op->file[0].dv_delta = 1;
+ op->file[0].update_ctime = true;
+ op->file[1].update_ctime = true;
op->dentry = dentry;
op->dentry_2 = from;
@@ -1689,6 +1730,7 @@ static const struct afs_operation_ops afs_symlink_operation = {
.issue_afs_rpc = afs_fs_symlink,
.issue_yfs_rpc = yfs_fs_symlink,
.success = afs_create_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_create_edit_dir,
.put = afs_create_put,
};
@@ -1740,9 +1782,13 @@ static void afs_rename_success(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
+ op->ctime = op->file[0].scb.status.mtime_client;
+ afs_check_dir_conflict(op, &op->file[1]);
afs_vnode_commit_status(op, &op->file[0]);
- if (op->file[1].vnode != op->file[0].vnode)
+ if (op->file[1].vnode != op->file[0].vnode) {
+ op->ctime = op->file[1].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[1]);
+ }
}
static void afs_rename_edit_dir(struct afs_operation *op)
@@ -1860,6 +1906,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
op->file[0].dv_delta = 1;
op->file[1].dv_delta = 1;
+ op->file[0].update_ctime = true;
+ op->file[1].update_ctime = true;
op->dentry = old_dentry;
op->dentry_2 = new_dentry;
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index b14e3d9a25e2..04f75a44f243 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -16,6 +16,7 @@ static void afs_silly_rename_success(struct afs_operation *op)
{
_enter("op=%08x", op->debug_id);
+ afs_check_dir_conflict(op, &op->file[0]);
afs_vnode_commit_status(op, &op->file[0]);
}
@@ -69,6 +70,11 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
return PTR_ERR(op);
afs_op_set_vnode(op, 0, dvnode);
+ afs_op_set_vnode(op, 1, dvnode);
+ op->file[0].dv_delta = 1;
+ op->file[1].dv_delta = 1;
+ op->file[0].update_ctime = true;
+ op->file[1].update_ctime = true;
op->dentry = old;
op->dentry_2 = new;
@@ -129,6 +135,7 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode,
switch (ret) {
case 0:
/* The rename succeeded. */
+ set_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags);
d_move(dentry, sdentry);
break;
case -ERESTARTSYS:
@@ -148,19 +155,11 @@ out:
static void afs_silly_unlink_success(struct afs_operation *op)
{
- struct afs_vnode *vnode = op->file[1].vnode;
-
_enter("op=%08x", op->debug_id);
- afs_check_for_remote_deletion(op, op->file[0].vnode);
+ afs_check_dir_conflict(op, &op->file[0]);
afs_vnode_commit_status(op, &op->file[0]);
afs_vnode_commit_status(op, &op->file[1]);
afs_update_dentry_version(op, &op->file[0], op->dentry);
-
- drop_nlink(&vnode->vfs_inode);
- if (vnode->vfs_inode.i_nlink == 0) {
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- }
}
static void afs_silly_unlink_edit_dir(struct afs_operation *op)
@@ -181,6 +180,7 @@ static const struct afs_operation_ops afs_silly_unlink_operation = {
.issue_afs_rpc = afs_fs_remove_file,
.issue_yfs_rpc = yfs_fs_remove_file,
.success = afs_silly_unlink_success,
+ .aborted = afs_check_for_remote_deletion,
.edit_dir = afs_silly_unlink_edit_dir,
};
@@ -200,12 +200,30 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
afs_op_set_vnode(op, 0, dvnode);
afs_op_set_vnode(op, 1, vnode);
+ op->file[0].dv_delta = 1;
+ op->file[0].update_ctime = true;
+ op->file[1].op_unlinked = true;
+ op->file[1].update_ctime = true;
op->dentry = dentry;
op->ops = &afs_silly_unlink_operation;
trace_afs_silly_rename(vnode, true);
- return afs_do_sync_operation(op);
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+
+ /* If there was a conflict with a third party, check the status of the
+ * unlinked vnode.
+ */
+ if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ op->file[1].update_ctime = false;
+ op->fetch_status.which = 1;
+ op->ops = &afs_fetch_status_operation;
+ afs_begin_vnode_operation(op);
+ afs_wait_for_operation(op);
+ }
+
+ return afs_put_operation(op);
}
/*
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 506c47471b42..6f6ed1605cfe 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -225,7 +225,6 @@ static void afs_fetch_data_success(struct afs_operation *op)
struct afs_vnode *vnode = op->file[0].vnode;
_enter("op=%08x", op->debug_id);
- afs_check_for_remote_deletion(op, vnode);
afs_vnode_commit_status(op, &op->file[0]);
afs_stat_v(vnode, n_fetches);
atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
@@ -240,6 +239,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = {
.issue_afs_rpc = afs_fs_fetch_data,
.issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success,
+ .aborted = afs_check_for_remote_deletion,
.put = afs_fetch_data_put,
};
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 71eea2a908c7..ffb8575345ca 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -175,10 +175,7 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
static void afs_lock_success(struct afs_operation *op)
{
- struct afs_vnode *vnode = op->file[0].vnode;
-
_enter("op=%08x", op->debug_id);
- afs_check_for_remote_deletion(op, vnode);
afs_vnode_commit_status(op, &op->file[0]);
}
@@ -186,6 +183,7 @@ static const struct afs_operation_ops afs_set_lock_operation = {
.issue_afs_rpc = afs_fs_set_lock,
.issue_yfs_rpc = yfs_fs_set_lock,
.success = afs_lock_success,
+ .aborted = afs_check_for_remote_deletion,
};
/*
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 2d2dff5688a4..c264839b2fd0 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -187,9 +187,17 @@ void afs_wait_for_operation(struct afs_operation *op)
op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
}
- if (op->error == 0) {
+ switch (op->error) {
+ case 0:
_debug("success");
op->ops->success(op);
+ break;
+ case -ECONNABORTED:
+ if (op->ops->aborted)
+ op->ops->aborted(op);
+ break;
+ default:
+ break;
}
afs_end_vnode_operation(op);
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index b34f74b0f319..5d9ef517cf81 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -314,7 +314,7 @@ void afs_fs_probe_timer(struct timer_list *timer)
{
struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
- if (!queue_work(afs_wq, &net->fs_prober))
+ if (!net->live || !queue_work(afs_wq, &net->fs_prober))
afs_dec_servers_outstanding(net);
}
@@ -458,3 +458,12 @@ dont_wait:
return -ETIME;
return -EDESTADDRREQ;
}
+
+/*
+ * Clean up the probing when the namespace is killed off.
+ */
+void afs_fs_probe_cleanup(struct afs_net *net)
+{
+ if (del_timer_sync(&net->fs_probe_timer))
+ afs_dec_servers_outstanding(net);
+}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index cd0a0060950b..1d13d2e882ad 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -165,9 +165,11 @@ static void afs_apply_status(struct afs_operation *op,
{
struct afs_file_status *status = &vp->scb.status;
struct afs_vnode *vnode = vp->vnode;
+ struct inode *inode = &vnode->vfs_inode;
struct timespec64 t;
umode_t mode;
bool data_changed = false;
+ bool change_size = vp->set_size;
_enter("{%llx:%llu.%u} %s",
vp->fid.vid, vp->fid.vnode, vp->fid.unique,
@@ -186,25 +188,25 @@ static void afs_apply_status(struct afs_operation *op,
}
if (status->nlink != vnode->status.nlink)
- set_nlink(&vnode->vfs_inode, status->nlink);
+ set_nlink(inode, status->nlink);
if (status->owner != vnode->status.owner)
- vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
+ inode->i_uid = make_kuid(&init_user_ns, status->owner);
if (status->group != vnode->status.group)
- vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
+ inode->i_gid = make_kgid(&init_user_ns, status->group);
if (status->mode != vnode->status.mode) {
- mode = vnode->vfs_inode.i_mode;
+ mode = inode->i_mode;
mode &= ~S_IALLUGO;
mode |= status->mode;
- WRITE_ONCE(vnode->vfs_inode.i_mode, mode);
+ WRITE_ONCE(inode->i_mode, mode);
}
t = status->mtime_client;
- vnode->vfs_inode.i_ctime = t;
- vnode->vfs_inode.i_mtime = t;
- vnode->vfs_inode.i_atime = t;
+ inode->i_mtime = t;
+ if (vp->update_ctime)
+ inode->i_ctime = op->ctime;
if (vnode->status.data_version != status->data_version)
data_changed = true;
@@ -226,6 +228,7 @@ static void afs_apply_status(struct afs_operation *op,
} else {
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
}
+ change_size = true;
} else if (vnode->status.type == AFS_FTYPE_DIR) {
/* Expected directory change is handled elsewhere so
* that we can locally edit the directory and save on a
@@ -233,11 +236,22 @@ static void afs_apply_status(struct afs_operation *op,
*/
if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
data_changed = false;
+ change_size = true;
}
if (data_changed) {
- inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- afs_set_i_size(vnode, status->size);
+ inode_set_iversion_raw(inode, status->data_version);
+
+ /* Only update the size if the data version jumped. If the
+ * file is being modified locally, then we might have our own
+ * idea of what the size should be that's not the same as
+ * what's on the server.
+ */
+ if (change_size) {
+ afs_set_i_size(vnode, status->size);
+ inode->i_ctime = t;
+ inode->i_atime = t;
+ }
}
}
@@ -267,32 +281,39 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
_enter("");
- ASSERTCMP(op->error, ==, 0);
-
write_seqlock(&vnode->cb_lock);
if (vp->scb.have_error) {
+ /* A YFS server will return this from RemoveFile2 and AFS and
+ * YFS will return this from InlineBulkStatus.
+ */
if (vp->scb.status.abort_code == VNOVNODE) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
clear_nlink(&vnode->vfs_inode);
__afs_break_callback(vnode, afs_cb_break_for_deleted);
+ op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
}
- } else {
- if (vp->scb.have_status)
- afs_apply_status(op, vp);
+ } else if (vp->scb.have_status) {
+ afs_apply_status(op, vp);
if (vp->scb.have_cb)
afs_apply_callback(op, vp);
+ } else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) {
+ drop_nlink(&vnode->vfs_inode);
+ if (vnode->vfs_inode.i_nlink == 0) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ __afs_break_callback(vnode, afs_cb_break_for_deleted);
+ }
}
write_sequnlock(&vnode->cb_lock);
- if (op->error == 0 && vp->scb.have_status)
+ if (vp->scb.have_status)
afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb);
}
static void afs_fetch_status_success(struct afs_operation *op)
{
- struct afs_vnode_param *vp = &op->file[0];
+ struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
struct afs_vnode *vnode = vp->vnode;
int ret;
@@ -306,10 +327,11 @@ static void afs_fetch_status_success(struct afs_operation *op)
}
}
-static const struct afs_operation_ops afs_fetch_status_operation = {
+const struct afs_operation_ops afs_fetch_status_operation = {
.issue_afs_rpc = afs_fs_fetch_status,
.issue_yfs_rpc = yfs_fs_fetch_status,
.success = afs_fetch_status_success,
+ .aborted = afs_check_for_remote_deletion,
};
/*
@@ -716,6 +738,9 @@ int afs_getattr(const struct path *path, struct kstat *stat,
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
generic_fillattr(inode, stat);
+ if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
+ stat->nlink > 0)
+ stat->nlink -= 1;
} while (need_seqretry(&vnode->cb_lock, seq));
done_seqretry(&vnode->cb_lock, seq);
@@ -785,7 +810,15 @@ void afs_evict_inode(struct inode *inode)
static void afs_setattr_success(struct afs_operation *op)
{
+ struct inode *inode = &op->file[0].vnode->vfs_inode;
+
afs_vnode_commit_status(op, &op->file[0]);
+ if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+ loff_t i_size = inode->i_size, size = op->setattr.attr->ia_size;
+ if (size > i_size)
+ pagecache_isize_extended(inode, i_size, size);
+ truncate_pagecache(inode, size);
+ }
}
static const struct afs_operation_ops afs_setattr_operation = {
@@ -801,17 +834,31 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct afs_operation *op;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
+ int ret;
_enter("{%llx:%llu},{n=%pd},%x",
vnode->fid.vid, vnode->fid.vnode, dentry,
attr->ia_valid);
if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
- ATTR_MTIME))) {
+ ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET |
+ ATTR_TOUCH))) {
_leave(" = 0 [unsupported]");
return 0;
}
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (!S_ISREG(vnode->vfs_inode.i_mode))
+ return -EISDIR;
+
+ ret = inode_newsize_ok(&vnode->vfs_inode, attr->ia_size);
+ if (ret)
+ return ret;
+
+ if (attr->ia_size == i_size_read(&vnode->vfs_inode))
+ attr->ia_valid &= ~ATTR_SIZE;
+ }
+
/* flush any dirty data outstanding on a regular file */
if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
@@ -825,8 +872,12 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
afs_op_set_vnode(op, 0, vnode);
op->setattr.attr = attr;
- if (attr->ia_valid & ATTR_SIZE)
+ if (attr->ia_valid & ATTR_SIZE) {
op->file[0].dv_delta = 1;
+ op->file[0].set_size = true;
+ }
+ op->ctime = attr->ia_ctime;
+ op->file[0].update_ctime = 1;
op->ops = &afs_setattr_operation;
return afs_do_sync_operation(op);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 0c9806ef2a19..d520535ddb62 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -634,6 +634,7 @@ struct afs_vnode {
#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
+#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
@@ -744,8 +745,11 @@ struct afs_vnode_param {
afs_dataversion_t dv_before; /* Data version before the call */
unsigned int cb_break_before; /* cb_break + cb_s_break before the call */
u8 dv_delta; /* Expected change in data version */
- bool put_vnode; /* T if we have a ref on the vnode */
- bool need_io_lock; /* T if we need the I/O lock on this */
+ bool put_vnode:1; /* T if we have a ref on the vnode */
+ bool need_io_lock:1; /* T if we need the I/O lock on this */
+ bool update_ctime:1; /* Need to update the ctime */
+ bool set_size:1; /* Must update i_size */
+ bool op_unlinked:1; /* True if file was unlinked by op */
};
/*
@@ -766,9 +770,9 @@ struct afs_operation {
struct dentry *dentry; /* Dentry to be altered */
struct dentry *dentry_2; /* Second dentry to be altered */
struct timespec64 mtime; /* Modification time to record */
+ struct timespec64 ctime; /* Change time to set */
short nr_files; /* Number of entries in file[], more_files */
short error;
- unsigned int abort_code;
unsigned int debug_id;
unsigned int cb_v_break; /* Volume break counter before op */
@@ -837,6 +841,7 @@ struct afs_operation {
#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */
#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
+#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */
};
/*
@@ -932,6 +937,7 @@ extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
extern void afs_d_release(struct dentry *);
+extern void afs_check_for_remote_deletion(struct afs_operation *);
/*
* dir_edit.c
@@ -1059,10 +1065,13 @@ extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
extern void afs_probe_fileserver(struct afs_net *, struct afs_server *);
extern void afs_fs_probe_dispatcher(struct work_struct *);
extern int afs_wait_for_one_fs_probe(struct afs_server *, bool);
+extern void afs_fs_probe_cleanup(struct afs_net *);
/*
* inode.c
*/
+extern const struct afs_operation_ops afs_fetch_status_operation;
+
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
@@ -1435,7 +1444,6 @@ extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
/*
* yfsclient.c
*/
-extern void yfs_fs_fetch_file_status(struct afs_operation *);
extern void yfs_fs_fetch_data(struct afs_operation *);
extern void yfs_fs_create_file(struct afs_operation *);
extern void yfs_fs_make_dir(struct afs_operation *);
@@ -1481,15 +1489,6 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
return &vnode->vfs_inode;
}
-static inline void afs_check_for_remote_deletion(struct afs_operation *op,
- struct afs_vnode *vnode)
-{
- if (op->error == -ENOENT) {
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- afs_break_callback(vnode, afs_cb_break_for_deleted);
- }
-}
-
/*
* Note that a dentry got changed. We need to set d_fsdata to the data version
* number derived from the result of the operation. It doesn't matter if
@@ -1504,6 +1503,18 @@ static inline void afs_update_dentry_version(struct afs_operation *op,
(void *)(unsigned long)dir_vp->scb.status.data_version;
}
+/*
+ * Check for a conflicting operation on a directory that we just unlinked from.
+ * If someone managed to sneak a link or an unlink in on the file we just
+ * unlinked, we won't be able to trust nlink on an AFS file (but not YFS).
+ */
+static inline void afs_check_dir_conflict(struct afs_operation *op,
+ struct afs_vnode_param *dvp)
+{
+ if (dvp->dv_before + dvp->dv_delta != dvp->scb.status.data_version)
+ op->flags |= AFS_OPERATION_DIR_CONFLICT;
+}
+
static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
{
trace_afs_io_error(call->debug_id, -EIO, where);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 9c79c91e8005..31b472f7c734 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -100,6 +100,7 @@ static int __net_init afs_net_init(struct net *net_ns)
timer_setup(&net->fs_timer, afs_servers_timer, 0);
INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher);
timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0);
+ atomic_set(&net->servers_outstanding, 1);
ret = -ENOMEM;
sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
@@ -130,6 +131,7 @@ static int __net_init afs_net_init(struct net *net_ns)
error_open_socket:
net->live = false;
+ afs_fs_probe_cleanup(net);
afs_cell_purge(net);
afs_purge_servers(net);
error_cell_init:
@@ -150,6 +152,7 @@ static void __net_exit afs_net_exit(struct net *net_ns)
struct afs_net *net = afs_net(net_ns);
net->live = false;
+ afs_fs_probe_cleanup(net);
afs_cell_purge(net);
afs_purge_servers(net);
afs_close_socket(net);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 52b19e9c1535..5334f1bd2bca 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -83,6 +83,7 @@ int afs_abort_to_error(u32 abort_code)
case UAENOLCK: return -ENOLCK;
case UAENOTEMPTY: return -ENOTEMPTY;
case UAELOOP: return -ELOOP;
+ case UAEOVERFLOW: return -EOVERFLOW;
case UAENOMEDIUM: return -ENOMEDIUM;
case UAEDQUOT: return -EDQUOT;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 039e3488511c..e82e452e2612 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -605,11 +605,12 @@ void afs_purge_servers(struct afs_net *net)
_enter("");
if (del_timer_sync(&net->fs_timer))
- atomic_dec(&net->servers_outstanding);
+ afs_dec_servers_outstanding(net);
afs_queue_server_manager(net);
_debug("wait");
+ atomic_dec(&net->servers_outstanding);
wait_var_event(&net->servers_outstanding,
!atomic_read(&net->servers_outstanding));
_leave("");
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 768497f82aee..7437806332d9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -194,11 +194,11 @@ int afs_write_end(struct file *file, struct address_space *mapping,
i_size = i_size_read(&vnode->vfs_inode);
if (maybe_i_size > i_size) {
- spin_lock(&vnode->wb_lock);
+ write_seqlock(&vnode->cb_lock);
i_size = i_size_read(&vnode->vfs_inode);
if (maybe_i_size > i_size)
i_size_write(&vnode->vfs_inode, maybe_i_size);
- spin_unlock(&vnode->wb_lock);
+ write_sequnlock(&vnode->cb_lock);
}
if (!PageUptodate(page)) {
@@ -393,6 +393,7 @@ static void afs_store_data_success(struct afs_operation *op)
{
struct afs_vnode *vnode = op->file[0].vnode;
+ op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
if (op->error == 0) {
afs_pages_written_back(vnode, op->store.first, op->store.last);
@@ -491,6 +492,7 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
unsigned long count, priv;
unsigned n, offset, to, f, t;
pgoff_t start, first, last;
+ loff_t i_size, end;
int loop, ret;
_enter(",%lx", primary_page->index);
@@ -591,7 +593,12 @@ no_more:
first = primary_page->index;
last = first + count - 1;
+ end = (loff_t)last * PAGE_SIZE + to;
+ i_size = i_size_read(&vnode->vfs_inode);
+
_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
+ if (end > i_size)
+ to = i_size & ~PAGE_MASK;
ret = afs_store_data(mapping, first, last, offset, to);
switch (ret) {
@@ -844,6 +851,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
vmf->page->index, priv);
SetPagePrivate(vmf->page);
set_page_private(vmf->page, priv);
+ file_update_time(file);
sb_end_pagefault(inode->i_sb);
return VM_FAULT_LOCKED;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 52d5af5fcd44..8c24fdc899e3 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -330,29 +330,6 @@ static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
}
/*
- * Deliver a reply that's a status, callback and volsync.
- */
-static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call)
-{
- struct afs_operation *op = call->op;
- const __be32 *bp;
- int ret;
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- xdr_decode_YFSFetchStatus(&bp, call, &op->file[0].scb);
- xdr_decode_YFSCallBack(&bp, call, &op->file[0].scb);
- xdr_decode_YFSVolSync(&bp, &op->volsync);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
-/*
* Deliver reply data to operations that just return a file status and a volume
* sync record.
*/
@@ -375,48 +352,6 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call)
}
/*
- * YFS.FetchStatus operation type
- */
-static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = {
- .name = "YFS.FetchStatus(vnode)",
- .op = yfs_FS_FetchStatus,
- .deliver = yfs_deliver_fs_status_cb_and_volsync,
- .destructor = afs_flat_call_destructor,
-};
-
-/*
- * Fetch the status information for a file.
- */
-void yfs_fs_fetch_file_status(struct afs_operation *op)
-{
- struct afs_vnode_param *vp = &op->file[0];
- struct afs_call *call;
- __be32 *bp;
-
- _enter(",%x,{%llx:%llu},,",
- key_serial(op->key), vp->fid.vid, vp->fid.vnode);
-
- call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchStatus_vnode,
- sizeof(__be32) * 2 +
- sizeof(struct yfs_xdr_YFSFid),
- sizeof(struct yfs_xdr_YFSFetchStatus) +
- sizeof(struct yfs_xdr_YFSCallBack) +
- sizeof(struct yfs_xdr_YFSVolSync));
- if (!call)
- return afs_op_nomem(op);
-
- /* marshall the parameters */
- bp = call->request;
- bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
- bp = xdr_encode_u32(bp, 0); /* RPC flags */
- bp = xdr_encode_YFSFid(bp, &vp->fid);
- yfs_check_req(call, bp);
-
- trace_afs_make_fs_call(call, &vp->fid);
- afs_make_op_call(op, call, GFP_NOFS);
-}
-
-/*
* Deliver reply data to an YFS.FetchData64.
*/
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
@@ -1605,12 +1540,36 @@ void yfs_fs_release_lock(struct afs_operation *op)
}
/*
+ * Deliver a reply to YFS.FetchStatus
+ */
+static int yfs_deliver_fs_fetch_status(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
+ const __be32 *bp;
+ int ret;
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ /* unmarshall the reply once we've received all of it */
+ bp = call->buffer;
+ xdr_decode_YFSFetchStatus(&bp, call, &vp->scb);
+ xdr_decode_YFSCallBack(&bp, call, &vp->scb);
+ xdr_decode_YFSVolSync(&bp, &op->volsync);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
* YFS.FetchStatus operation type
*/
static const struct afs_call_type yfs_RXYFSFetchStatus = {
.name = "YFS.FetchStatus",
.op = yfs_FS_FetchStatus,
- .deliver = yfs_deliver_fs_status_cb_and_volsync,
+ .deliver = yfs_deliver_fs_fetch_status,
.destructor = afs_flat_call_destructor,
};
@@ -1619,7 +1578,7 @@ static const struct afs_call_type yfs_RXYFSFetchStatus = {
*/
void yfs_fs_fetch_status(struct afs_operation *op)
{
- struct afs_vnode_param *vp = &op->file[0];
+ struct afs_vnode_param *vp = &op->file[op->fetch_status.which];
struct afs_call *call;
__be32 *bp;
diff --git a/fs/aio.c b/fs/aio.c
index 7ecddc2f38db..91e7cc4a9f17 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -67,7 +67,7 @@ struct aio_ring {
unsigned header_length; /* size of aio_ring */
- struct io_event io_events[0];
+ struct io_event io_events[];
}; /* 128 bytes + ring size */
/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 47860e589388..0ae656e022fd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -75,7 +75,7 @@ static void bdev_write_inode(struct block_device *bdev)
}
/* Kill _all_ buffers and pagecache , dirty or not.. */
-void kill_bdev(struct block_device *bdev)
+static void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
@@ -84,8 +84,7 @@ void kill_bdev(struct block_device *bdev)
invalidate_bh_lrus();
truncate_inode_pages(mapping, 0);
-}
-EXPORT_SYMBOL(kill_bdev);
+}
/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
@@ -1565,10 +1564,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
*/
if (!for_part) {
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0) {
- bdput(bdev);
+ if (ret != 0)
return ret;
- }
}
restart:
@@ -1637,8 +1634,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, mode, 1);
- if (ret)
+ if (ret) {
+ bdput(whole);
goto out_clear;
+ }
bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
@@ -1688,7 +1687,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk_unblock_events(disk);
put_disk_and_module(disk);
out:
- bdput(bdev);
return ret;
}
@@ -1755,6 +1753,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
bdput(whole);
}
+ if (res)
+ bdput(bdev);
+
return res;
}
EXPORT_SYMBOL(blkdev_get);
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 4ccb3c9189d8..2e42f47a7f98 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -9,7 +9,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \
indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \
mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
- super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o
+ super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
+ xattr_user.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c654205f648d..1d82336b1cd4 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -675,6 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
struct qstr qstr = {.name = str, .len = len };
const struct dentry *parent = READ_ONCE(dentry->d_parent);
const struct inode *inode = READ_ONCE(parent->d_inode);
+ char strbuf[DNAME_INLINE_LEN];
if (!inode || !IS_CASEFOLDED(inode) ||
!EXT4_SB(inode->i_sb)->s_encoding) {
@@ -683,6 +684,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
return memcmp(str, name->name, len);
}
+ /*
+ * If the dentry name is stored in-line, then it may be concurrently
+ * modified by a rename. If this happens, the VFS will eventually retry
+ * the lookup, so it doesn't matter what ->d_compare() returns.
+ * However, it's unsafe to call utf8_strncasecmp() with an unstable
+ * string. Therefore, we have to copy the name into a temporary buffer.
+ */
+ if (len <= DNAME_INLINE_LEN - 1) {
+ memcpy(strbuf, str, len);
+ strbuf[len] = 0;
+ qstr.name = strbuf;
+ /* prevent compiler from optimizing out the temporary buffer */
+ barrier();
+ }
+
return ext4_ci_compare(inode, name, &qstr, false);
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b08841f70b69..42f5060f3cdf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -426,13 +426,16 @@ struct flex_groups {
#define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
/* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */
+
+#define EXT4_DAX_FL 0x02000000 /* Inode is DAX */
+
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
@@ -440,14 +443,16 @@ struct flex_groups {
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
- EXT4_PROJINHERIT_FL)
+ EXT4_PROJINHERIT_FL | \
+ EXT4_DAX_FL)
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
- EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
+ EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\
+ EXT4_DAX_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\
@@ -459,6 +464,10 @@ struct flex_groups {
/* The only flags that should be swapped */
#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
+/* Flags which are mutually exclusive to DAX */
+#define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\
+ EXT4_JOURNAL_DATA_FL)
+
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
@@ -499,6 +508,7 @@ enum {
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
/* 22 was formerly EXT4_INODE_EOFBLOCKS */
+ EXT4_INODE_DAX = 25, /* Inode is DAX */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */
@@ -1135,9 +1145,9 @@ struct ext4_inode_info {
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#ifdef CONFIG_FS_DAX
-#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
+#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */
#else
-#define EXT4_MOUNT_DAX 0
+#define EXT4_MOUNT_DAX_ALWAYS 0
#endif
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
@@ -1180,6 +1190,8 @@ struct ext4_inode_info {
blocks */
#define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated
file systems */
+#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */
+#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */
#define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly
specified journal checksum */
@@ -1992,6 +2004,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
*/
#define EXT4_FLAGS_RESIZING 0
#define EXT4_FLAGS_SHUTDOWN 1
+#define EXT4_FLAGS_BDEV_IS_DAX 2
static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
{
@@ -2705,7 +2718,7 @@ extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
-extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7d088ff1e902..221f240eae60 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2844,7 +2844,7 @@ again:
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+ pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial.pclu = EXT4_B2C(sbi, pblk);
partial.state = nofree;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 54d324e80fe5..df25d38d6539 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1116,7 +1116,7 @@ got:
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, true);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 40ec5c7ef0d3..10dd470876b3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4403,9 +4403,11 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}
-static bool ext4_should_use_dax(struct inode *inode)
+static bool ext4_should_enable_dax(struct inode *inode)
{
- if (!test_opt(inode->i_sb, DAX))
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ if (test_opt2(inode->i_sb, DAX_NEVER))
return false;
if (!S_ISREG(inode->i_mode))
return false;
@@ -4417,14 +4419,21 @@ static bool ext4_should_use_dax(struct inode *inode)
return false;
if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
return false;
- return true;
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
+ return false;
+ if (test_opt(inode->i_sb, DAX_ALWAYS))
+ return true;
+
+ return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
}
-void ext4_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode, bool init)
{
unsigned int flags = EXT4_I(inode)->i_flags;
unsigned int new_fl = 0;
+ WARN_ON_ONCE(IS_DAX(inode) && init);
+
if (flags & EXT4_SYNC_FL)
new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
@@ -4435,8 +4444,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (ext4_should_use_dax(inode))
+
+ /* Because of the way inode_set_flags() works we must preserve S_DAX
+ * here if already set. */
+ new_fl |= (inode->i_flags & S_DAX);
+ if (init && ext4_should_enable_dax(inode))
new_fl |= S_DAX;
+
if (flags & EXT4_ENCRYPT_FL)
new_fl |= S_ENCRYPTED;
if (flags & EXT4_CASEFOLD_FL)
@@ -4650,7 +4664,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* not initialized on a new filesystem. */
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, true);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2162db0c747d..999cf6add39c 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
return 0;
}
+static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ if (S_ISDIR(inode->i_mode))
+ return;
+
+ if (test_opt2(inode->i_sb, DAX_NEVER) ||
+ test_opt(inode->i_sb, DAX_ALWAYS))
+ return;
+
+ if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
+ d_mark_dontcache(inode);
+}
+
+static bool dax_compatible(struct inode *inode, unsigned int oldflags,
+ unsigned int flags)
+{
+ if (flags & EXT4_DAX_FL) {
+ if ((oldflags & EXT4_DAX_MUT_EXCL) ||
+ ext4_test_inode_state(inode,
+ EXT4_STATE_VERITY_IN_PROGRESS)) {
+ return false;
+ }
+ }
+
+ if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL))
+ return false;
+
+ return true;
+}
+
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
{
@@ -300,7 +332,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
int err = -EPERM, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
- unsigned int jflag;
struct super_block *sb = inode->i_sb;
/* Is it quota file? Do not allow user to mess with it */
@@ -309,9 +340,6 @@ static int ext4_ioctl_setflags(struct inode *inode,
oldflags = ei->i_flags;
- /* The JOURNAL_DATA flag is modifiable only by root */
- jflag = flags & EXT4_JOURNAL_DATA_FL;
-
err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
if (err)
goto flags_out;
@@ -320,10 +348,16 @@ static int ext4_ioctl_setflags(struct inode *inode,
* The JOURNAL_DATA flag can only be changed by
* the relevant capability.
*/
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
if (!capable(CAP_SYS_RESOURCE))
goto flags_out;
}
+
+ if (!dax_compatible(inode, oldflags, flags)) {
+ err = -EOPNOTSUPP;
+ goto flags_out;
+ }
+
if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
migrate = 1;
@@ -369,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
if (err)
goto flags_err;
+ ext4_dax_dontcache(inode, flags);
+
for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
if (!(mask & EXT4_FL_USER_MODIFIABLE))
continue;
@@ -381,7 +417,8 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_clear_inode_flag(inode, i);
}
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
+
inode->i_ctime = current_time(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -390,17 +427,18 @@ flags_err:
if (err)
goto flags_out;
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
/*
* Changes to the journaling mode can cause unsafe changes to
- * S_DAX if we are using the DAX mount option.
+ * S_DAX if the inode is DAX
*/
- if (test_opt(inode->i_sb, DAX)) {
+ if (IS_DAX(inode)) {
err = -EBUSY;
goto flags_out;
}
- err = ext4_change_inode_journal_flag(inode, jflag);
+ err = ext4_change_inode_journal_flag(inode,
+ flags & EXT4_JOURNAL_DATA_FL);
if (err)
goto flags_out;
}
@@ -527,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
xflags |= FS_XFLAG_NOATIME;
if (iflags & EXT4_PROJINHERIT_FL)
xflags |= FS_XFLAG_PROJINHERIT;
+ if (iflags & EXT4_DAX_FL)
+ xflags |= FS_XFLAG_DAX;
return xflags;
}
#define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
- FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
+ FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \
+ FS_XFLAG_DAX)
/* Transfer xflags flags to internal */
static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
@@ -551,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
iflags |= EXT4_NOATIME_FL;
if (xflags & FS_XFLAG_PROJINHERIT)
iflags |= EXT4_PROJINHERIT_FL;
+ if (xflags & FS_XFLAG_DAX)
+ iflags |= EXT4_DAX_FL;
return iflags;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a9083113a8c0..c0a331e2feb0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4708,7 +4708,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}
ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
- seq = *this_cpu_ptr(&discard_pa_seq);
+ seq = this_cpu_read(discard_pa_seq);
if (!ext4_mb_use_preallocated(ac)) {
ac->ac_op = EXT4_MB_HISTORY_ALLOC;
ext4_mb_normalize_request(ac, ar);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c668f6b42374..330957ed1f05 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -522,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb)
smp_wmb();
sb->s_flags |= SB_RDONLY;
} else if (test_opt(sb, ERRORS_PANIC)) {
- if (EXT4_SB(sb)->s_journal &&
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
- return;
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
@@ -725,23 +722,20 @@ void __ext4_abort(struct super_block *sb, const char *function,
va_end(args);
if (sb_rdonly(sb) == 0) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible
* before ->s_flags update
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
- if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
- if (EXT4_SB(sb)->s_journal &&
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
- return;
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
panic("EXT4-fs panic from previous error\n");
- }
}
void __ext4_msg(struct super_block *sb,
@@ -1324,6 +1318,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
return -EINVAL;
+ if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
+ return -EOPNOTSUPP;
+
res = ext4_convert_inline_data(inode);
if (res)
return res;
@@ -1349,7 +1346,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
* Update inode->i_flags - S_ENCRYPTED will be enabled,
* S_DAX may be disabled
*/
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
}
return res;
}
@@ -1376,7 +1373,7 @@ retry:
* Update inode->i_flags - S_ENCRYPTED will be enabled,
* S_DAX may be disabled
*/
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
@@ -1514,7 +1511,8 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
+ Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1581,6 +1579,9 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
{Opt_dax, "dax"},
+ {Opt_dax_always, "dax=always"},
+ {Opt_dax_inode, "dax=inode"},
+ {Opt_dax_never, "dax=never"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
{Opt_warn_on_error, "warn_on_error"},
@@ -1729,6 +1730,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
#define MOPT_NO_EXT3 0x0200
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING 0x0400
+#define MOPT_SKIP 0x0800
static const struct mount_opts {
int token;
@@ -1778,7 +1780,13 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+ {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
+ {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -2123,13 +2131,56 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
- } else if (token == Opt_dax) {
+ } else if (token == Opt_dax || token == Opt_dax_always ||
+ token == Opt_dax_inode || token == Opt_dax_never) {
#ifdef CONFIG_FS_DAX
- ext4_msg(sb, KERN_WARNING,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- sbi->s_mount_opt |= m->mount_opt;
+ switch (token) {
+ case Opt_dax:
+ case Opt_dax_always:
+ if (is_remount &&
+ (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
+ fail_dax_change_remount:
+ ext4_msg(sb, KERN_ERR, "can't change "
+ "dax mount option while remounting");
+ return -1;
+ }
+ if (is_remount &&
+ (test_opt(sb, DATA_FLAGS) ==
+ EXT4_MOUNT_JOURNAL_DATA)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -1;
+ }
+ ext4_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ break;
+ case Opt_dax_never:
+ if (is_remount &&
+ (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
+ goto fail_dax_change_remount;
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ break;
+ case Opt_dax_inode:
+ if (is_remount &&
+ ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
+ goto fail_dax_change_remount;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ /* Strictly for printing options */
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
+ break;
+ }
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
return -1;
#endif
} else if (token == Opt_data_err_abort) {
@@ -2293,7 +2344,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR))
+ (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
continue;
if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
continue; /* skip if same as the default */
@@ -2353,6 +2404,17 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
fscrypt_show_test_dummy_encryption(seq, sep, sb);
+ if (test_opt(sb, DAX_ALWAYS)) {
+ if (IS_EXT2_SB(sb))
+ SEQ_OPTS_PUTS("dax");
+ else
+ SEQ_OPTS_PUTS("dax=always");
+ } else if (test_opt2(sb, DAX_NEVER)) {
+ SEQ_OPTS_PUTS("dax=never");
+ } else if (test_opt2(sb, DAX_INODE)) {
+ SEQ_OPTS_PUTS("dax=inode");
+ }
+
ext4_show_quota_options(seq, sb);
return 0;
}
@@ -2383,6 +2445,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
err = -EROFS;
+ goto done;
}
if (read_only)
goto done;
@@ -4017,7 +4080,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"both data=journal and delalloc");
goto failed_mount;
}
- if (test_opt(sb, DAX)) {
+ if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
goto failed_mount;
@@ -4127,13 +4190,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (bdev_dax_supported(sb->s_bdev, blocksize))
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
goto failed_mount;
}
- if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device.");
goto failed_mount;
@@ -5447,12 +5513,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- if (test_opt(sb, DAX)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dax");
- err = -EINVAL;
- goto restore_opts;
- }
} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -5468,12 +5528,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
- ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
- }
-
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index dec1244dd062..bbd5e7e0632b 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp)
handle_t *handle;
int err;
+ if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
+ return -EINVAL;
+
if (ext4_verity_in_progress(inode))
return -EBUSY;
@@ -241,7 +244,7 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
if (err)
goto out_stop;
ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
out_stop:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 9b29a40738ac..7d2f6576d954 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -93,6 +93,7 @@ static const struct xattr_handler * const ext4_xattr_handler_map[] = {
#ifdef CONFIG_EXT4_FS_SECURITY
[EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
#endif
+ [EXT4_XATTR_INDEX_HURD] = &ext4_xattr_hurd_handler,
};
const struct xattr_handler *ext4_xattr_handlers[] = {
@@ -105,6 +106,7 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
#ifdef CONFIG_EXT4_FS_SECURITY
&ext4_xattr_security_handler,
#endif
+ &ext4_xattr_hurd_handler,
NULL
};
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index ffe21ac77f78..730b91fa0dd7 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -124,6 +124,7 @@ struct ext4_xattr_inode_array {
extern const struct xattr_handler ext4_xattr_user_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;
+extern const struct xattr_handler ext4_xattr_hurd_handler;
#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c
new file mode 100644
index 000000000000..8cfa74a56361
--- /dev/null
+++ b/fs/ext4/xattr_hurd.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/fs/ext4/xattr_hurd.c
+ * Handler for extended gnu attributes for the Hurd.
+ *
+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ * Copyright (C) 2020 by Jan (janneke) Nieuwenhuizen, <janneke@gnu.org>
+ */
+
+#include <linux/init.h>
+#include <linux/string.h>
+#include "ext4.h"
+#include "xattr.h"
+
+static bool
+ext4_xattr_hurd_list(struct dentry *dentry)
+{
+ return test_opt(dentry->d_sb, XATTR_USER);
+}
+
+static int
+ext4_xattr_hurd_get(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, void *buffer, size_t size)
+{
+ if (!test_opt(inode->i_sb, XATTR_USER))
+ return -EOPNOTSUPP;
+
+ return ext4_xattr_get(inode, EXT4_XATTR_INDEX_HURD,
+ name, buffer, size);
+}
+
+static int
+ext4_xattr_hurd_set(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ if (!test_opt(inode->i_sb, XATTR_USER))
+ return -EOPNOTSUPP;
+
+ return ext4_xattr_set(inode, EXT4_XATTR_INDEX_HURD,
+ name, value, size, flags);
+}
+
+const struct xattr_handler ext4_xattr_hurd_handler = {
+ .prefix = XATTR_HURD_PREFIX,
+ .list = ext4_xattr_hurd_list,
+ .get = ext4_xattr_hurd_get,
+ .set = ext4_xattr_hurd_set,
+};
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 0b65a912b036..47c5f3aeb460 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -903,13 +903,15 @@ void io_wq_cancel_all(struct io_wq *wq)
struct io_cb_cancel_data {
work_cancel_fn *fn;
void *data;
+ int nr_running;
+ int nr_pending;
+ bool cancel_all;
};
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
unsigned long flags;
- bool ret = false;
/*
* Hold the lock to avoid ->cur_work going out of scope, caller
@@ -920,74 +922,90 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
!(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
match->fn(worker->cur_work, match->data)) {
send_sig(SIGINT, worker->task, 1);
- ret = true;
+ match->nr_running++;
}
spin_unlock_irqrestore(&worker->lock, flags);
- return ret;
+ return match->nr_running && !match->cancel_all;
}
-static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
- struct io_cb_cancel_data *match)
+static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+ struct io_cb_cancel_data *match)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
unsigned long flags;
- bool found = false;
- /*
- * First check pending list, if we're lucky we can just remove it
- * from there. CANCEL_OK means that the work is returned as-new,
- * no completion will be posted for it.
- */
+retry:
spin_lock_irqsave(&wqe->lock, flags);
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
+ if (!match->fn(work, match->data))
+ continue;
- if (match->fn(work, match->data)) {
- wq_list_del(&wqe->work_list, node, prev);
- found = true;
- break;
- }
- }
- spin_unlock_irqrestore(&wqe->lock, flags);
-
- if (found) {
+ wq_list_del(&wqe->work_list, node, prev);
+ spin_unlock_irqrestore(&wqe->lock, flags);
io_run_cancel(work, wqe);
- return IO_WQ_CANCEL_OK;
+ match->nr_pending++;
+ if (!match->cancel_all)
+ return;
+
+ /* not safe to continue after unlock */
+ goto retry;
}
+ spin_unlock_irqrestore(&wqe->lock, flags);
+}
- /*
- * Now check if a free (going busy) or busy worker has the work
- * currently running. If we find it there, we'll return CANCEL_RUNNING
- * as an indication that we attempt to signal cancellation. The
- * completion will run normally in this case.
- */
+static void io_wqe_cancel_running_work(struct io_wqe *wqe,
+ struct io_cb_cancel_data *match)
+{
rcu_read_lock();
- found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
+ io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
rcu_read_unlock();
- return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
}
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
- void *data)
+ void *data, bool cancel_all)
{
struct io_cb_cancel_data match = {
- .fn = cancel,
- .data = data,
+ .fn = cancel,
+ .data = data,
+ .cancel_all = cancel_all,
};
- enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int node;
+ /*
+ * First check pending list, if we're lucky we can just remove it
+ * from there. CANCEL_OK means that the work is returned as-new,
+ * no completion will be posted for it.
+ */
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
- ret = io_wqe_cancel_work(wqe, &match);
- if (ret != IO_WQ_CANCEL_NOTFOUND)
- break;
+ io_wqe_cancel_pending_work(wqe, &match);
+ if (match.nr_pending && !match.cancel_all)
+ return IO_WQ_CANCEL_OK;
}
- return ret;
+ /*
+ * Now check if a free (going busy) or busy worker has the work
+ * currently running. If we find it there, we'll return CANCEL_RUNNING
+ * as an indication that we attempt to signal cancellation. The
+ * completion will run normally in this case.
+ */
+ for_each_node(node) {
+ struct io_wqe *wqe = wq->wqes[node];
+
+ io_wqe_cancel_running_work(wqe, &match);
+ if (match.nr_running && !match.cancel_all)
+ return IO_WQ_CANCEL_RUNNING;
+ }
+
+ if (match.nr_running)
+ return IO_WQ_CANCEL_RUNNING;
+ if (match.nr_pending)
+ return IO_WQ_CANCEL_OK;
+ return IO_WQ_CANCEL_NOTFOUND;
}
static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
@@ -997,21 +1015,7 @@ static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
{
- return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork);
-}
-
-static bool io_wq_pid_match(struct io_wq_work *work, void *data)
-{
- pid_t pid = (pid_t) (unsigned long) data;
-
- return work->task_pid == pid;
-}
-
-enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
-{
- void *data = (void *) (unsigned long) pid;
-
- return io_wq_cancel_cb(wq, io_wq_pid_match, data);
+ return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false);
}
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 8e138fa88b9f..071f1a997800 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -90,7 +90,6 @@ struct io_wq_work {
const struct cred *creds;
struct fs_struct *fs;
unsigned flags;
- pid_t task_pid;
};
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
@@ -125,12 +124,11 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work)
void io_wq_cancel_all(struct io_wq *wq);
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
-enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid);
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
- void *data);
+ void *data, bool cancel_all);
struct task_struct *io_wq_get_task(struct io_wq *wq);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 155f3d830ddb..a78201b96179 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -541,6 +541,7 @@ enum {
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_QUEUE_TIMEOUT_BIT,
REQ_F_WORK_INITIALIZED_BIT,
+ REQ_F_TASK_PINNED_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -598,6 +599,8 @@ enum {
REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
+ /* req->task is refcounted */
+ REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT),
};
struct async_poll {
@@ -910,6 +913,21 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);
+static void io_get_req_task(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_TASK_PINNED)
+ return;
+ get_task_struct(req->task);
+ req->flags |= REQ_F_TASK_PINNED;
+}
+
+/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
+static void __io_put_req_task(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_TASK_PINNED)
+ put_task_struct(req->task);
+}
+
static void io_file_put_work(struct work_struct *work);
/*
@@ -1045,8 +1063,6 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
}
spin_unlock(&current->fs->lock);
}
- if (!req->work.task_pid)
- req->work.task_pid = task_pid_vnr(current);
}
static inline void io_req_work_drop_env(struct io_kiocb *req)
@@ -1087,6 +1103,7 @@ static inline void io_prep_async_work(struct io_kiocb *req,
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
+ io_req_init_async(req);
io_req_work_grab_env(req, def);
*link = io_prep_linked_timeout(req);
@@ -1398,9 +1415,7 @@ static void __io_req_aux_free(struct io_kiocb *req)
kfree(req->io);
if (req->file)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
- if (req->task)
- put_task_struct(req->task);
-
+ __io_put_req_task(req);
io_req_work_drop_env(req);
}
@@ -1727,6 +1742,18 @@ static int io_put_kbuf(struct io_kiocb *req)
return cflags;
}
+static void io_iopoll_queue(struct list_head *again)
+{
+ struct io_kiocb *req;
+
+ do {
+ req = list_first_entry(again, struct io_kiocb, list);
+ list_del(&req->list);
+ refcount_inc(&req->refs);
+ io_queue_async_work(req);
+ } while (!list_empty(again));
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -1735,12 +1762,21 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
{
struct req_batch rb;
struct io_kiocb *req;
+ LIST_HEAD(again);
+
+ /* order with ->result store in io_complete_rw_iopoll() */
+ smp_rmb();
rb.to_free = rb.need_iter = 0;
while (!list_empty(done)) {
int cflags = 0;
req = list_first_entry(done, struct io_kiocb, list);
+ if (READ_ONCE(req->result) == -EAGAIN) {
+ req->iopoll_completed = 0;
+ list_move_tail(&req->list, &again);
+ continue;
+ }
list_del(&req->list);
if (req->flags & REQ_F_BUFFER_SELECTED)
@@ -1758,18 +1794,9 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (ctx->flags & IORING_SETUP_SQPOLL)
io_cqring_ev_posted(ctx);
io_free_req_many(ctx, &rb);
-}
-static void io_iopoll_queue(struct list_head *again)
-{
- struct io_kiocb *req;
-
- do {
- req = list_first_entry(again, struct io_kiocb, list);
- list_del(&req->list);
- refcount_inc(&req->refs);
- io_queue_async_work(req);
- } while (!list_empty(again));
+ if (!list_empty(&again))
+ io_iopoll_queue(&again);
}
static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
@@ -1777,7 +1804,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
{
struct io_kiocb *req, *tmp;
LIST_HEAD(done);
- LIST_HEAD(again);
bool spin;
int ret;
@@ -1803,13 +1829,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (!list_empty(&done))
break;
- if (req->result == -EAGAIN) {
- list_move_tail(&req->list, &again);
- continue;
- }
- if (!list_empty(&again))
- break;
-
ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
if (ret < 0)
break;
@@ -1822,9 +1841,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (!list_empty(&done))
io_iopoll_complete(ctx, nr_events, &done);
- if (!list_empty(&again))
- io_iopoll_queue(&again);
-
return ret;
}
@@ -1973,11 +1989,15 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if (res != req->result)
+ if (res != -EAGAIN && res != req->result)
req_set_fail_links(req);
- req->result = res;
- if (res != -EAGAIN)
+
+ WRITE_ONCE(req->result, res);
+ /* order with io_iopoll_complete() checking ->result */
+ if (res != -EAGAIN) {
+ smp_wmb();
WRITE_ONCE(req->iopoll_completed, 1);
+ }
}
/*
@@ -2650,8 +2670,8 @@ copy_iov:
}
}
out_free:
- kfree(iovec);
- req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (!(req->flags & REQ_F_NEED_CLEANUP))
+ kfree(iovec);
return ret;
}
@@ -2773,8 +2793,8 @@ copy_iov:
}
}
out_free:
- req->flags &= ~REQ_F_NEED_CLEANUP;
- kfree(iovec);
+ if (!(req->flags & REQ_F_NEED_CLEANUP))
+ kfree(iovec);
return ret;
}
@@ -4236,6 +4256,28 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
__io_queue_proc(&pt->req->apoll->poll, pt, head);
}
+static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
+{
+ struct mm_struct *mm = current->mm;
+
+ if (mm) {
+ kthread_unuse_mm(mm);
+ mmput(mm);
+ }
+}
+
+static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
+{
+ if (io_op_defs[req->opcode].needs_mm && !current->mm) {
+ if (unlikely(!mmget_not_zero(ctx->sqo_mm)))
+ return -EFAULT;
+ kthread_use_mm(ctx->sqo_mm);
+ }
+
+ return 0;
+}
+
static void io_async_task_func(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
@@ -4270,11 +4312,16 @@ static void io_async_task_func(struct callback_head *cb)
if (!canceled) {
__set_current_state(TASK_RUNNING);
+ if (io_sq_thread_acquire_mm(ctx, req)) {
+ io_cqring_add_event(req, -EFAULT);
+ goto end_req;
+ }
mutex_lock(&ctx->uring_lock);
__io_queue_sqe(req, NULL);
mutex_unlock(&ctx->uring_lock);
} else {
io_cqring_ev_posted(ctx);
+end_req:
req_set_fail_links(req);
io_double_put_req(req);
}
@@ -4366,8 +4413,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
memcpy(&apoll->work, &req->work, sizeof(req->work));
had_io = req->io != NULL;
- get_task_struct(current);
- req->task = current;
+ io_get_req_task(req);
req->apoll = apoll;
INIT_HLIST_NODE(&req->hash_node);
@@ -4555,8 +4601,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
- get_task_struct(current);
- req->task = current;
+ io_get_req_task(req);
return 0;
}
@@ -4772,7 +4817,7 @@ static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
enum io_wq_cancel cancel_ret;
int ret = 0;
- cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr);
+ cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false);
switch (cancel_ret) {
case IO_WQ_CANCEL_OK:
ret = 0;
@@ -5817,17 +5862,14 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->flags = 0;
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
- req->task = NULL;
+ req->task = current;
req->result = 0;
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
- if (io_op_defs[req->opcode].needs_mm && !current->mm) {
- if (unlikely(!mmget_not_zero(ctx->sqo_mm)))
- return -EFAULT;
- kthread_use_mm(ctx->sqo_mm);
- }
+ if (unlikely(io_sq_thread_acquire_mm(ctx, req)))
+ return -EFAULT;
sqe_flags = READ_ONCE(sqe->flags);
/* enforce forwards compatibility on users */
@@ -5936,16 +5978,6 @@ fail_req:
return submitted;
}
-static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
-{
- struct mm_struct *mm = current->mm;
-
- if (mm) {
- kthread_unuse_mm(mm);
- mmput(mm);
- }
-}
-
static int io_sq_thread(void *data)
{
struct io_ring_ctx *ctx = data;
@@ -7331,7 +7363,17 @@ static void io_ring_exit_work(struct work_struct *work)
if (ctx->rings)
io_cqring_overflow_flush(ctx, true);
- wait_for_completion(&ctx->ref_comp);
+ /*
+ * If we're doing polled IO and end up having requests being
+ * submitted async (out-of-line), then completions can come in while
+ * we're waiting for refs to drop. We need to reap these manually,
+ * as nobody else will be looking for them.
+ */
+ while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) {
+ io_iopoll_reap_events(ctx);
+ if (ctx->rings)
+ io_cqring_overflow_flush(ctx, true);
+ }
io_ring_ctx_free(ctx);
}
@@ -7365,9 +7407,22 @@ static int io_uring_release(struct inode *inode, struct file *file)
return 0;
}
+static bool io_wq_files_match(struct io_wq_work *work, void *data)
+{
+ struct files_struct *files = data;
+
+ return work->files == files;
+}
+
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
+ if (list_empty_careful(&ctx->inflight_list))
+ return;
+
+ /* cancel all at once, should be faster than doing it one by one */
+ io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true);
+
while (!list_empty_careful(&ctx->inflight_list)) {
struct io_kiocb *cancel_req = NULL, *req;
DEFINE_WAIT(wait);
@@ -7423,6 +7478,14 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
}
}
+static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct task_struct *task = data;
+
+ return req->task == task;
+}
+
static int io_uring_flush(struct file *file, void *data)
{
struct io_ring_ctx *ctx = file->private_data;
@@ -7433,7 +7496,7 @@ static int io_uring_flush(struct file *file, void *data)
* If the task is going away, cancel work it may have pending
*/
if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
- io_wq_cancel_pid(ctx->io_wq, task_pid_vnr(current));
+ io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
return 0;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a49d0e670ddf..e4944436e733 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1140,6 +1140,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
init_waitqueue_head(&journal->j_wait_commit);
init_waitqueue_head(&journal->j_wait_updates);
init_waitqueue_head(&journal->j_wait_reserved);
+ mutex_init(&journal->j_abort_mutex);
mutex_init(&journal->j_barrier);
mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock);
@@ -1402,7 +1403,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
printk(KERN_ERR "JBD2: Error %d detected when updating "
"journal superblock for %s.\n", ret,
journal->j_devname);
- jbd2_journal_abort(journal, ret);
+ if (!is_journal_aborted(journal))
+ jbd2_journal_abort(journal, ret);
}
return ret;
@@ -2154,6 +2156,13 @@ void jbd2_journal_abort(journal_t *journal, int errno)
transaction_t *transaction;
/*
+ * Lock the aborting procedure until everything is done. This avoids
+ * races with the filesystem's error handling flow (e.g. ext4_abort())
+ * and ensures that any panic happens only after the error info has
+ * been written into the journal's superblock.
+ */
+ mutex_lock(&journal->j_abort_mutex);
+ /*
* ESHUTDOWN always takes precedence because a file system check
* caused by any other journal abort error is not required after
* a shutdown triggered.
@@ -2167,6 +2176,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
journal->j_errno = errno;
jbd2_journal_update_sb_errno(journal);
}
+ mutex_unlock(&journal->j_abort_mutex);
return;
}
@@ -2188,10 +2198,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
* layer could realise that a filesystem check is needed.
*/
jbd2_journal_update_sb_errno(journal);
-
- write_lock(&journal->j_state_lock);
- journal->j_flags |= JBD2_REC_ERR;
- write_unlock(&journal->j_state_lock);
+ mutex_unlock(&journal->j_abort_mutex);
}
/**
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 0637271f3770..8ff4d1a1e774 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -259,7 +259,7 @@ struct jffs2_full_dirent
uint32_t ino; /* == zero for unlink */
unsigned int nhash;
unsigned char type;
- unsigned char name[0];
+ unsigned char name[];
};
/*
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 60207a2ae952..e4131cb1f1d4 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -61,7 +61,7 @@ struct jffs2_sum_dirent_flash
jint32_t ino; /* == zero for unlink */
uint8_t nsize; /* dirent name size */
uint8_t type; /* dirent type */
- uint8_t name[0]; /* dirent name */
+ uint8_t name[]; /* dirent name */
} __attribute__((packed));
struct jffs2_sum_xattr_flash
@@ -117,7 +117,7 @@ struct jffs2_sum_dirent_mem
jint32_t ino; /* == zero for unlink */
uint8_t nsize; /* dirent name size */
uint8_t type; /* dirent type */
- uint8_t name[0]; /* dirent name */
+ uint8_t name[]; /* dirent name */
} __attribute__((packed));
struct jffs2_sum_xattr_mem
diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
index 9955d75c0585..ad31ec4ad627 100644
--- a/fs/proc/bootconfig.c
+++ b/fs/proc/bootconfig.c
@@ -26,8 +26,9 @@ static int boot_config_proc_show(struct seq_file *m, void *v)
static int __init copy_xbc_key_value_list(char *dst, size_t size)
{
struct xbc_node *leaf, *vnode;
- const char *val;
char *key, *end = dst + size;
+ const char *val;
+ char q;
int ret = 0;
key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL);
@@ -41,16 +42,20 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
break;
dst += ret;
vnode = xbc_node_get_child(leaf);
- if (vnode && xbc_node_is_array(vnode)) {
+ if (vnode) {
xbc_array_for_each_value(vnode, val) {
- ret = snprintf(dst, rest(dst, end), "\"%s\"%s",
- val, vnode->next ? ", " : "\n");
+ if (strchr(val, '"'))
+ q = '\'';
+ else
+ q = '"';
+ ret = snprintf(dst, rest(dst, end), "%c%s%c%s",
+ q, val, q, vnode->next ? ", " : "\n");
if (ret < 0)
goto out;
dst += ret;
}
} else {
- ret = snprintf(dst, rest(dst, end), "\"%s\"\n", val);
+ ret = snprintf(dst, rest(dst, end), "\"\"\n");
if (ret < 0)
break;
dst += ret;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8ba492d44e68..e502414b3556 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -512,7 +512,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
* Using bounce buffer to bypass the
* hardened user copy kernel text checks.
*/
- if (probe_kernel_read(buf, (void *) start, tsz)) {
+ if (copy_from_kernel_nofault(buf, (void *)start,
+ tsz)) {
if (clear_user(buffer, tsz)) {
ret = -EFAULT;
goto out;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 7187bd1a30ea..8d64edb80ebf 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -262,7 +262,7 @@ struct squashfs_dir_index {
__le32 index;
__le32 start_block;
__le32 size;
- unsigned char name[0];
+ unsigned char name[];
};
struct squashfs_base_inode {
@@ -327,7 +327,7 @@ struct squashfs_symlink_inode {
__le32 inode_number;
__le32 nlink;
__le32 symlink_size;
- char symlink[0];
+ char symlink[];
};
struct squashfs_reg_inode {
@@ -341,7 +341,7 @@ struct squashfs_reg_inode {
__le32 fragment;
__le32 offset;
__le32 file_size;
- __le16 block_list[0];
+ __le16 block_list[];
};
struct squashfs_lreg_inode {
@@ -358,7 +358,7 @@ struct squashfs_lreg_inode {
__le32 fragment;
__le32 offset;
__le32 xattr;
- __le16 block_list[0];
+ __le16 block_list[];
};
struct squashfs_dir_inode {
@@ -389,7 +389,7 @@ struct squashfs_ldir_inode {
__le16 i_count;
__le16 offset;
__le32 xattr;
- struct squashfs_dir_index index[0];
+ struct squashfs_dir_index index[];
};
union squashfs_inode {
@@ -410,7 +410,7 @@ struct squashfs_dir_entry {
__le16 inode_number;
__le16 type;
__le16 size;
- char name[0];
+ char name[];
};
struct squashfs_dir_header {
@@ -428,12 +428,12 @@ struct squashfs_fragment_entry {
struct squashfs_xattr_entry {
__le16 type;
__le16 size;
- char data[0];
+ char data[];
};
struct squashfs_xattr_val {
__le32 vsize;
- char value[0];
+ char value[];
};
struct squashfs_xattr_id {