summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c4
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c87
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_dir.c17
-rw-r--r--fs/9p/vfs_file.c326
-rw-r--r--fs/9p/vfs_inode.c34
-rw-r--r--fs/9p/vfs_inode_dotl.c16
-rw-r--r--fs/9p/vfs_super.c8
-rw-r--r--fs/9p/xattr.c80
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/dir_fplus.c1
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/adfs/super.c20
-rw-r--r--fs/affs/affs.h28
-rw-r--r--fs/affs/amigaffs.c10
-rw-r--r--fs/affs/file.c15
-rw-r--r--fs/affs/inode.c34
-rw-r--r--fs/affs/namei.c16
-rw-r--r--fs/affs/super.c43
-rw-r--r--fs/afs/dir.c42
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/afs/misc.c16
-rw-r--r--fs/afs/mntpt.c8
-rw-r--r--fs/afs/rxrpc.c5
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c278
-rw-r--r--fs/autofs4/autofs_i.h6
-rw-r--r--fs/autofs4/expire.c2
-rw-r--r--fs/autofs4/inode.c6
-rw-r--r--fs/autofs4/root.c18
-rw-r--r--fs/autofs4/symlink.c2
-rw-r--r--fs/autofs4/waitq.c6
-rw-r--r--fs/befs/befs.h22
-rw-r--r--fs/befs/datastream.c4
-rw-r--r--fs/befs/io.c2
-rw-r--r--fs/befs/linuxvfs.c16
-rw-r--r--fs/befs/super.c4
-rw-r--r--fs/bfs/dir.c12
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/binfmt_misc.c46
-rw-r--r--fs/block_dev.c27
-rw-r--r--fs/btrfs/async-thread.c4
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/backref.c4
-rw-r--r--fs/btrfs/btrfs_inode.h14
-rw-r--r--fs/btrfs/check-integrity.c9
-rw-r--r--fs/btrfs/compression.c4
-rw-r--r--fs/btrfs/compression.h4
-rw-r--r--fs/btrfs/ctree.c62
-rw-r--r--fs/btrfs/ctree.h46
-rw-r--r--fs/btrfs/delayed-inode.c11
-rw-r--r--fs/btrfs/delayed-ref.c22
-rw-r--r--fs/btrfs/delayed-ref.h10
-rw-r--r--fs/btrfs/dev-replace.c6
-rw-r--r--fs/btrfs/disk-io.c570
-rw-r--r--fs/btrfs/disk-io.h4
-rw-r--r--fs/btrfs/export.c6
-rw-r--r--fs/btrfs/extent-tree.c532
-rw-r--r--fs/btrfs/extent_io.c59
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file-item.c6
-rw-r--r--fs/btrfs/file.c97
-rw-r--r--fs/btrfs/free-space-cache.c339
-rw-r--r--fs/btrfs/free-space-cache.h9
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c227
-rw-r--r--fs/btrfs/ioctl.c58
-rw-r--r--fs/btrfs/lzo.c2
-rw-r--r--fs/btrfs/math.h6
-rw-r--r--fs/btrfs/props.c2
-rw-r--r--fs/btrfs/qgroup.c348
-rw-r--r--fs/btrfs/qgroup.h3
-rw-r--r--fs/btrfs/raid56.c16
-rw-r--r--fs/btrfs/relocation.c11
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/send.c83
-rw-r--r--fs/btrfs/super.c31
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/sysfs.h22
-rw-r--r--fs/btrfs/tests/qgroup-tests.c4
-rw-r--r--fs/btrfs/transaction.c54
-rw-r--r--fs/btrfs/transaction.h12
-rw-r--r--fs/btrfs/tree-log.c396
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/volumes.c155
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/btrfs/xattr.c69
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/cachefiles/bind.c10
-rw-r--r--fs/cachefiles/interface.c6
-rw-r--r--fs/cachefiles/namei.c122
-rw-r--r--fs/cachefiles/rdwr.c14
-rw-r--r--fs/cachefiles/security.c6
-rw-r--r--fs/cachefiles/xattr.c22
-rw-r--r--fs/ceph/addr.c41
-rw-r--r--fs/ceph/caps.c53
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c108
-rw-r--r--fs/ceph/export.c28
-rw-r--r--fs/ceph/file.c31
-rw-r--r--fs/ceph/inode.c52
-rw-r--r--fs/ceph/mds_client.c85
-rw-r--r--fs/ceph/strings.c1
-rw-r--r--fs/ceph/super.c60
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c39
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/cifsfs.c14
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c19
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c142
-rw-r--r--fs/cifs/inode.c34
-rw-r--r--fs/cifs/link.c12
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/readdir.c4
-rw-r--r--fs/cifs/smb1ops.c2
-rw-r--r--fs/cifs/smb2file.c4
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c13
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/cifs/xattr.c22
-rw-r--r--fs/coda/cache.c4
-rw-r--r--fs/coda/dir.c22
-rw-r--r--fs/coda/file.c38
-rw-r--r--fs/coda/inode.c6
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/upcall.c4
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/configfs/dir.c70
-rw-r--r--fs/configfs/file.c4
-rw-r--r--fs/configfs/inode.c16
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dax.c48
-rw-r--r--fs/dcache.c49
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c61
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/direct-io.c51
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/dentry.c6
-rw-r--r--fs/ecryptfs/file.c11
-rw-r--r--fs/ecryptfs/inode.c164
-rw-r--r--fs/ecryptfs/kthread.c2
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/ecryptfs/mmap.c2
-rw-r--r--fs/efivarfs/inode.c4
-rw-r--r--fs/efivarfs/super.c2
-rw-r--r--fs/efs/namei.c4
-rw-r--r--fs/exec.c88
-rw-r--r--fs/exofs/dir.c4
-rw-r--r--fs/exofs/file.c2
-rw-r--r--fs/exofs/inode.c6
-rw-r--r--fs/exofs/namei.c10
-rw-r--r--fs/exofs/super.c2
-rw-r--r--fs/exofs/symlink.c2
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/file.c21
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c20
-rw-r--r--fs/ext2/namei.c24
-rw-r--r--fs/ext2/symlink.c2
-rw-r--r--fs/ext2/xattr.c4
-rw-r--r--fs/ext2/xattr_security.c4
-rw-r--r--fs/ext2/xattr_trusted.c4
-rw-r--r--fs/ext2/xattr_user.c4
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/ialloc.c2
-rw-r--r--fs/ext3/inode.c18
-rw-r--r--fs/ext3/namei.c34
-rw-r--r--fs/ext3/super.c8
-rw-r--r--fs/ext3/symlink.c2
-rw-r--r--fs/ext3/xattr.c13
-rw-r--r--fs/ext3/xattr_security.c4
-rw-r--r--fs/ext3/xattr_trusted.c4
-rw-r--r--fs/ext3/xattr_user.c4
-rw-r--r--fs/ext4/Kconfig22
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/acl.c5
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/bitmap.c1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/crypto.c558
-rw-r--r--fs/ext4/crypto_fname.c719
-rw-r--r--fs/ext4/crypto_key.c166
-rw-r--r--fs/ext4/crypto_policy.c198
-rw-r--r--fs/ext4/dir.c81
-rw-r--r--fs/ext4/ext4.h184
-rw-r--r--fs/ext4/ext4_crypto.h156
-rw-r--r--fs/ext4/extents.c96
-rw-r--r--fs/ext4/extents_status.c10
-rw-r--r--fs/ext4/file.c77
-rw-r--r--fs/ext4/fsync.c3
-rw-r--r--fs/ext4/hash.c1
-rw-r--r--fs/ext4/ialloc.c30
-rw-r--r--fs/ext4/indirect.c33
-rw-r--r--fs/ext4/inline.c20
-rw-r--r--fs/ext4/inode.c174
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/namei.c621
-rw-r--r--fs/ext4/page-io.c48
-rw-r--r--fs/ext4/readpage.c328
-rw-r--r--fs/ext4/resize.c7
-rw-r--r--fs/ext4/super.c64
-rw-r--r--fs/ext4/symlink.c99
-rw-r--r--fs/ext4/xattr.c14
-rw-r--r--fs/ext4/xattr.h3
-rw-r--r--fs/ext4/xattr_security.c4
-rw-r--r--fs/ext4/xattr_trusted.c4
-rw-r--r--fs/ext4/xattr_user.c4
-rw-r--r--fs/f2fs/Kconfig2
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/checkpoint.c38
-rw-r--r--fs/f2fs/data.c766
-rw-r--r--fs/f2fs/debug.c22
-rw-r--r--fs/f2fs/dir.c93
-rw-r--r--fs/f2fs/f2fs.h176
-rw-r--r--fs/f2fs/file.c70
-rw-r--r--fs/f2fs/gc.c6
-rw-r--r--fs/f2fs/inline.c69
-rw-r--r--fs/f2fs/inode.c25
-rw-r--r--fs/f2fs/namei.c99
-rw-r--r--fs/f2fs/node.c18
-rw-r--r--fs/f2fs/node.h1
-rw-r--r--fs/f2fs/recovery.c76
-rw-r--r--fs/f2fs/segment.c17
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c40
-rw-r--r--fs/f2fs/xattr.c14
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/fat.h5
-rw-r--r--fs/fat/fatent.c3
-rw-r--r--fs/fat/file.c10
-rw-r--r--fs/fat/inode.c23
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/fat/namei_msdos.c10
-rw-r--r--fs/fat/namei_vfat.c16
-rw-r--r--fs/fat/nfs.c4
-rw-r--r--fs/file.c3
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/freevxfs/vxfs_immed.c2
-rw-r--r--fs/fs-writeback.c93
-rw-r--r--fs/fs_pin.c4
-rw-r--r--fs/fuse/control.c6
-rw-r--r--fs/fuse/cuse.c27
-rw-r--r--fs/fuse/dev.c64
-rw-r--r--fs/fuse/dir.c60
-rw-r--r--fs/fuse/file.c151
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c24
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dentry.c12
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c108
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c52
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/quota.c90
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/xattr.c8
-rw-r--r--fs/hfs/attr.c6
-rw-r--r--fs/hfs/dir.c12
-rw-r--r--fs/hfs/inode.c14
-rw-r--r--fs/hfs/sysdep.c2
-rw-r--r--fs/hfsplus/bfind.c4
-rw-r--r--fs/hfsplus/catalog.c3
-rw-r--r--fs/hfsplus/dir.c16
-rw-r--r--fs/hfsplus/inode.c21
-rw-r--r--fs/hfsplus/ioctl.c14
-rw-r--r--fs/hfsplus/xattr.c90
-rw-r--r--fs/hfsplus/xattr.h22
-rw-r--r--fs/hfsplus/xattr_security.c38
-rw-r--r--fs/hfsplus/xattr_trusted.c37
-rw-r--r--fs/hfsplus/xattr_user.c35
-rw-r--r--fs/hostfs/hostfs.h6
-rw-r--r--fs/hostfs/hostfs_kern.c116
-rw-r--r--fs/hostfs/hostfs_user.c29
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/hpfs/namei.c8
-rw-r--r--fs/hppfs/hppfs.c20
-rw-r--r--fs/hugetlbfs/inode.c187
-rw-r--r--fs/inode.c20
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jffs2/dir.c40
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jffs2/security.c4
-rw-r--r--fs/jffs2/super.c4
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jffs2/xattr.c5
-rw-r--r--fs/jffs2/xattr_trusted.c4
-rw-r--r--fs/jffs2/xattr_user.c4
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/jfs/inode.c10
-rw-r--r--fs/jfs/jfs_metapage.c31
-rw-r--r--fs/jfs/jfs_metapage.h1
-rw-r--r--fs/jfs/namei.c18
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/jfs/symlink.c2
-rw-r--r--fs/jfs/xattr.c12
-rw-r--r--fs/kernfs/dir.c2
-rw-r--r--fs/kernfs/inode.c8
-rw-r--r--fs/libfs.c26
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/locks.c99
-rw-r--r--fs/logfs/dir.c14
-rw-r--r--fs/logfs/file.c4
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/minix/file.c4
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/minix/namei.c10
-rw-r--r--fs/namei.c182
-rw-r--r--fs/namespace.c142
-rw-r--r--fs/ncpfs/dir.c48
-rw-r--r--fs/ncpfs/file.c88
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/ncpfs/ioctl.c8
-rw-r--r--fs/ncpfs/ncplib_kernel.c8
-rw-r--r--fs/ncpfs/ncplib_kernel.h2
-rw-r--r--fs/ncpfs/symlink.c2
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/Makefile2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c1
-rw-r--r--fs/nfs/blocklayout/dev.c2
-rw-r--r--fs/nfs/callback.c6
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/delegation.c4
-rw-r--r--fs/nfs/dir.c63
-rw-r--r--fs/nfs/direct.c93
-rw-r--r--fs/nfs/file.c21
-rw-r--r--fs/nfs/filelayout/filelayout.c10
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c12
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/getroot.c4
-rw-r--r--fs/nfs/inode.c54
-rw-r--r--fs/nfs/namespace.c10
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c12
-rw-r--r--fs/nfs/nfs42proc.c31
-rw-r--r--fs/nfs/nfs42xdr.c20
-rw-r--r--fs/nfs/nfs4client.c4
-rw-r--r--fs/nfs/nfs4file.c26
-rw-r--r--fs/nfs/nfs4idmap.c (renamed from fs/nfs/idmap.c)2
-rw-r--r--fs/nfs/nfs4idmap.h68
-rw-r--r--fs/nfs/nfs4namespace.c4
-rw-r--r--fs/nfs/nfs4proc.c83
-rw-r--r--fs/nfs/nfs4state.c6
-rw-r--r--fs/nfs/nfs4super.c7
-rw-r--r--fs/nfs/nfs4sysctl.c2
-rw-r--r--fs/nfs/nfs4trace.h4
-rw-r--r--fs/nfs/nfs4xdr.c22
-rw-r--r--fs/nfs/nfstrace.c3
-rw-r--r--fs/nfs/objlayout/objio_osd.c4
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs.c68
-rw-r--r--fs/nfs/pnfs.h28
-rw-r--r--fs/nfs/pnfs_dev.c21
-rw-r--r--fs/nfs/pnfs_nfs.c12
-rw-r--r--fs/nfs/proc.c4
-rw-r--r--fs/nfs/read.c10
-rw-r--r--fs/nfs/super.c10
-rw-r--r--fs/nfs/symlink.c2
-rw-r--r--fs/nfs/unlink.c20
-rw-r--r--fs/nfs/write.c28
-rw-r--r--fs/nfsd/Kconfig3
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/blocklayoutxdr.c6
-rw-r--r--fs/nfsd/export.c8
-rw-r--r--fs/nfsd/nfs2acl.c8
-rw-r--r--fs/nfsd/nfs3acl.c8
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c16
-rw-r--r--fs/nfsd/nfs4acl.c54
-rw-r--r--fs/nfsd/nfs4layouts.c12
-rw-r--r--fs/nfsd/nfs4proc.c26
-rw-r--r--fs/nfsd/nfs4recover.c22
-rw-r--r--fs/nfsd/nfs4state.c31
-rw-r--r--fs/nfsd/nfs4xdr.c46
-rw-r--r--fs/nfsd/nfscache.c6
-rw-r--r--fs/nfsd/nfsctl.c16
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfsfh.c20
-rw-r--r--fs/nfsd/nfsfh.h6
-rw-r--r--fs/nfsd/nfsproc.c4
-rw-r--r--fs/nfsd/nfsxdr.c2
-rw-r--r--fs/nfsd/vfs.c62
-rw-r--r--fs/nfsd/xdr4.h5
-rw-r--r--fs/nilfs2/alloc.c5
-rw-r--r--fs/nilfs2/bmap.c48
-rw-r--r--fs/nilfs2/bmap.h13
-rw-r--r--fs/nilfs2/btree.c65
-rw-r--r--fs/nilfs2/cpfile.c58
-rw-r--r--fs/nilfs2/dir.c2
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/inode.c39
-rw-r--r--fs/nilfs2/mdt.c54
-rw-r--r--fs/nilfs2/mdt.h10
-rw-r--r--fs/nilfs2/namei.c18
-rw-r--r--fs/nilfs2/page.c24
-rw-r--r--fs/nilfs2/segment.c17
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/nsfs.c4
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c778
-rw-r--r--fs/ntfs/inode.c3
-rw-r--r--fs/ntfs/namei.c4
-rw-r--r--fs/ocfs2/alloc.c48
-rw-r--r--fs/ocfs2/aops.c178
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/masklog.h5
-rw-r--r--fs/ocfs2/dcache.c14
-rw-r--r--fs/ocfs2/dir.c15
-rw-r--r--fs/ocfs2/dir.h2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c13
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/export.c4
-rw-r--r--fs/ocfs2/file.c155
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c20
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c8
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c37
-rw-r--r--fs/ocfs2/xattr.c30
-rw-r--r--fs/omfs/dir.c10
-rw-r--r--fs/omfs/file.c4
-rw-r--r--fs/open.c17
-rw-r--r--fs/pipe.c5
-rw-r--r--fs/pnode.c60
-rw-r--r--fs/pnode.h7
-rw-r--r--fs/posix_acl.c8
-rw-r--r--fs/proc/array.c26
-rw-r--r--fs/proc/base.c114
-rw-r--r--fs/proc/fd.c33
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/namespaces.c4
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/proc_sysctl.c12
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/self.c2
-rw-r--r--fs/proc/thread_self.c2
-rw-r--r--fs/pstore/inode.c13
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/qnx6/inode.c2
-rw-r--r--fs/quota/dquot.c161
-rw-r--r--fs/quota/quota.c217
-rw-r--r--fs/quota/quota_tree.c7
-rw-r--r--fs/quota/quota_v2.c12
-rw-r--r--fs/quota/quotaio_v2.h6
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/read_write.c213
-rw-r--r--fs/reiserfs/dir.c4
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c12
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/reiserfs/reiserfs.h1
-rw-r--r--fs/reiserfs/super.c6
-rw-r--r--fs/reiserfs/xattr.c126
-rw-r--r--fs/reiserfs/xattr.h2
-rw-r--r--fs/reiserfs/xattr_security.c10
-rw-r--r--fs/reiserfs/xattr_trusted.c10
-rw-r--r--fs/reiserfs/xattr_user.c4
-rw-r--r--fs/romfs/mmap-nommu.c1
-rw-r--r--fs/splice.c31
-rw-r--r--fs/squashfs/export.c2
-rw-r--r--fs/squashfs/xattr.c8
-rw-r--r--fs/stat.c6
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/group.c11
-rw-r--r--fs/sysv/dir.c4
-rw-r--r--fs/sysv/file.c4
-rw-r--r--fs/sysv/itree.c2
-rw-r--r--fs/sysv/namei.c10
-rw-r--r--fs/sysv/symlink.c2
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/commit.c12
-rw-r--r--fs/ubifs/compress.c22
-rw-r--r--fs/ubifs/debug.c186
-rw-r--r--fs/ubifs/dir.c37
-rw-r--r--fs/ubifs/file.c24
-rw-r--r--fs/ubifs/io.c40
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c21
-rw-r--r--fs/ubifs/log.c4
-rw-r--r--fs/ubifs/lprops.c62
-rw-r--r--fs/ubifs/lpt.c59
-rw-r--r--fs/ubifs/lpt_commit.c34
-rw-r--r--fs/ubifs/master.c6
-rw-r--r--fs/ubifs/orphan.c26
-rw-r--r--fs/ubifs/recovery.c44
-rw-r--r--fs/ubifs/replay.c34
-rw-r--r--fs/ubifs/sb.c30
-rw-r--r--fs/ubifs/scan.c24
-rw-r--r--fs/ubifs/super.c107
-rw-r--r--fs/ubifs/tnc.c20
-rw-r--r--fs/ubifs/tnc_commit.c12
-rw-r--r--fs/ubifs/tnc_misc.c24
-rw-r--r--fs/ubifs/ubifs.h40
-rw-r--r--fs/ubifs/xattr.c28
-rw-r--r--fs/udf/balloc.c20
-rw-r--r--fs/udf/dir.c1
-rw-r--r--fs/udf/directory.c1
-rw-r--r--fs/udf/file.c32
-rw-r--r--fs/udf/inode.c12
-rw-r--r--fs/udf/misc.c1
-rw-r--r--fs/udf/namei.c26
-rw-r--r--fs/udf/partition.c1
-rw-r--r--fs/udf/super.c1
-rw-r--r--fs/udf/symlink.c1
-rw-r--r--fs/udf/truncate.c1
-rw-r--r--fs/ufs/dir.c2
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/ufs/namei.c10
-rw-r--r--fs/ufs/super.c4
-rw-r--r--fs/ufs/symlink.c2
-rw-r--r--fs/ufs/truncate.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c104
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c150
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c554
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h13
-rw-r--r--fs/xfs/libxfs/xfs_btree.c24
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c8
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h14
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c39
-rw-r--r--fs/xfs/libxfs/xfs_format.h62
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c48
-rw-r--r--fs/xfs/libxfs/xfs_sb.c20
-rw-r--r--fs/xfs/xfs_aops.c283
-rw-r--r--fs/xfs/xfs_attr_inactive.c3
-rw-r--r--fs/xfs/xfs_attr_list.c9
-rw-r--r--fs/xfs/xfs_bmap_util.c164
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_discard.c2
-rw-r--r--fs/xfs/xfs_error.c2
-rw-r--r--fs/xfs/xfs_error.h8
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_file.c203
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_inode.c558
-rw-r--r--fs/xfs/xfs_inode.h49
-rw-r--r--fs/xfs/xfs_ioctl.c25
-rw-r--r--fs/xfs/xfs_ioctl32.c12
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_iops.c109
-rw-r--r--fs/xfs/xfs_iops.h2
-rw-r--r--fs/xfs/xfs_itable.c2
-rw-r--r--fs/xfs/xfs_linux.h9
-rw-r--r--fs/xfs/xfs_log_recover.c4
-rw-r--r--fs/xfs/xfs_mount.c918
-rw-r--r--fs/xfs/xfs_mount.h95
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_pnfs.c7
-rw-r--r--fs/xfs/xfs_pnfs.h5
-rw-r--r--fs/xfs/xfs_qm.c13
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c176
-rw-r--r--fs/xfs/xfs_quotaops.c117
-rw-r--r--fs/xfs/xfs_super.c134
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_symlink.c58
-rw-r--r--fs/xfs/xfs_trace.h29
-rw-r--r--fs/xfs/xfs_trans.c234
-rw-r--r--fs/xfs/xfs_xattr.c6
599 files changed, 15386 insertions, 9113 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 8482f2d11606..31c010372660 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -247,7 +247,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
return v9fs_remote_get_acl(dentry, name, buffer, size, type);
- acl = v9fs_get_cached_acl(dentry->d_inode, type);
+ acl = v9fs_get_cached_acl(d_inode(dentry), type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -285,7 +285,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
int retval;
struct posix_acl *acl;
struct v9fs_session_info *v9ses;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
if (strcmp(name, "") != 0)
return -EINVAL;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 099c7712631c..fb9ffcb43277 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -78,7 +78,6 @@ enum p9_cache_modes {
* @cache: cache mode of type &p9_cache_modes
* @cachetag: the tag of the cache associated with this session
* @fscache: session cookie associated with FS-Cache
- * @options: copy of options string given by user
* @uname: string user name to mount hierarchy as
* @aname: mount specifier for remote hierarchy
* @maxdata: maximum data to be sent/recvd per protocol message
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b83ebfbf3fdc..5a0db6dec8d1 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -68,14 +68,10 @@ int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
-ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
-ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
int datasync);
-ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
- const char __user *, size_t, loff_t *, int);
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
static inline void v9fs_invalidate_inode_attr(struct inode *inode)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..e9e04376c52c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
#include <linux/pagemap.h>
#include <linux/idr.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -51,12 +51,11 @@
*/
static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
{
- int retval;
- loff_t offset;
- char *buffer;
- struct inode *inode;
+ struct inode *inode = page->mapping->host;
+ struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
+ struct iov_iter to;
+ int retval, err;
- inode = page->mapping->host;
p9_debug(P9_DEBUG_VFS, "\n");
BUG_ON(!PageLocked(page));
@@ -65,16 +64,16 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
if (retval == 0)
return retval;
- buffer = kmap(page);
- offset = page_offset(page);
+ iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
- retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
- if (retval < 0) {
+ retval = p9_client_read(fid, page_offset(page), &to, &err);
+ if (err) {
v9fs_uncache_page(inode, page);
+ retval = err;
goto done;
}
- memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
+ zero_user(page, retval, PAGE_SIZE - retval);
flush_dcache_page(page);
SetPageUptodate(page);
@@ -82,7 +81,6 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
retval = 0;
done:
- kunmap(page);
unlock_page(page);
return retval;
}
@@ -161,41 +159,32 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset,
static int v9fs_vfs_writepage_locked(struct page *page)
{
- char *buffer;
- int retval, len;
- loff_t offset, size;
- mm_segment_t old_fs;
- struct v9fs_inode *v9inode;
struct inode *inode = page->mapping->host;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ loff_t size = i_size_read(inode);
+ struct iov_iter from;
+ struct bio_vec bvec;
+ int err, len;
- v9inode = V9FS_I(inode);
- size = i_size_read(inode);
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
else
len = PAGE_CACHE_SIZE;
- set_page_writeback(page);
-
- buffer = kmap(page);
- offset = page_offset(page);
+ bvec.bv_page = page;
+ bvec.bv_offset = 0;
+ bvec.bv_len = len;
+ iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
- old_fs = get_fs();
- set_fs(get_ds());
/* We should have writeback_fid always set */
BUG_ON(!v9inode->writeback_fid);
- retval = v9fs_file_write_internal(inode,
- v9inode->writeback_fid,
- (__force const char __user *)buffer,
- len, &offset, 0);
- if (retval > 0)
- retval = 0;
+ set_page_writeback(page);
+
+ p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err);
- set_fs(old_fs);
- kunmap(page);
end_page_writeback(page);
- return retval;
+ return err;
}
static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -241,11 +230,8 @@ static int v9fs_launder_page(struct page *page)
/**
* v9fs_direct_IO - 9P address space operation for direct I/O
- * @rw: direction (read or write)
* @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
* @pos: offset in file to begin the operation
- * @nr_segs: size of iovec array
*
* The presence of v9fs_direct_IO() in the address space ops vector
* allowes open() O_DIRECT flags which would have failed otherwise.
@@ -259,18 +245,23 @@ static int v9fs_launder_page(struct page *page)
*
*/
static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
+v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
- /*
- * FIXME
- * Now that we do caching with cache mode enabled, We need
- * to support direct IO
- */
- p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n",
- iocb->ki_filp,
- (long long)pos, iter->nr_segs);
-
- return -EINVAL;
+ struct file *file = iocb->ki_filp;
+ ssize_t n;
+ int err = 0;
+ if (iov_iter_rw(iter) == WRITE) {
+ n = p9_client_write(file->private_data, pos, iter, &err);
+ if (n) {
+ struct inode *inode = file_inode(file);
+ loff_t i_size = i_size_read(inode);
+ if (pos + n > i_size)
+ inode_add_bytes(inode, pos + n - i_size);
+ }
+ } else {
+ n = p9_client_read(file->private_data, pos, iter, &err);
+ }
+ return n ? n : err;
}
static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index a345b2d659cc..bd456c668d39 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -53,7 +53,7 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
dentry, dentry);
/* Don't cache negative dentries */
- if (!dentry->d_inode)
+ if (d_really_is_negative(dentry))
return 1;
return 0;
}
@@ -83,7 +83,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- inode = dentry->d_inode;
+ inode = d_inode(dentry);
if (!inode)
goto out_valid;
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 4f1151088ebe..5cc00e56206e 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -33,6 +33,7 @@
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/slab.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -115,6 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
int buflen;
int reclen = 0;
struct p9_rdir *rdir;
+ struct kvec kvec;
p9_debug(P9_DEBUG_VFS, "name %pD\n", file);
fid = file->private_data;
@@ -124,16 +126,23 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
rdir = v9fs_alloc_rdir_buf(file, buflen);
if (!rdir)
return -ENOMEM;
+ kvec.iov_base = rdir->buf;
+ kvec.iov_len = buflen;
while (1) {
if (rdir->tail == rdir->head) {
- err = v9fs_file_readn(file, rdir->buf, NULL,
- buflen, ctx->pos);
- if (err <= 0)
+ struct iov_iter to;
+ int n;
+ iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+ n = p9_client_read(file->private_data, ctx->pos, &to,
+ &err);
+ if (err)
return err;
+ if (n == 0)
+ return 0;
rdir->head = 0;
- rdir->tail = err;
+ rdir->tail = n;
}
while (rdir->head < rdir->tail) {
p9stat_init(&st);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b40133796b87..1ef16bd8280b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -36,6 +36,8 @@
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <linux/idr.h>
+#include <linux/uio.h>
+#include <linux/slab.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -149,7 +151,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct p9_flock flock;
struct p9_fid *fid;
- uint8_t status;
+ uint8_t status = P9_LOCK_ERROR;
int res = 0;
unsigned char fl_type;
@@ -194,7 +196,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
for (;;) {
res = p9_client_lock_dotl(fid, &flock, &status);
if (res < 0)
- break;
+ goto out_unlock;
if (status != P9_LOCK_BLOCKED)
break;
@@ -212,14 +214,16 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
case P9_LOCK_BLOCKED:
res = -EAGAIN;
break;
+ default:
+ WARN_ONCE(1, "unknown lock status code: %d\n", status);
+ /* fallthough */
case P9_LOCK_ERROR:
case P9_LOCK_GRACE:
res = -ENOLCK;
break;
- default:
- BUG();
}
+out_unlock:
/*
* incase server returned error for lock request, revert
* it locally
@@ -285,6 +289,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
fl->fl_end = glock.start + glock.length - 1;
fl->fl_pid = glock.proc_id;
}
+ kfree(glock.client_id);
return res;
}
@@ -364,63 +369,6 @@ out_err:
}
/**
- * v9fs_fid_readn - read from a fid
- * @fid: fid to read
- * @data: data buffer to read data into
- * @udata: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-ssize_t
-v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
- u64 offset)
-{
- int n, total, size;
-
- p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n",
- fid->fid, (long long unsigned)offset, count);
- n = 0;
- total = 0;
- size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
- do {
- n = p9_client_read(fid, data, udata, offset, count);
- if (n <= 0)
- break;
-
- if (data)
- data += n;
- if (udata)
- udata += n;
-
- offset += n;
- count -= n;
- total += n;
- } while (count > 0 && n == size);
-
- if (n < 0)
- total = n;
-
- return total;
-}
-
-/**
- * v9fs_file_readn - read from a file
- * @filp: file pointer to read
- * @data: data buffer to read data into
- * @udata: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-ssize_t
-v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
- u64 offset)
-{
- return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
-}
-
-/**
* v9fs_file_read - read from a file
* @filp: file pointer to read
* @udata: user data buffer to read data into
@@ -430,69 +378,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
*/
static ssize_t
-v9fs_file_read(struct file *filp, char __user *udata, size_t count,
- loff_t * offset)
+v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- int ret;
- struct p9_fid *fid;
- size_t size;
-
- p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
- fid = filp->private_data;
+ struct p9_fid *fid = iocb->ki_filp->private_data;
+ int ret, err;
- size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
- if (count > size)
- ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
- else
- ret = p9_client_read(fid, NULL, udata, *offset, count);
+ p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
+ iov_iter_count(to), iocb->ki_pos);
- if (ret > 0)
- *offset += ret;
+ ret = p9_client_read(fid, iocb->ki_pos, to, &err);
+ if (!ret)
+ return err;
+ iocb->ki_pos += ret;
return ret;
}
-ssize_t
-v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
- const char __user *data, size_t count,
- loff_t *offset, int invalidate)
-{
- int n;
- loff_t i_size;
- size_t total = 0;
- loff_t origin = *offset;
- unsigned long pg_start, pg_end;
-
- p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
- data, (int)count, (int)*offset);
-
- do {
- n = p9_client_write(fid, NULL, data+total, origin+total, count);
- if (n <= 0)
- break;
- count -= n;
- total += n;
- } while (count > 0);
-
- if (invalidate && (total > 0)) {
- pg_start = origin >> PAGE_CACHE_SHIFT;
- pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
- if (inode->i_mapping && inode->i_mapping->nrpages)
- invalidate_inode_pages2_range(inode->i_mapping,
- pg_start, pg_end);
- *offset += total;
- i_size = i_size_read(inode);
- if (*offset > i_size) {
- inode_add_bytes(inode, *offset - i_size);
- i_size_write(inode, *offset);
- }
- }
- if (n < 0)
- return n;
-
- return total;
-}
-
/**
* v9fs_file_write - write to a file
* @filp: file pointer to write
@@ -502,35 +403,39 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
*
*/
static ssize_t
-v9fs_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t *offset)
+v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- ssize_t retval = 0;
- loff_t origin = *offset;
-
-
- retval = generic_write_checks(filp, &origin, &count, 0);
- if (retval)
- goto out;
+ struct file *file = iocb->ki_filp;
+ ssize_t retval;
+ loff_t origin;
+ int err = 0;
- retval = -EINVAL;
- if ((ssize_t) count < 0)
- goto out;
- retval = 0;
- if (!count)
- goto out;
+ retval = generic_write_checks(iocb, from);
+ if (retval <= 0)
+ return retval;
- retval = v9fs_file_write_internal(file_inode(filp),
- filp->private_data,
- data, count, &origin, 1);
- /* update offset on successful write */
- if (retval > 0)
- *offset = origin;
-out:
- return retval;
+ origin = iocb->ki_pos;
+ retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err);
+ if (retval > 0) {
+ struct inode *inode = file_inode(file);
+ loff_t i_size;
+ unsigned long pg_start, pg_end;
+ pg_start = origin >> PAGE_CACHE_SHIFT;
+ pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
+ if (inode->i_mapping && inode->i_mapping->nrpages)
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pg_start, pg_end);
+ iocb->ki_pos += retval;
+ i_size = i_size_read(inode);
+ if (iocb->ki_pos > i_size) {
+ inode_add_bytes(inode, iocb->ki_pos - i_size);
+ i_size_write(inode, iocb->ki_pos);
+ }
+ return retval;
+ }
+ return err;
}
-
static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
int datasync)
{
@@ -657,44 +562,6 @@ out_unlock:
return VM_FAULT_NOPAGE;
}
-static ssize_t
-v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
- loff_t *offsetp)
-{
- loff_t size, offset;
- struct inode *inode;
- struct address_space *mapping;
-
- offset = *offsetp;
- mapping = filp->f_mapping;
- inode = mapping->host;
- if (!count)
- return 0;
- size = i_size_read(inode);
- if (offset < size)
- filemap_write_and_wait_range(mapping, offset,
- offset + count - 1);
-
- return v9fs_file_read(filp, udata, count, offsetp);
-}
-
-/**
- * v9fs_cached_file_read - read from a file
- * @filp: file pointer to read
- * @data: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-static ssize_t
-v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
- loff_t *offset)
-{
- if (filp->f_flags & O_DIRECT)
- return v9fs_direct_read(filp, data, count, offset);
- return new_sync_read(filp, data, count, offset);
-}
-
/**
* v9fs_mmap_file_read - read from a file
* @filp: file pointer to read
@@ -704,84 +571,12 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
*
*/
static ssize_t
-v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count,
- loff_t *offset)
+v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
/* TODO: Check if there are dirty pages */
- return v9fs_file_read(filp, data, count, offset);
-}
-
-static ssize_t
-v9fs_direct_write(struct file *filp, const char __user * data,
- size_t count, loff_t *offsetp)
-{
- loff_t offset;
- ssize_t retval;
- struct inode *inode;
- struct address_space *mapping;
-
- offset = *offsetp;
- mapping = filp->f_mapping;
- inode = mapping->host;
- if (!count)
- return 0;
-
- mutex_lock(&inode->i_mutex);
- retval = filemap_write_and_wait_range(mapping, offset,
- offset + count - 1);
- if (retval)
- goto err_out;
- /*
- * After a write we want buffered reads to be sure to go to disk to get
- * the new data. We invalidate clean cached page from the region we're
- * about to write. We do this *before* the write so that if we fail
- * here we fall back to buffered write
- */
- if (mapping->nrpages) {
- pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
- pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
-
- retval = invalidate_inode_pages2_range(mapping,
- pg_start, pg_end);
- /*
- * If a page can not be invalidated, fall back
- * to buffered write.
- */
- if (retval) {
- if (retval == -EBUSY)
- goto buff_write;
- goto err_out;
- }
- }
- retval = v9fs_file_write(filp, data, count, offsetp);
-err_out:
- mutex_unlock(&inode->i_mutex);
- return retval;
-
-buff_write:
- mutex_unlock(&inode->i_mutex);
- return new_sync_write(filp, data, count, offsetp);
-}
-
-/**
- * v9fs_cached_file_write - write to a file
- * @filp: file pointer to write
- * @data: data buffer to write data from
- * @count: size of buffer
- * @offset: offset at which to write data
- *
- */
-static ssize_t
-v9fs_cached_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t *offset)
-{
-
- if (filp->f_flags & O_DIRECT)
- return v9fs_direct_write(filp, data, count, offset);
- return new_sync_write(filp, data, count, offset);
+ return v9fs_file_read_iter(iocb, to);
}
-
/**
* v9fs_mmap_file_write - write to a file
* @filp: file pointer to write
@@ -791,14 +586,13 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
*
*/
static ssize_t
-v9fs_mmap_file_write(struct file *filp, const char __user *data,
- size_t count, loff_t *offset)
+v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
/*
* TODO: invalidate mmaps on filp's inode between
* offset and offset+count
*/
- return v9fs_file_write(filp, data, count, offset);
+ return v9fs_file_write_iter(iocb, from);
}
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
@@ -843,8 +637,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
- .read = v9fs_cached_file_read,
- .write = v9fs_cached_file_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = v9fs_file_open,
@@ -856,8 +648,6 @@ const struct file_operations v9fs_cached_file_operations = {
const struct file_operations v9fs_cached_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = v9fs_cached_file_read,
- .write = v9fs_cached_file_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = v9fs_file_open,
@@ -870,8 +660,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
- .read = v9fs_file_read,
- .write = v9fs_file_write,
+ .read_iter = v9fs_file_read_iter,
+ .write_iter = v9fs_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
@@ -881,8 +671,8 @@ const struct file_operations v9fs_file_operations = {
const struct file_operations v9fs_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = v9fs_file_read,
- .write = v9fs_file_write,
+ .read_iter = v9fs_file_read_iter,
+ .write_iter = v9fs_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
@@ -893,8 +683,8 @@ const struct file_operations v9fs_file_operations_dotl = {
const struct file_operations v9fs_mmap_file_operations = {
.llseek = generic_file_llseek,
- .read = v9fs_mmap_file_read,
- .write = v9fs_mmap_file_write,
+ .read_iter = v9fs_mmap_file_read_iter,
+ .write_iter = v9fs_mmap_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
@@ -904,8 +694,8 @@ const struct file_operations v9fs_mmap_file_operations = {
const struct file_operations v9fs_mmap_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = v9fs_mmap_file_read,
- .write = v9fs_mmap_file_write,
+ .read_iter = v9fs_mmap_file_read_iter,
+ .write_iter = v9fs_mmap_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 3662f1d1d9cf..703342e309f5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -595,7 +595,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
dir, dentry, flags);
v9ses = v9fs_inode2v9ses(dir);
- inode = dentry->d_inode;
+ inode = d_inode(dentry);
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid)) {
retval = PTR_ERR(dfid);
@@ -864,7 +864,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
/* Only creates */
- if (!(flags & O_CREAT) || dentry->d_inode)
+ if (!(flags & O_CREAT) || d_really_is_positive(dentry))
return finish_no_open(file, res);
err = 0;
@@ -881,7 +881,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
v9fs_invalidate_inode_attr(dir);
- v9inode = V9FS_I(dentry->d_inode);
+ v9inode = V9FS_I(d_inode(dentry));
mutex_lock(&v9inode->v_mutex);
if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
!v9inode->writeback_fid &&
@@ -908,7 +908,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
file->private_data = fid;
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- v9fs_cache_inode_set_cookie(dentry->d_inode, file);
+ v9fs_cache_inode_set_cookie(d_inode(dentry), file);
*opened |= FILE_CREATED;
out:
@@ -969,8 +969,8 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
p9_debug(P9_DEBUG_VFS, "\n");
retval = 0;
- old_inode = old_dentry->d_inode;
- new_inode = new_dentry->d_inode;
+ old_inode = d_inode(old_dentry);
+ new_inode = d_inode(new_dentry);
v9ses = v9fs_inode2v9ses(old_inode);
oldfid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(oldfid))
@@ -1061,7 +1061,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(dentry->d_inode, stat);
+ generic_fillattr(d_inode(dentry), stat);
return 0;
}
fid = v9fs_fid_lookup(dentry);
@@ -1072,8 +1072,8 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
- generic_fillattr(dentry->d_inode, stat);
+ v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb);
+ generic_fillattr(d_inode(dentry), stat);
p9stat_free(st);
kfree(st);
@@ -1095,7 +1095,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
struct p9_wstat wstat;
p9_debug(P9_DEBUG_VFS, "\n");
- retval = inode_change_ok(dentry->d_inode, iattr);
+ retval = inode_change_ok(d_inode(dentry), iattr);
if (retval)
return retval;
@@ -1128,20 +1128,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
/* Write all dirty data */
if (d_is_reg(dentry))
- filemap_write_and_wait(dentry->d_inode->i_mapping);
+ filemap_write_and_wait(d_inode(dentry)->i_mapping);
retval = p9_client_wstat(fid, &wstat);
if (retval < 0)
return retval;
if ((iattr->ia_valid & ATTR_SIZE) &&
- iattr->ia_size != i_size_read(dentry->d_inode))
- truncate_setsize(dentry->d_inode, iattr->ia_size);
+ iattr->ia_size != i_size_read(d_inode(dentry)))
+ truncate_setsize(d_inode(dentry), iattr->ia_size);
- v9fs_invalidate_inode_attr(dentry->d_inode);
+ v9fs_invalidate_inode_attr(d_inode(dentry));
- setattr_copy(dentry->d_inode, iattr);
- mark_inode_dirty(dentry->d_inode);
+ setattr_copy(d_inode(dentry), iattr);
+ mark_inode_dirty(d_inode(dentry));
return 0;
}
@@ -1403,7 +1403,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
__putname(name);
if (!retval) {
- v9fs_refresh_inode(oldfid, old_dentry->d_inode);
+ v9fs_refresh_inode(oldfid, d_inode(old_dentry));
v9fs_invalidate_inode_attr(dir);
}
clunk_fid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 6054c16b8fae..9861c7c951a6 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -265,7 +265,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
}
/* Only creates */
- if (!(flags & O_CREAT) || dentry->d_inode)
+ if (!(flags & O_CREAT) || d_really_is_positive(dentry))
return finish_no_open(file, res);
v9ses = v9fs_inode2v9ses(dir);
@@ -481,7 +481,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
- generic_fillattr(dentry->d_inode, stat);
+ generic_fillattr(d_inode(dentry), stat);
return 0;
}
fid = v9fs_fid_lookup(dentry);
@@ -496,8 +496,8 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
if (IS_ERR(st))
return PTR_ERR(st);
- v9fs_stat2inode_dotl(st, dentry->d_inode);
- generic_fillattr(dentry->d_inode, stat);
+ v9fs_stat2inode_dotl(st, d_inode(dentry));
+ generic_fillattr(d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -557,7 +557,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
int retval;
struct p9_fid *fid;
struct p9_iattr_dotl p9attr;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
p9_debug(P9_DEBUG_VFS, "\n");
@@ -795,10 +795,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
if (IS_ERR(fid))
return PTR_ERR(fid);
- v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
+ v9fs_refresh_inode_dotl(fid, d_inode(old_dentry));
}
- ihold(old_dentry->d_inode);
- d_instantiate(dentry, old_dentry->d_inode);
+ ihold(d_inode(old_dentry));
+ d_instantiate(dentry, d_inode(old_dentry));
return err;
}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 0afd0382822b..e99a338a4638 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -168,8 +168,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(st);
goto release_sb;
}
- root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode_dotl(st, root->d_inode);
+ d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode_dotl(st, d_inode(root));
kfree(st);
} else {
struct p9_wstat *st = NULL;
@@ -179,8 +179,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
goto release_sb;
}
- root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode(st, root->d_inode, sb);
+ d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, d_inode(root), sb);
p9stat_free(st);
kfree(st);
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f95e01e058e4..0cf44b6cccd6 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/sched.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -25,50 +26,34 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
void *buffer, size_t buffer_size)
{
ssize_t retval;
- int msize, read_count;
- u64 offset = 0, attr_size;
+ u64 attr_size;
struct p9_fid *attr_fid;
+ struct kvec kvec = {.iov_base = buffer, .iov_len = buffer_size};
+ struct iov_iter to;
+ int err;
+
+ iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
retval = PTR_ERR(attr_fid);
p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n",
retval);
- attr_fid = NULL;
- goto error;
- }
- if (!buffer_size) {
- /* request to get the attr_size */
- retval = attr_size;
- goto error;
+ return retval;
}
if (attr_size > buffer_size) {
- retval = -ERANGE;
- goto error;
- }
- msize = attr_fid->clnt->msize;
- while (attr_size) {
- if (attr_size > (msize - P9_IOHDRSZ))
- read_count = msize - P9_IOHDRSZ;
+ if (!buffer_size) /* request to get the attr_size */
+ retval = attr_size;
else
- read_count = attr_size;
- read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
- NULL, offset, read_count);
- if (read_count < 0) {
- /* error in xattr read */
- retval = read_count;
- goto error;
- }
- offset += read_count;
- attr_size -= read_count;
+ retval = -ERANGE;
+ } else {
+ iov_iter_truncate(&to, attr_size);
+ retval = p9_client_read(attr_fid, 0, &to, &err);
+ if (err)
+ retval = err;
}
- /* Total read xattr bytes */
- retval = offset;
-error:
- if (attr_fid)
- p9_client_clunk(attr_fid);
+ p9_client_clunk(attr_fid);
return retval;
-
}
@@ -120,8 +105,11 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
const void *value, size_t value_len, int flags)
{
- u64 offset = 0;
- int retval, msize, write_count;
+ struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len};
+ struct iov_iter from;
+ int retval;
+
+ iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
name, value_len, flags);
@@ -135,29 +123,11 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
* On success fid points to xattr
*/
retval = p9_client_xattrcreate(fid, name, value_len, flags);
- if (retval < 0) {
+ if (retval < 0)
p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
retval);
- goto err;
- }
- msize = fid->clnt->msize;
- while (value_len) {
- if (value_len > (msize - P9_IOHDRSZ))
- write_count = msize - P9_IOHDRSZ;
- else
- write_count = value_len;
- write_count = p9_client_write(fid, ((char *)value)+offset,
- NULL, offset, write_count);
- if (write_count < 0) {
- /* error in xattr write */
- retval = write_count;
- goto err;
- }
- offset += write_count;
- value_len -= write_count;
- }
- retval = 0;
-err:
+ else
+ p9_client_write(fid, 0, &from, &retval);
p9_client_clunk(fid);
return retval;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index ec35851e5b71..011f43365d7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig"
source "fs/ocfs2/Kconfig"
source "fs/btrfs/Kconfig"
source "fs/nilfs2/Kconfig"
+source "fs/f2fs/Kconfig"
config FS_DAX
bool "Direct Access (DAX) support"
@@ -217,7 +218,6 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
-source "fs/f2fs/Kconfig"
endif # MISC_FILESYSTEMS
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
-config ARCH_BINFMT_ELF_RANDOMIZE_PIE
- bool
-
config ARCH_BINFMT_ELF_STATE
bool
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_CACHEFILES) += cachefiles/
obj-$(CONFIG_DEBUG_FS) += debugfs/
+obj-$(CONFIG_TRACING) += tracefs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index f2ba88ab4aed..82d14cdf70f9 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -61,6 +61,7 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
kcalloc(size, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!bh_fplus) {
+ ret = -ENOMEM;
adfs_error(sb, "not enough memory for"
" dir object %X (%d blocks)", id, size);
goto out;
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 07c9edce5aa7..46c0d5671cd5 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -23,11 +23,9 @@
const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index b9acadafa4a1..335055d828e4 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -298,7 +298,7 @@ out:
int
adfs_notify_change(struct dentry *dentry, struct iattr *attr)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
unsigned int ia_valid = attr->ia_valid;
int error;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9852bdf34d76..a19c31d3f369 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -316,7 +316,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
dm = kmalloc(nzones * sizeof(*dm), GFP_KERNEL);
if (dm == NULL) {
adfs_error(sb, "not enough memory");
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
for (zone = 0; zone < nzones; zone++, map_addr++) {
@@ -349,7 +349,7 @@ error_free:
brelse(dm[zone].dm_bh);
kfree(dm);
- return NULL;
+ return ERR_PTR(-EIO);
}
static inline unsigned long adfs_discsize(struct adfs_discrecord *dr, int block_bits)
@@ -370,6 +370,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
unsigned char *b_data;
struct adfs_sb_info *asb;
struct inode *root;
+ int ret = -EINVAL;
sb->s_flags |= MS_NODIRATIME;
@@ -391,6 +392,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
sb_set_blocksize(sb, BLOCK_SIZE);
if (!(bh = sb_bread(sb, ADFS_DISCRECORD / BLOCK_SIZE))) {
adfs_error(sb, "unable to read superblock");
+ ret = -EIO;
goto error;
}
@@ -400,6 +402,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk("VFS: Can't find an adfs filesystem on dev "
"%s.\n", sb->s_id);
+ ret = -EINVAL;
goto error_free_bh;
}
@@ -412,6 +415,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk("VPS: Can't find an adfs filesystem on dev "
"%s.\n", sb->s_id);
+ ret = -EINVAL;
goto error_free_bh;
}
@@ -421,11 +425,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!bh) {
adfs_error(sb, "couldn't read superblock on "
"2nd try.");
+ ret = -EIO;
goto error;
}
b_data = bh->b_data + (ADFS_DISCRECORD % sb->s_blocksize);
if (adfs_checkbblk(b_data)) {
adfs_error(sb, "disc record mismatch, very weird!");
+ ret = -EINVAL;
goto error_free_bh;
}
dr = (struct adfs_discrecord *)(b_data + ADFS_DR_OFFSET);
@@ -433,6 +439,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk(KERN_ERR "VFS: Unsupported blocksize on dev "
"%s.\n", sb->s_id);
+ ret = -EINVAL;
goto error;
}
@@ -447,10 +454,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
asb->s_size = adfs_discsize(dr, sb->s_blocksize_bits);
asb->s_version = dr->format_version;
asb->s_log2sharesize = dr->log2sharesize;
-
+
asb->s_map = adfs_read_map(sb, dr);
- if (!asb->s_map)
+ if (IS_ERR(asb->s_map)) {
+ ret = PTR_ERR(asb->s_map);
goto error_free_bh;
+ }
brelse(bh);
@@ -499,6 +508,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
adfs_error(sb, "get root inode failed\n");
+ ret = -EIO;
goto error;
}
return 0;
@@ -508,7 +518,7 @@ error_free_bh:
error:
sb->s_fs_info = NULL;
kfree(asb);
- return -EINVAL;
+ return ret;
}
static struct dentry *adfs_mount(struct file_system_type *fs_type,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c8764bd7497d..cffe8370fb44 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,18 +106,22 @@ struct affs_sb_info {
spinlock_t work_lock; /* protects sb_work and work_queued */
};
-#define SF_INTL 0x0001 /* International filesystem. */
-#define SF_BM_VALID 0x0002 /* Bitmap is valid. */
-#define SF_IMMUTABLE 0x0004 /* Protection bits cannot be changed */
-#define SF_QUIET 0x0008 /* chmod errors will be not reported */
-#define SF_SETUID 0x0010 /* Ignore Amiga uid */
-#define SF_SETGID 0x0020 /* Ignore Amiga gid */
-#define SF_SETMODE 0x0040 /* Ignore Amiga protection bits */
-#define SF_MUFS 0x0100 /* Use MUFS uid/gid mapping */
-#define SF_OFS 0x0200 /* Old filesystem */
-#define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
-#define SF_VERBOSE 0x0800 /* Talk about fs when mounting */
-#define SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
+#define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */
+#define AFFS_MOUNT_SF_BM_VALID 0x0002 /* Bitmap is valid. */
+#define AFFS_MOUNT_SF_IMMUTABLE 0x0004 /* Protection bits cannot be changed */
+#define AFFS_MOUNT_SF_QUIET 0x0008 /* chmod errors will be not reported */
+#define AFFS_MOUNT_SF_SETUID 0x0010 /* Ignore Amiga uid */
+#define AFFS_MOUNT_SF_SETGID 0x0020 /* Ignore Amiga gid */
+#define AFFS_MOUNT_SF_SETMODE 0x0040 /* Ignore Amiga protection bits */
+#define AFFS_MOUNT_SF_MUFS 0x0100 /* Use MUFS uid/gid mapping */
+#define AFFS_MOUNT_SF_OFS 0x0200 /* Old filesystem */
+#define AFFS_MOUNT_SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
+#define AFFS_MOUNT_SF_VERBOSE 0x0800 /* Talk about fs when mounting */
+#define AFFS_MOUNT_SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
+
+#define affs_clear_opt(o, opt) (o &= ~AFFS_MOUNT_##opt)
+#define affs_set_opt(o, opt) (o |= AFFS_MOUNT_##opt)
+#define affs_test_opt(o, opt) ((o) & AFFS_MOUNT_##opt)
/* short cut to get to the affs specific sb data */
static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 388da1ea815d..a8f463c028ce 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -138,7 +138,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
static int
affs_remove_link(struct dentry *dentry)
{
- struct inode *dir, *inode = dentry->d_inode;
+ struct inode *dir, *inode = d_inode(dentry);
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL, *link_bh = NULL;
u32 link_ino, ino;
@@ -268,11 +268,11 @@ affs_remove_header(struct dentry *dentry)
struct buffer_head *bh = NULL;
int retval;
- dir = dentry->d_parent->d_inode;
+ dir = d_inode(dentry->d_parent);
sb = dir->i_sb;
retval = -ENOENT;
- inode = dentry->d_inode;
+ inode = d_inode(dentry);
if (!inode)
goto done;
@@ -471,9 +471,9 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
bool
affs_nofilenametruncate(const struct dentry *dentry)
{
- struct inode *inode = dentry->d_inode;
- return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
+ struct inode *inode = d_inode(dentry);
+ return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
}
/* Check if the name is valid for a affs object. */
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a91795e01a7f..659c579c4588 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
* affs regular file handling primitives
*/
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "affs.h"
static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
}
static ssize_t
-affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -398,15 +397,15 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t ret;
- if (rw == WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
loff_t size = offset + count;
if (AFFS_I(inode)->mmu_private < size)
return 0;
}
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block);
- if (ret < 0 && (rw & WRITE))
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block);
+ if (ret < 0 && iov_iter_rw(iter) == WRITE)
affs_write_failed(mapping, offset + count);
return ret;
}
@@ -915,7 +914,7 @@ affs_truncate(struct inode *inode)
if (inode->i_size) {
AFFS_I(inode)->i_blkcnt = last_blk + 1;
AFFS_I(inode)->i_extcnt = ext + 1;
- if (AFFS_SB(sb)->s_flags & SF_OFS) {
+ if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS)) {
struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
u32 tmp;
if (IS_ERR(bh)) {
@@ -969,9 +968,7 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
}
const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.open = affs_file_open,
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 6f34510449e8..a022f4accd76 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -66,23 +66,23 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
AFFS_I(inode)->i_lastalloc = 0;
AFFS_I(inode)->i_pa_cnt = 0;
- if (sbi->s_flags & SF_SETMODE)
+ if (affs_test_opt(sbi->s_flags, SF_SETMODE))
inode->i_mode = sbi->s_mode;
else
inode->i_mode = prot_to_mode(prot);
id = be16_to_cpu(tail->uid);
- if (id == 0 || sbi->s_flags & SF_SETUID)
+ if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
inode->i_uid = sbi->s_uid;
- else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
+ else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
i_uid_write(inode, 0);
else
i_uid_write(inode, id);
id = be16_to_cpu(tail->gid);
- if (id == 0 || sbi->s_flags & SF_SETGID)
+ if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETGID))
inode->i_gid = sbi->s_gid;
- else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
+ else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
i_gid_write(inode, 0);
else
i_gid_write(inode, id);
@@ -94,7 +94,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
/* fall through */
case ST_USERDIR:
if (be32_to_cpu(tail->stype) == ST_USERDIR ||
- sbi->s_flags & SF_SETMODE) {
+ affs_test_opt(sbi->s_flags, SF_SETMODE)) {
if (inode->i_mode & S_IRUSR)
inode->i_mode |= S_IXUSR;
if (inode->i_mode & S_IRGRP)
@@ -133,7 +133,8 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
}
if (tail->link_chain)
set_nlink(inode, 2);
- inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
+ inode->i_mapping->a_ops = affs_test_opt(sbi->s_flags, SF_OFS) ?
+ &affs_aops_ofs : &affs_aops;
inode->i_op = &affs_file_inode_operations;
inode->i_fop = &affs_file_operations;
break;
@@ -190,15 +191,15 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
uid = i_uid_read(inode);
gid = i_gid_read(inode);
- if (AFFS_SB(sb)->s_flags & SF_MUFS) {
+ if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_MUFS)) {
if (uid == 0 || uid == 0xFFFF)
uid = uid ^ ~0;
if (gid == 0 || gid == 0xFFFF)
gid = gid ^ ~0;
}
- if (!(AFFS_SB(sb)->s_flags & SF_SETUID))
+ if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETUID))
tail->uid = cpu_to_be16(uid);
- if (!(AFFS_SB(sb)->s_flags & SF_SETGID))
+ if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETGID))
tail->gid = cpu_to_be16(gid);
}
}
@@ -212,7 +213,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
int
affs_notify_change(struct dentry *dentry, struct iattr *attr)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
int error;
pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
@@ -221,11 +222,14 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
if (error)
goto out;
- if (((attr->ia_valid & ATTR_UID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETUID)) ||
- ((attr->ia_valid & ATTR_GID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETGID)) ||
+ if (((attr->ia_valid & ATTR_UID) &&
+ affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETUID)) ||
+ ((attr->ia_valid & ATTR_GID) &&
+ affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETGID)) ||
((attr->ia_valid & ATTR_MODE) &&
- (AFFS_SB(inode->i_sb)->s_flags & (SF_SETMODE | SF_IMMUTABLE)))) {
- if (!(AFFS_SB(inode->i_sb)->s_flags & SF_QUIET))
+ (AFFS_SB(inode->i_sb)->s_flags &
+ (AFFS_MOUNT_SF_SETMODE | AFFS_MOUNT_SF_IMMUTABLE)))) {
+ if (!affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_QUIET))
error = -EPERM;
goto out;
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index ffb7bd82c2a5..181e05b46e72 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -53,7 +53,8 @@ affs_intl_toupper(int ch)
static inline toupper_t
affs_get_toupper(struct super_block *sb)
{
- return AFFS_SB(sb)->s_flags & SF_INTL ? affs_intl_toupper : affs_toupper;
+ return affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL) ?
+ affs_intl_toupper : affs_toupper;
}
/*
@@ -250,7 +251,7 @@ int
affs_unlink(struct inode *dir, struct dentry *dentry)
{
pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
- dentry->d_inode->i_ino, dentry);
+ d_inode(dentry)->i_ino, dentry);
return affs_remove_header(dentry);
}
@@ -275,7 +276,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
inode->i_op = &affs_file_inode_operations;
inode->i_fop = &affs_file_operations;
- inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
+ inode->i_mapping->a_ops = affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS) ?
+ &affs_aops_ofs : &affs_aops;
error = affs_add_entry(dir, inode, dentry, ST_FILE);
if (error) {
clear_nlink(inode);
@@ -318,7 +320,7 @@ int
affs_rmdir(struct inode *dir, struct dentry *dentry)
{
pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
- dentry->d_inode->i_ino, dentry);
+ d_inode(dentry)->i_ino, dentry);
return affs_remove_header(dentry);
}
@@ -401,7 +403,7 @@ err:
int
affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
- struct inode *inode = old_dentry->d_inode;
+ struct inode *inode = d_inode(old_dentry);
pr_debug("%s(%lu, %lu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino,
dentry);
@@ -428,13 +430,13 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
return retval;
/* Unlink destination if it already exists */
- if (new_dentry->d_inode) {
+ if (d_really_is_positive(new_dentry)) {
retval = affs_remove_header(new_dentry);
if (retval)
return retval;
}
- bh = affs_bread(sb, old_dentry->d_inode->i_ino);
+ bh = affs_bread(sb, d_inode(old_dentry)->i_ino);
if (!bh)
return -EIO;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4cf0e9113fb6..3f89c9e05b40 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -227,22 +227,22 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
if (match_octal(&args[0], &option))
return 0;
*mode = option & 0777;
- *mount_opts |= SF_SETMODE;
+ affs_set_opt(*mount_opts, SF_SETMODE);
break;
case Opt_mufs:
- *mount_opts |= SF_MUFS;
+ affs_set_opt(*mount_opts, SF_MUFS);
break;
case Opt_notruncate:
- *mount_opts |= SF_NO_TRUNCATE;
+ affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
break;
case Opt_prefix:
*prefix = match_strdup(&args[0]);
if (!*prefix)
return 0;
- *mount_opts |= SF_PREFIX;
+ affs_set_opt(*mount_opts, SF_PREFIX);
break;
case Opt_protect:
- *mount_opts |= SF_IMMUTABLE;
+ affs_set_opt(*mount_opts, SF_IMMUTABLE);
break;
case Opt_reserved:
if (match_int(&args[0], reserved))
@@ -258,7 +258,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
*gid = make_kgid(current_user_ns(), option);
if (!gid_valid(*gid))
return 0;
- *mount_opts |= SF_SETGID;
+ affs_set_opt(*mount_opts, SF_SETGID);
break;
case Opt_setuid:
if (match_int(&args[0], &option))
@@ -266,10 +266,10 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
*uid = make_kuid(current_user_ns(), option);
if (!uid_valid(*uid))
return 0;
- *mount_opts |= SF_SETUID;
+ affs_set_opt(*mount_opts, SF_SETUID);
break;
case Opt_verbose:
- *mount_opts |= SF_VERBOSE;
+ affs_set_opt(*mount_opts, SF_VERBOSE);
break;
case Opt_volume: {
char *vol = match_strdup(&args[0]);
@@ -435,30 +435,31 @@ got_root:
case MUFS_FS:
case MUFS_INTLFFS:
case MUFS_DCFFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
/* fall thru */
case FS_INTLFFS:
case FS_DCFFS:
- sbi->s_flags |= SF_INTL;
+ affs_set_opt(sbi->s_flags, SF_INTL);
break;
case MUFS_FFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
break;
case FS_FFS:
break;
case MUFS_OFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
/* fall thru */
case FS_OFS:
- sbi->s_flags |= SF_OFS;
+ affs_set_opt(sbi->s_flags, SF_OFS);
sb->s_flags |= MS_NOEXEC;
break;
case MUFS_DCOFS:
case MUFS_INTLOFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
case FS_DCOFS:
case FS_INTLOFS:
- sbi->s_flags |= SF_INTL | SF_OFS;
+ affs_set_opt(sbi->s_flags, SF_INTL);
+ affs_set_opt(sbi->s_flags, SF_OFS);
sb->s_flags |= MS_NOEXEC;
break;
default:
@@ -467,7 +468,7 @@ got_root:
return -EINVAL;
}
- if (mount_flags & SF_VERBOSE) {
+ if (affs_test_opt(mount_flags, SF_VERBOSE)) {
u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
len > 31 ? 31 : len,
@@ -478,7 +479,7 @@ got_root:
sb->s_flags |= MS_NODEV | MS_NOSUID;
sbi->s_data_blksize = sb->s_blocksize;
- if (sbi->s_flags & SF_OFS)
+ if (affs_test_opt(sbi->s_flags, SF_OFS))
sbi->s_data_blksize -= 24;
tmp_flags = sb->s_flags;
@@ -493,7 +494,7 @@ got_root:
if (IS_ERR(root_inode))
return PTR_ERR(root_inode);
- if (AFFS_SB(sb)->s_flags & SF_INTL)
+ if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL))
sb->s_d_op = &affs_intl_dentry_operations;
else
sb->s_d_op = &affs_dentry_operations;
@@ -520,10 +521,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
int root_block;
unsigned long mount_flags;
int res = 0;
- char *new_opts = kstrdup(data, GFP_KERNEL);
+ char *new_opts;
char volume[32];
char *prefix = NULL;
+ new_opts = kstrdup(data, GFP_KERNEL);
+ if (!new_opts)
+ return -ENOMEM;
+
pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 4ec35e9130e1..e10e17788f06 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -505,7 +505,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
_enter("{%x:%u},%p{%pd},",
vnode->fid.vid, vnode->fid.vnode, dentry, dentry);
- ASSERTCMP(dentry->d_inode, ==, NULL);
+ ASSERTCMP(d_inode(dentry), ==, NULL);
if (dentry->d_name.len >= AFSNAMEMAX) {
_leave(" = -ENAMETOOLONG");
@@ -563,8 +563,8 @@ success:
_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
fid.vnode,
fid.unique,
- dentry->d_inode->i_ino,
- dentry->d_inode->i_generation);
+ d_inode(dentry)->i_ino,
+ d_inode(dentry)->i_generation);
return NULL;
}
@@ -586,9 +586,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- vnode = AFS_FS_I(dentry->d_inode);
+ vnode = AFS_FS_I(d_inode(dentry));
- if (dentry->d_inode)
+ if (d_really_is_positive(dentry))
_enter("{v={%x:%u} n=%pd fl=%lx},",
vnode->fid.vid, vnode->fid.vnode, dentry,
vnode->flags);
@@ -601,7 +601,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
/* lock down the parent dentry so we can peer at it */
parent = dget_parent(dentry);
- dir = AFS_FS_I(parent->d_inode);
+ dir = AFS_FS_I(d_inode(parent));
/* validate the parent directory */
if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
@@ -623,9 +623,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
switch (ret) {
case 0:
/* the filename maps to something */
- if (!dentry->d_inode)
+ if (d_really_is_negative(dentry))
goto out_bad;
- if (is_bad_inode(dentry->d_inode)) {
+ if (is_bad_inode(d_inode(dentry))) {
printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
dentry);
goto out_bad;
@@ -647,7 +647,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
_debug("%pd: file deleted (uq %u -> %u I:%u)",
dentry, fid.unique,
vnode->fid.unique,
- dentry->d_inode->i_generation);
+ d_inode(dentry)->i_generation);
spin_lock(&vnode->lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
spin_unlock(&vnode->lock);
@@ -658,7 +658,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
case -ENOENT:
/* the filename is unknown */
_debug("%pd: dirent not found", dentry);
- if (dentry->d_inode)
+ if (d_really_is_positive(dentry))
goto not_found;
goto out_valid;
@@ -703,9 +703,9 @@ static int afs_d_delete(const struct dentry *dentry)
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto zap;
- if (dentry->d_inode &&
- (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags) ||
- test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags)))
+ if (d_really_is_positive(dentry) &&
+ (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(d_inode(dentry))->flags) ||
+ test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(d_inode(dentry))->flags)))
goto zap;
_leave(" = 0 [keep]");
@@ -814,8 +814,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
if (ret < 0)
goto rmdir_error;
- if (dentry->d_inode) {
- vnode = AFS_FS_I(dentry->d_inode);
+ if (d_really_is_positive(dentry)) {
+ vnode = AFS_FS_I(d_inode(dentry));
clear_nlink(&vnode->vfs_inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
afs_discard_callback_on_delete(vnode);
@@ -856,8 +856,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
goto error;
}
- if (dentry->d_inode) {
- vnode = AFS_FS_I(dentry->d_inode);
+ if (d_really_is_positive(dentry)) {
+ vnode = AFS_FS_I(d_inode(dentry));
/* make sure we have a callback promise on the victim */
ret = afs_validate(vnode, key);
@@ -869,7 +869,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
if (ret < 0)
goto remove_error;
- if (dentry->d_inode) {
+ if (d_really_is_positive(dentry)) {
/* if the file wasn't deleted due to excess hard links, the
* fileserver will break the callback promise on the file - if
* it had one - before it returns to us, and if it was deleted,
@@ -879,7 +879,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
* or it was outstanding on a different server, then it won't
* break it either...
*/
- vnode = AFS_FS_I(dentry->d_inode);
+ vnode = AFS_FS_I(d_inode(dentry));
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
_debug("AFS_VNODE_DELETED");
if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
@@ -977,7 +977,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
struct key *key;
int ret;
- vnode = AFS_FS_I(from->d_inode);
+ vnode = AFS_FS_I(d_inode(from));
dvnode = AFS_FS_I(dir);
_enter("{%x:%u},{%x:%u},{%pd}",
@@ -1089,7 +1089,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct key *key;
int ret;
- vnode = AFS_FS_I(old_dentry->d_inode);
+ vnode = AFS_FS_I(d_inode(old_dentry));
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 932ce07948b3..999bc3caec92 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -31,8 +31,6 @@ const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = afs_file_write,
.mmap = generic_file_readonly_mmap,
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 8a1d38ef0fc2..e06f5a23352a 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -379,7 +379,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
{
struct inode *inode;
- inode = dentry->d_inode;
+ inode = d_inode(dentry);
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
@@ -458,7 +458,7 @@ void afs_evict_inode(struct inode *inode)
*/
int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
- struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
int ret;
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 0dd4dafee10b..91ea1aa0d8b3 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,9 +22,12 @@
int afs_abort_to_error(u32 abort_code)
{
switch (abort_code) {
+ /* low errno codes inserted into abort namespace */
case 13: return -EACCES;
case 27: return -EFBIG;
case 30: return -EROFS;
+
+ /* VICE "special error" codes; 101 - 111 */
case VSALVAGE: return -EIO;
case VNOVNODE: return -ENOENT;
case VNOVOL: return -ENOMEDIUM;
@@ -36,11 +39,18 @@ int afs_abort_to_error(u32 abort_code)
case VOVERQUOTA: return -EDQUOT;
case VBUSY: return -EBUSY;
case VMOVED: return -ENXIO;
- case 0x2f6df0a: return -EWOULDBLOCK;
+
+ /* Unified AFS error table; ET "uae" == 0x2f6df00 */
+ case 0x2f6df00: return -EPERM;
+ case 0x2f6df01: return -ENOENT;
+ case 0x2f6df04: return -EIO;
+ case 0x2f6df0a: return -EAGAIN;
+ case 0x2f6df0b: return -ENOMEM;
case 0x2f6df0c: return -EACCES;
case 0x2f6df0f: return -EBUSY;
case 0x2f6df10: return -EEXIST;
case 0x2f6df11: return -EXDEV;
+ case 0x2f6df12: return -ENODEV;
case 0x2f6df13: return -ENOTDIR;
case 0x2f6df14: return -EISDIR;
case 0x2f6df15: return -EINVAL;
@@ -54,8 +64,12 @@ int afs_abort_to_error(u32 abort_code)
case 0x2f6df23: return -ENAMETOOLONG;
case 0x2f6df24: return -ENOLCK;
case 0x2f6df26: return -ENOTEMPTY;
+ case 0x2f6df28: return -EWOULDBLOCK;
+ case 0x2f6df69: return -ENOTCONN;
+ case 0x2f6df6c: return -ETIMEDOUT;
case 0x2f6df78: return -EDQUOT;
+ /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
case RXKADPACKETSHORT: return -EPROTO;
case RXKADLEVELFAIL: return -EKEYREJECTED;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 938c5ab06d5a..ccd0b212e82a 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -134,7 +134,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
_enter("{%pd}", mntpt);
- BUG_ON(!mntpt->d_inode);
+ BUG_ON(!d_inode(mntpt));
ret = -ENOMEM;
devname = (char *) get_zeroed_page(GFP_KERNEL);
@@ -145,7 +145,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
if (!options)
goto error_no_options;
- vnode = AFS_FS_I(mntpt->d_inode);
+ vnode = AFS_FS_I(d_inode(mntpt));
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
/* if the directory is a pseudo directory, use the d_name */
static const char afs_root_cell[] = ":root.cell.";
@@ -169,14 +169,14 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
}
} else {
/* read the contents of the AFS special symlink */
- loff_t size = i_size_read(mntpt->d_inode);
+ loff_t size = i_size_read(d_inode(mntpt));
char *buf;
ret = -EINVAL;
if (size > PAGE_SIZE - 1)
goto error_no_page;
- page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
+ page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto error_no_page;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index dbc732e9a5c0..3a57a1b0fb51 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -770,15 +770,12 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
void afs_send_empty_reply(struct afs_call *call)
{
struct msghdr msg;
- struct kvec iov[1];
_enter("");
- iov[0].iov_base = NULL;
- iov[0].iov_len = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c4861557e385..1fb4a5129f7d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -529,7 +529,7 @@ static void afs_destroy_inode(struct inode *inode)
static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct afs_volume_status vs;
- struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
int ret;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
-#include <linux/aio.h>
#include "internal.h"
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1854c1..480440f4701f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
unsigned reqs_available;
};
+struct ctx_rq_wait {
+ struct completion comp;
+ atomic_t count;
+};
+
struct kioctx {
struct percpu_ref users;
atomic_t dead;
@@ -115,7 +120,7 @@ struct kioctx {
/*
* signals when all in-flight requests are done
*/
- struct completion *requests_done;
+ struct ctx_rq_wait *rq_wait;
struct {
/*
@@ -151,6 +156,38 @@ struct kioctx {
unsigned id;
};
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED ((void *) (~0ULL))
+
+struct aio_kiocb {
+ struct kiocb common;
+
+ struct kioctx *ki_ctx;
+ kiocb_cancel_fn *ki_cancel;
+
+ struct iocb __user *ki_user_iocb; /* user's aiocb */
+ __u64 ki_user_data; /* user's data for completion */
+
+ struct list_head ki_list; /* the aio core uses this
+ * for cancellation */
+
+ /*
+ * If the aio_resfd field of the userspace iocb is not zero,
+ * this is the underlying eventfd context to deliver events to.
+ */
+ struct eventfd_ctx *ki_eventfd;
+};
+
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr; /* current system wide number of aio requests */
@@ -220,7 +257,7 @@ static int __init aio_setup(void)
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
- kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -278,11 +315,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct kioctx_table *table;
- int i;
+ int i, res = -EINVAL;
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
@@ -292,13 +329,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
ctx = table->table[i];
if (ctx && ctx->aio_ring_file == file) {
- ctx->user_id = ctx->mmap_base = vma->vm_start;
+ if (!atomic_read(&ctx->dead)) {
+ ctx->user_id = ctx->mmap_base = vma->vm_start;
+ res = 0;
+ }
break;
}
}
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
+ return res;
}
static const struct file_operations aio_ring_fops = {
@@ -480,8 +521,9 @@ static int aio_setup_ring(struct kioctx *ctx)
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
@@ -496,7 +538,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
@@ -514,7 +556,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
} while (cancel != old);
- return cancel(kiocb);
+ return cancel(&kiocb->common);
}
static void free_ioctx(struct work_struct *work)
@@ -535,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
/* At this point we know that there are no any in-flight requests */
- if (ctx->requests_done)
- complete(ctx->requests_done);
+ if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
+ complete(&ctx->rq_wait->comp);
INIT_WORK(&ctx->free_work, free_ioctx);
schedule_work(&ctx->free_work);
@@ -550,13 +592,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
static void free_ioctx_users(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, users);
- struct kiocb *req;
+ struct aio_kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
- struct kiocb, ki_list);
+ struct aio_kiocb, ki_list);
list_del_init(&req->ki_list);
kiocb_cancel(req);
@@ -655,8 +697,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
nr_events *= 2;
/* Prevent overflows */
- if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
- (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
+ if (nr_events > (0x10000000U / sizeof(struct io_event))) {
pr_debug("ENOMEM: nr_events too high\n");
return ERR_PTR(-EINVAL);
}
@@ -727,6 +768,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err_cleanup:
aio_nr_sub(ctx->max_reqs);
err_ctx:
+ atomic_set(&ctx->dead, 1);
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
aio_free_ring(ctx);
err:
mutex_unlock(&ctx->ring_lock);
@@ -744,15 +788,16 @@ err:
* the rapid destruction of the kioctx.
*/
static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
- struct completion *requests_done)
+ struct ctx_rq_wait *wait)
{
struct kioctx_table *table;
- if (atomic_xchg(&ctx->dead, 1))
+ spin_lock(&mm->ioctx_lock);
+ if (atomic_xchg(&ctx->dead, 1)) {
+ spin_unlock(&mm->ioctx_lock);
return -EINVAL;
+ }
-
- spin_lock(&mm->ioctx_lock);
table = rcu_dereference_raw(mm->ioctx_table);
WARN_ON(ctx != table->table[ctx->id]);
table->table[ctx->id] = NULL;
@@ -773,27 +818,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
if (ctx->mmap_size)
vm_munmap(ctx->mmap_base, ctx->mmap_size);
- ctx->requests_done = requests_done;
+ ctx->rq_wait = wait;
percpu_ref_kill(&ctx->users);
return 0;
}
-/* wait_on_sync_kiocb:
- * Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
- while (!req->ki_ctx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (req->ki_ctx)
- break;
- io_schedule();
- }
- __set_current_state(TASK_RUNNING);
- return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
/*
* exit_aio: called when the last user of mm goes away. At this point, there is
* no way for any new requests to be submited or any of the io_* syscalls to be
@@ -805,18 +834,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
void exit_aio(struct mm_struct *mm)
{
struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
- int i;
+ struct ctx_rq_wait wait;
+ int i, skipped;
if (!table)
return;
+ atomic_set(&wait.count, table->nr);
+ init_completion(&wait.comp);
+
+ skipped = 0;
for (i = 0; i < table->nr; ++i) {
struct kioctx *ctx = table->table[i];
- struct completion requests_done =
- COMPLETION_INITIALIZER_ONSTACK(requests_done);
- if (!ctx)
+ if (!ctx) {
+ skipped++;
continue;
+ }
+
/*
* We don't need to bother with munmap() here - exit_mmap(mm)
* is coming and it'll unmap everything. And we simply can't,
@@ -825,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
* that it needs to unmap the area, just set it to 0.
*/
ctx->mmap_size = 0;
- kill_ioctx(mm, ctx, &requests_done);
+ kill_ioctx(mm, ctx, &wait);
+ }
+ if (!atomic_sub_and_test(skipped, &wait.count)) {
/* Wait until all IO for the context are done. */
- wait_for_completion(&requests_done);
+ wait_for_completion(&wait.comp);
}
RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -948,9 +985,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
*/
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
@@ -971,10 +1008,10 @@ out_put:
return NULL;
}
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
{
- if (req->ki_filp)
- fput(req->ki_filp);
+ if (req->common.ki_filp)
+ fput(req->common.ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1047,9 @@ out:
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
@@ -1025,13 +1063,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
* ref, no other paths have a way to get another ref
* - the sync task helpfully left a reference to itself in the iocb
*/
- if (is_sync_kiocb(iocb)) {
- iocb->ki_user_data = res;
- smp_wmb();
- iocb->ki_ctx = ERR_PTR(-EXDEV);
- wake_up_process(iocb->ki_obj.tsk);
- return;
- }
+ BUG_ON(is_sync_kiocb(kiocb));
if (iocb->ki_list.next) {
unsigned long flags;
@@ -1057,7 +1089,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+ event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
@@ -1066,7 +1098,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+ ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
res, res2);
/* after flagging the request as done, we
@@ -1113,7 +1145,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
percpu_ref_put(&ctx->reqs);
}
-EXPORT_SYMBOL(aio_complete);
/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
@@ -1313,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- struct completion requests_done =
- COMPLETION_INITIALIZER_ONSTACK(requests_done);
+ struct ctx_rq_wait wait;
int ret;
+ init_completion(&wait.comp);
+ atomic_set(&wait.count, 1);
+
/* Pass requests_done to kill_ioctx() where it can be set
* in a thread-safe way. If we try to set it here then we have
* a race condition if two io_destroy() called simultaneously.
*/
- ret = kill_ioctx(current->mm, ioctx, &requests_done);
+ ret = kill_ioctx(current->mm, ioctx, &wait);
percpu_ref_put(&ioctx->users);
/* Wait until all IO for the context are done. Otherwise kernel
@@ -1329,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
* is destroyed.
*/
if (!ret)
- wait_for_completion(&requests_done);
+ wait_for_completion(&wait.comp);
return ret;
}
@@ -1337,50 +1370,21 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}
-typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
- int rw, char __user *buf,
- unsigned long *nr_segs,
- struct iovec **iovec,
- bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+ struct iovec **iovec,
+ bool compat,
+ struct iov_iter *iter)
{
- ssize_t ret;
-
- *nr_segs = kiocb->ki_nbytes;
-
#ifdef CONFIG_COMPAT
if (compat)
- ret = compat_rw_copy_check_uvector(rw,
+ return compat_import_iovec(rw,
(struct compat_iovec __user *)buf,
- *nr_segs, UIO_FASTIOV, *iovec, iovec);
- else
+ len, UIO_FASTIOV, iovec, iter);
#endif
- ret = rw_copy_check_uvector(rw,
- (struct iovec __user *)buf,
- *nr_segs, UIO_FASTIOV, *iovec, iovec);
- if (ret < 0)
- return ret;
-
- /* ki_nbytes now reflect bytes instead of segs */
- kiocb->ki_nbytes = ret;
- return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
- int rw, char __user *buf,
- unsigned long *nr_segs,
- struct iovec *iovec)
-{
- if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
- return -EFAULT;
-
- iovec->iov_base = buf;
- iovec->iov_len = kiocb->ki_nbytes;
- *nr_segs = 1;
- return 0;
+ return import_iovec(rw, (struct iovec __user *)buf,
+ len, UIO_FASTIOV, iovec, iter);
}
/*
@@ -1388,14 +1392,12 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
* Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
- char __user *buf, bool compat)
+ char __user *buf, size_t len, bool compat)
{
struct file *file = req->ki_filp;
ssize_t ret;
- unsigned long nr_segs;
int rw;
fmode_t mode;
- aio_rw_op *rw_op;
rw_iter_op *iter_op;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1405,7 +1407,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
case IOCB_CMD_PREADV:
mode = FMODE_READ;
rw = READ;
- rw_op = file->f_op->aio_read;
iter_op = file->f_op->read_iter;
goto rw_common;
@@ -1413,51 +1414,40 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
case IOCB_CMD_PWRITEV:
mode = FMODE_WRITE;
rw = WRITE;
- rw_op = file->f_op->aio_write;
iter_op = file->f_op->write_iter;
goto rw_common;
rw_common:
if (unlikely(!(file->f_mode & mode)))
return -EBADF;
- if (!rw_op && !iter_op)
+ if (!iter_op)
return -EINVAL;
- ret = (opcode == IOCB_CMD_PREADV ||
- opcode == IOCB_CMD_PWRITEV)
- ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
- &iovec, compat)
- : aio_setup_single_vector(req, rw, buf, &nr_segs,
- iovec);
+ if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+ ret = aio_setup_vectored_rw(rw, buf, len,
+ &iovec, compat, &iter);
+ else {
+ ret = import_single_range(rw, buf, len, iovec, &iter);
+ iovec = NULL;
+ }
if (!ret)
- ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ ret = rw_verify_area(rw, file, &req->ki_pos,
+ iov_iter_count(&iter));
if (ret < 0) {
- if (iovec != inline_vecs)
- kfree(iovec);
+ kfree(iovec);
return ret;
}
- req->ki_nbytes = ret;
-
- /* XXX: move/kill - rw_verify_area()? */
- /* This matches the pread()/pwrite() logic */
- if (req->ki_pos < 0) {
- ret = -EINVAL;
- break;
- }
+ len = ret;
if (rw == WRITE)
file_start_write(file);
- if (iter_op) {
- iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
- ret = iter_op(req, &iter);
- } else {
- ret = rw_op(req, iovec, nr_segs, req->ki_pos);
- }
+ ret = iter_op(req, &iter);
if (rw == WRITE)
file_end_write(file);
+ kfree(iovec);
break;
case IOCB_CMD_FDSYNC:
@@ -1479,9 +1469,6 @@ rw_common:
return -EINVAL;
}
- if (iovec != inline_vecs)
- kfree(iovec);
-
if (ret != -EIOCBQUEUED) {
/*
* There's no easy way to restart the syscall since other AIO's
@@ -1500,7 +1487,7 @@ rw_common:
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
ssize_t ret;
/* enforce forwards compatibility on users */
@@ -1523,11 +1510,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- req->ki_filp = fget(iocb->aio_fildes);
- if (unlikely(!req->ki_filp)) {
+ req->common.ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
}
+ req->common.ki_pos = iocb->aio_offset;
+ req->common.ki_complete = aio_complete;
+ req->common.ki_flags = iocb_flags(req->common.ki_filp);
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1542,6 +1532,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = NULL;
goto out_put_req;
}
+
+ req->common.ki_flags |= IOCB_EVENTFD;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1550,13 +1542,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
}
- req->ki_obj.user = user_iocb;
+ req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
- req->ki_pos = iocb->aio_offset;
- req->ki_nbytes = iocb->aio_nbytes;
- ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+ ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
+ iocb->aio_nbytes,
compat);
if (ret)
goto out_put_req;
@@ -1643,10 +1634,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
- u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
{
- struct list_head *pos;
+ struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
@@ -1654,9 +1645,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
return NULL;
/* TODO: use a hash or array, this sucks. */
- list_for_each(pos, &ctx->active_reqs) {
- struct kiocb *kiocb = list_kiocb(pos);
- if (kiocb->ki_obj.user == iocb)
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_user_iocb == iocb)
return kiocb;
}
return NULL;
@@ -1676,7 +1666,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
{
struct kioctx *ctx;
- struct kiocb *kiocb;
+ struct aio_kiocb *kiocb;
u32 key;
int ret;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8e98cf954bab..5b700ef1e59d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -213,7 +213,7 @@ void autofs4_clean_ino(struct autofs_info *);
static inline int autofs_prepare_pipe(struct file *pipe)
{
- if (!pipe->f_op->write)
+ if (!(pipe->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
if (!S_ISFIFO(file_inode(pipe)->i_mode))
return -EINVAL;
@@ -235,12 +235,12 @@ static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
{
- return sbi->sb->s_root->d_inode->i_ino;
+ return d_inode(sbi->sb->s_root)->i_ino;
}
static inline int simple_positive(struct dentry *dentry)
{
- return dentry->d_inode && !d_unhashed(dentry);
+ return d_really_is_positive(dentry) && !d_unhashed(dentry);
}
static inline void __autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 11dd118f75e2..1cebc3c52fa5 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -374,7 +374,7 @@ static struct dentry *should_expire(struct dentry *dentry,
return NULL;
}
- if (dentry->d_inode && d_is_symlink(dentry)) {
+ if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
DPRINTK("checking symlink %p %pd", dentry, dentry);
/*
* A symlink can't be "busy" in the usual sense so
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 1c55388ae633..a3ae0b2aeb5a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -71,7 +71,7 @@ void autofs4_kill_sb(struct super_block *sb)
static int autofs4_show_options(struct seq_file *m, struct dentry *root)
{
struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
- struct inode *root_inode = root->d_sb->s_root->d_inode;
+ struct inode *root_inode = d_inode(root->d_sb->s_root);
if (!sbi)
return 0;
@@ -352,8 +352,8 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
if (sb->s_root) {
- inode->i_uid = sb->s_root->d_inode->i_uid;
- inode->i_gid = sb->s_root->d_inode->i_gid;
+ inode->i_uid = d_inode(sb->s_root)->i_uid;
+ inode->i_gid = d_inode(sb->s_root)->i_gid;
}
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = get_next_ino();
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 7e44fdd03e2d..c6d7d3dbd52a 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -240,7 +240,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry,
spin_lock(&expiring->d_lock);
/* We've already been dentry_iput or unlinked */
- if (!expiring->d_inode)
+ if (d_really_is_negative(expiring))
goto next;
qstr = &expiring->d_name;
@@ -371,7 +371,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
* having d_mountpoint() true, so there's no need to call back
* to the daemon.
*/
- if (dentry->d_inode && d_is_symlink(dentry)) {
+ if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
spin_unlock(&sbi->fs_lock);
goto done;
}
@@ -459,7 +459,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
return 0;
if (d_mountpoint(dentry))
return 0;
- inode = ACCESS_ONCE(dentry->d_inode);
+ inode = d_inode_rcu(dentry);
if (inode && S_ISLNK(inode->i_mode))
return -EISDIR;
if (list_empty(&dentry->d_subdirs))
@@ -485,7 +485,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
* an incorrect ELOOP error return.
*/
if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
- (dentry->d_inode && d_is_symlink(dentry)))
+ (d_really_is_positive(dentry) && d_is_symlink(dentry)))
status = -EISDIR;
}
spin_unlock(&sbi->fs_lock);
@@ -625,8 +625,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
}
dput(ino->dentry);
- dentry->d_inode->i_size = 0;
- clear_nlink(dentry->d_inode);
+ d_inode(dentry)->i_size = 0;
+ clear_nlink(d_inode(dentry));
dir->i_mtime = CURRENT_TIME;
@@ -719,8 +719,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
atomic_dec(&p_ino->count);
}
dput(ino->dentry);
- dentry->d_inode->i_size = 0;
- clear_nlink(dentry->d_inode);
+ d_inode(dentry)->i_size = 0;
+ clear_nlink(d_inode(dentry));
if (dir->i_nlink)
drop_nlink(dir);
@@ -839,7 +839,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
*/
int is_autofs4_dentry(struct dentry *dentry)
{
- return dentry && dentry->d_inode &&
+ return dentry && d_really_is_positive(dentry) &&
dentry->d_op == &autofs4_dentry_operations &&
dentry->d_fsdata != NULL;
}
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index 1e8ea192be2b..de58cc7b8076 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -18,7 +18,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
struct autofs_info *ino = autofs4_dentry_ino(dentry);
if (ino && !autofs4_oz_mode(sbi))
ino->last_used = jiffies;
- nd_set_link(nd, dentry->d_inode->i_private);
+ nd_set_link(nd, d_inode(dentry)->i_private);
return NULL;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 116fd38ee472..35b755e79c2d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -70,7 +70,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
mutex_lock(&sbi->pipe_mutex);
while (bytes &&
- (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
+ (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) {
data += wr;
bytes -= wr;
}
@@ -322,7 +322,7 @@ static int validate_request(struct autofs_wait_queue **wait,
* continue on and create a new request.
*/
if (!IS_ROOT(dentry)) {
- if (dentry->d_inode && d_unhashed(dentry)) {
+ if (d_really_is_positive(dentry) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
new = d_lookup(parent, &dentry->d_name);
if (new)
@@ -364,7 +364,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
if (pid == 0 || tgid == 0)
return -ENOENT;
- if (!dentry->d_inode) {
+ if (d_really_is_negative(dentry)) {
/*
* A wait for a negative dentry is invalid for certain
* cases. A direct or offset mount "always" has its mount
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index 3a7813ab8c95..1fead8d56a98 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -19,16 +19,16 @@ typedef u64 befs_blocknr_t;
* BeFS in memory structures
*/
-typedef struct befs_mount_options {
+struct befs_mount_options {
kgid_t gid;
kuid_t uid;
int use_gid;
int use_uid;
int debug;
char *iocharset;
-} befs_mount_options;
+};
-typedef struct befs_sb_info {
+struct befs_sb_info {
u32 magic1;
u32 block_size;
u32 block_shift;
@@ -52,12 +52,11 @@ typedef struct befs_sb_info {
befs_inode_addr indices;
u32 magic3;
- befs_mount_options mount_opts;
+ struct befs_mount_options mount_opts;
struct nls_table *nls;
+};
-} befs_sb_info;
-
-typedef struct befs_inode_info {
+struct befs_inode_info {
u32 i_flags;
u32 i_type;
@@ -71,8 +70,7 @@ typedef struct befs_inode_info {
} i_data;
struct inode vfs_inode;
-
-} befs_inode_info;
+};
enum befs_err {
BEFS_OK,
@@ -105,13 +103,13 @@ void befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *);
/* Gets a pointer to the private portion of the super_block
* structure from the public part
*/
-static inline befs_sb_info *
+static inline struct befs_sb_info *
BEFS_SB(const struct super_block *super)
{
- return (befs_sb_info *) super->s_fs_info;
+ return (struct befs_sb_info *) super->s_fs_info;
}
-static inline befs_inode_info *
+static inline struct befs_inode_info *
BEFS_I(const struct inode *inode)
{
return list_entry(inode, struct befs_inode_info, vfs_inode);
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 1e8e0b8d8836..ebd50718659f 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -168,7 +168,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
befs_blocknr_t blocks;
befs_blocknr_t datablocks; /* File data blocks */
befs_blocknr_t metablocks; /* FS metadata blocks */
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_debug(sb, "---> %s", __func__);
@@ -428,7 +428,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
struct buffer_head *indir_block;
befs_block_run indir_run;
befs_disk_inode_addr *iaddr_array = NULL;
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_blocknr_t indir_start_blk =
data->max_indirect_range >> befs_sb->block_shift;
diff --git a/fs/befs/io.c b/fs/befs/io.c
index 0408a3d601d0..7a5b4ec21c56 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -28,7 +28,7 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
{
struct buffer_head *bh = NULL;
befs_blocknr_t block = 0;
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_debug(sb, "---> Enter %s "
"[%u, %hu, %hu]", __func__, iaddr.allocation_group,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e089f1985fca..7943533c3868 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -51,7 +51,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
static void befs_put_super(struct super_block *);
static int befs_remount(struct super_block *, int *, char *);
static int befs_statfs(struct dentry *, struct kstatfs *);
-static int parse_options(char *, befs_mount_options *);
+static int parse_options(char *, struct befs_mount_options *);
static const struct super_operations befs_sops = {
.alloc_inode = befs_alloc_inode, /* allocate a new inode */
@@ -304,9 +304,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
{
struct buffer_head *bh = NULL;
befs_inode *raw_inode = NULL;
-
- befs_sb_info *befs_sb = BEFS_SB(sb);
- befs_inode_info *befs_ino = NULL;
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_inode_info *befs_ino = NULL;
struct inode *inode;
long ret = -EIO;
@@ -472,7 +471,7 @@ static void *
befs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct super_block *sb = dentry->d_sb;
- befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+ struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
char *link;
@@ -502,7 +501,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
static void *
befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+ struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+
nd_set_link(nd, befs_ino->i_data.symlink);
return NULL;
}
@@ -669,7 +669,7 @@ static const match_table_t befs_tokens = {
};
static int
-parse_options(char *options, befs_mount_options * opts)
+parse_options(char *options, struct befs_mount_options *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -769,7 +769,7 @@ static int
befs_fill_super(struct super_block *sb, void *data, int silent)
{
struct buffer_head *bh;
- befs_sb_info *befs_sb;
+ struct befs_sb_info *befs_sb;
befs_super_block *disk_sb;
struct inode *root;
long ret = -EINVAL;
diff --git a/fs/befs/super.c b/fs/befs/super.c
index ca40f828f64d..aeafc4d84278 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -24,7 +24,7 @@
int
befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
{
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
/* Check the byte order of the filesystem */
if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
@@ -59,7 +59,7 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
int
befs_check_sb(struct super_block *sb)
{
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
/* Check magic headers of super block */
if ((befs_sb->magic1 != BEFS_SUPER_MAGIC1)
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 08063ae0a17c..3ec6113146c0 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -86,7 +86,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode = new_inode(s);
if (!inode)
- return -ENOSPC;
+ return -ENOMEM;
mutex_lock(&info->bfs_lock);
ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
if (ino > info->si_lasti) {
@@ -153,7 +153,7 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
static int bfs_link(struct dentry *old, struct inode *dir,
struct dentry *new)
{
- struct inode *inode = old->d_inode;
+ struct inode *inode = d_inode(old);
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
int err;
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
static int bfs_unlink(struct inode *dir, struct dentry *dentry)
{
int error = -ENOENT;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct buffer_head *bh;
struct bfs_dirent *de;
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -216,7 +216,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
int error = -ENOENT;
old_bh = new_bh = NULL;
- old_inode = old_dentry->d_inode;
+ old_inode = d_inode(old_dentry);
if (S_ISDIR(old_inode->i_mode))
return -EINVAL;
@@ -231,7 +231,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
error = -EPERM;
- new_inode = new_dentry->d_inode;
+ new_inode = d_inode(new_dentry);
new_bh = bfs_find_entry(new_dir,
new_dentry->d_name.name,
new_dentry->d_name.len, &new_de);
@@ -293,7 +293,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
for (block = sblock; block <= eblock; block++) {
bh = sb_bread(dir->i_sb, block);
if (!bh)
- return -ENOSPC;
+ return -EIO;
for (off = 0; off < BFS_BSIZE; off += BFS_DIRENT_SIZE) {
de = (struct bfs_dirent *)(bh->b_data + off);
if (!de->ino) {
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index e7f88ace1a25..97f1b5160155 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -23,9 +23,7 @@
const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
#include "bfs.h"
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
+ unsigned long total_size = 0;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
-#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
- /* Memory randomization might have been switched off
- * in runtime via sysctl or explicit setting of
- * personality flags.
- * If that is the case, retain the original non-zero
- * load_bias value in order to establish proper
- * non-randomized mappings.
- */
+ load_bias = ELF_ET_DYN_BASE - vaddr;
if (current->flags & PF_RANDOMIZE)
- load_bias = 0;
- else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#endif
+ load_bias += arch_mmap_rnd();
+ load_bias = ELF_PAGESTART(load_bias);
+ total_size = total_mapping_size(elf_phdata,
+ loc->elf_ex.e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_free_dentry;
+ }
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, 0);
+ elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
-#ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
-#ifdef CONFIG_COMPAT_BRK
+#ifdef compat_brk_randomized
current->brk_randomized = 1;
#endif
}
-#endif
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 97aff2879cda..78f005f37847 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -521,9 +522,8 @@ static int parse_command(const char __user *buffer, size_t count)
static void entry_status(Node *e, char *page)
{
- char *dp;
- char *status = "disabled";
- const char *flags = "flags: ";
+ char *dp = page;
+ const char *status = "disabled";
if (test_bit(Enabled, &e->flags))
status = "enabled";
@@ -533,12 +533,10 @@ static void entry_status(Node *e, char *page)
return;
}
- sprintf(page, "%s\ninterpreter %s\n", status, e->interpreter);
- dp = page + strlen(page);
+ dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
/* print the special flags */
- sprintf(dp, "%s", flags);
- dp += strlen(flags);
+ dp += sprintf(dp, "flags: ");
if (e->flags & MISC_FMT_PRESERVE_ARGV0)
*dp++ = 'P';
if (e->flags & MISC_FMT_OPEN_BINARY)
@@ -550,21 +548,11 @@ static void entry_status(Node *e, char *page)
if (!test_bit(Magic, &e->flags)) {
sprintf(dp, "extension .%s\n", e->magic);
} else {
- int i;
-
- sprintf(dp, "offset %i\nmagic ", e->offset);
- dp = page + strlen(page);
- for (i = 0; i < e->size; i++) {
- sprintf(dp, "%02x", 0xff & (int) (e->magic[i]));
- dp += 2;
- }
+ dp += sprintf(dp, "offset %i\nmagic ", e->offset);
+ dp = bin2hex(dp, e->magic, e->size);
if (e->mask) {
- sprintf(dp, "\nmask ");
- dp += 6;
- for (i = 0; i < e->size; i++) {
- sprintf(dp, "%02x", 0xff & (int) (e->mask[i]));
- dp += 2;
- }
+ dp += sprintf(dp, "\nmask ");
+ dp = bin2hex(dp, e->mask, e->size);
}
*dp++ = '\n';
*dp = '\0';
@@ -603,7 +591,7 @@ static void kill_node(Node *e)
write_unlock(&entries_lock);
if (dentry) {
- drop_nlink(dentry->d_inode);
+ drop_nlink(d_inode(dentry));
d_drop(dentry);
dput(dentry);
simple_release_fs(&bm_mnt, &entry_count);
@@ -650,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
case 3:
/* Delete this handler. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
+ mutex_lock(&d_inode(root)->i_mutex);
kill_node(e);
- mutex_unlock(&root->d_inode->i_mutex);
+ mutex_unlock(&d_inode(root)->i_mutex);
dput(root);
break;
default:
@@ -687,14 +675,14 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
return PTR_ERR(e);
root = dget(sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
+ mutex_lock(&d_inode(root)->i_mutex);
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out;
err = -EEXIST;
- if (dentry->d_inode)
+ if (d_really_is_positive(dentry))
goto out2;
inode = bm_get_inode(sb, S_IFREG | 0644);
@@ -723,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
out2:
dput(dentry);
out:
- mutex_unlock(&root->d_inode->i_mutex);
+ mutex_unlock(&d_inode(root)->i_mutex);
dput(root);
if (err) {
@@ -766,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
case 3:
/* Delete all handlers. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&root->d_inode->i_mutex);
+ mutex_lock(&d_inode(root)->i_mutex);
while (!list_empty(&entries))
kill_node(list_entry(entries.next, Node, list));
- mutex_unlock(&root->d_inode->i_mutex);
+ mutex_unlock(&d_inode(root)->i_mutex);
dput(root);
break;
default:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..c7e4163ede87 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
-#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -147,15 +146,14 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
}
static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
- offset, blkdev_get_block,
- NULL, NULL, 0);
+ return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
+ blkdev_get_block, NULL, NULL,
+ DIO_SKIP_DIO_COUNT);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1598,9 +1596,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct inode *bd_inode = file->f_mapping->host;
+ loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
ssize_t ret;
+ if (bdev_read_only(I_BDEV(bd_inode)))
+ return -EPERM;
+
+ if (!iov_iter_count(from))
+ return 0;
+
+ if (iocb->ki_pos >= size)
+ return -ENOSPC;
+
+ iov_iter_truncate(from, size - iocb->ki_pos);
+
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0) {
@@ -1660,8 +1671,6 @@ const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
.mmap = generic_file_mmap,
@@ -1708,7 +1717,7 @@ struct block_device *lookup_bdev(const char *pathname)
if (error)
return ERR_PTR(error);
- inode = path.dentry->d_inode;
+ inode = d_backing_inode(path.dentry);
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))
goto fail;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 4dabeb893b7c..df9932b00d08 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -87,7 +87,7 @@ BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);
static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
int thresh)
{
struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -132,7 +132,7 @@ static inline void
__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
- int flags,
+ unsigned int flags,
int max_active,
int thresh)
{
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index e386c29ef1f6..ec2ee477f8ba 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -66,7 +66,7 @@ BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
- int flags,
+ unsigned int flags,
int max_active,
int thresh);
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f55721ff9385..9de772ee0031 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
struct ulist *roots = NULL;
struct ulist_iterator uiter;
struct ulist_node *node;
- struct seq_list elem = {};
+ struct seq_list elem = SEQ_LIST_INIT(elem);
int ret = 0;
tmp = ulist_alloc(GFP_NOFS);
@@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
- struct seq_list tree_mod_seq_elem = {};
+ struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index de5e4f2adfea..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,7 +66,11 @@ struct btrfs_inode {
*/
struct btrfs_key location;
- /* Lock for counters */
+ /*
+ * Lock for counters and all fields used to determine if the inode is in
+ * the log or not (last_trans, last_sub_trans, last_log_commit,
+ * logged_trans).
+ */
spinlock_t lock;
/* the extent_tree has caches of all the extent mappings to disk */
@@ -250,6 +254,9 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
+ int ret = 0;
+
+ spin_lock(&BTRFS_I(inode)->lock);
if (BTRFS_I(inode)->logged_trans == generation &&
BTRFS_I(inode)->last_sub_trans <=
BTRFS_I(inode)->last_log_commit &&
@@ -263,9 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
*/
smp_mb();
if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
- return 1;
+ ret = 1;
}
- return 0;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ return ret;
}
#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d897ef803b3b..ce7dec88f4b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector,
dev_bytenr, bio->bi_bdev);
- mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
- GFP_NOFS);
+ mapped_datav = kmalloc_array(bio->bi_vcnt,
+ sizeof(*mapped_datav), GFP_NOFS);
if (!mapped_datav)
goto leave;
cur_bytenr = dev_bytenr;
@@ -3241,8 +3241,5 @@ void btrfsic_unmount(struct btrfs_root *root,
mutex_unlock(&btrfsic_mutex);
- if (is_vmalloc_addr(state))
- vfree(state);
- else
- kfree(state);
+ kvfree(state);
}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index e9df8862012c..ce62324c78e7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->orig_bio = bio;
nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
- cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
+ cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_NOFS);
if (!cb->compressed_pages)
goto fail1;
@@ -750,7 +750,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
-static struct btrfs_compress_op *btrfs_compress_op[] = {
+static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zlib_compress,
&btrfs_lzo_compress,
};
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d181f70caae0..13a4dc0436c9 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -77,7 +77,7 @@ struct btrfs_compress_op {
size_t srclen, size_t destlen);
};
-extern struct btrfs_compress_op btrfs_zlib_compress;
-extern struct btrfs_compress_op btrfs_lzo_compress;
+extern const struct btrfs_compress_op btrfs_zlib_compress;
+extern const struct btrfs_compress_op btrfs_lzo_compress;
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6d67f32e648d..0f11ebc92f02 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
if (!tree_mod_need_log(fs_info, eb))
return 0;
- tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+ tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
if (!tm_list)
return -ENOMEM;
@@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (log_removal && btrfs_header_level(old_root) > 0) {
nritems = btrfs_header_nritems(old_root);
- tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
flags);
if (!tm_list) {
ret = -ENOMEM;
@@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
return 0;
- tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+ tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
GFP_NOFS);
if (!tm_list)
return -ENOMEM;
@@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
return 0;
nritems = btrfs_header_nritems(eb);
- tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
- GFP_NOFS);
+ tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
if (!tm_list)
return -ENOMEM;
@@ -1073,7 +1072,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1);
BUG_ON(ret); /* -ENOMEM */
}
- clean_tree_block(trans, root, buf);
+ clean_tree_block(trans, root->fs_info, buf);
*last_ref = 1;
}
return 0;
@@ -1678,7 +1677,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
continue;
}
- cur = btrfs_find_tree_block(root, blocknr);
+ cur = btrfs_find_tree_block(root->fs_info, blocknr);
if (cur)
uptodate = btrfs_buffer_uptodate(cur, gen, 0);
else
@@ -1943,7 +1942,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
path->locks[level] = 0;
path->nodes[level] = NULL;
- clean_tree_block(trans, root, mid);
+ clean_tree_block(trans, root->fs_info, mid);
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
@@ -1997,7 +1996,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
- clean_tree_block(trans, root, right);
+ clean_tree_block(trans, root->fs_info, right);
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
@@ -2041,7 +2040,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BUG_ON(wret == 1);
}
if (btrfs_header_nritems(mid) == 0) {
- clean_tree_block(trans, root, mid);
+ clean_tree_block(trans, root->fs_info, mid);
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
@@ -2259,7 +2258,7 @@ static void reada_for_search(struct btrfs_root *root,
search = btrfs_node_blockptr(node, slot);
blocksize = root->nodesize;
- eb = btrfs_find_tree_block(root, search);
+ eb = btrfs_find_tree_block(root->fs_info, search);
if (eb) {
free_extent_buffer(eb);
return;
@@ -2319,7 +2318,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
if (slot > 0) {
block1 = btrfs_node_blockptr(parent, slot - 1);
gen = btrfs_node_ptr_generation(parent, slot - 1);
- eb = btrfs_find_tree_block(root, block1);
+ eb = btrfs_find_tree_block(root->fs_info, block1);
/*
* if we get -eagain from btrfs_buffer_uptodate, we
* don't want to return eagain here. That will loop
@@ -2332,7 +2331,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
if (slot + 1 < nritems) {
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
- eb = btrfs_find_tree_block(root, block2);
+ eb = btrfs_find_tree_block(root->fs_info, block2);
if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block2 = 0;
free_extent_buffer(eb);
@@ -2450,7 +2449,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
blocknr = btrfs_node_blockptr(b, slot);
gen = btrfs_node_ptr_generation(b, slot);
- tmp = btrfs_find_tree_block(root, blocknr);
+ tmp = btrfs_find_tree_block(root->fs_info, blocknr);
if (tmp) {
/* first we do an atomic uptodate check */
if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
@@ -3126,7 +3125,8 @@ again:
* higher levels
*
*/
-static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
struct btrfs_disk_key *key, int level)
{
int i;
@@ -3137,7 +3137,7 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
if (!path->nodes[i])
break;
t = path->nodes[i];
- tree_mod_log_set_node_key(root->fs_info, t, tslot, 1);
+ tree_mod_log_set_node_key(fs_info, t, tslot, 1);
btrfs_set_node_key(t, key, tslot);
btrfs_mark_buffer_dirty(path->nodes[i]);
if (tslot != 0)
@@ -3151,7 +3151,8 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
* This function isn't completely safe. It's the caller's responsibility
* that the new key won't break the order
*/
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
@@ -3173,7 +3174,7 @@ void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
btrfs_set_item_key(eb, &disk_key, slot);
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
- fixup_low_keys(root, path, &disk_key, 1);
+ fixup_low_keys(fs_info, path, &disk_key, 1);
}
/*
@@ -3692,7 +3693,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (left_nritems)
btrfs_mark_buffer_dirty(left);
else
- clean_tree_block(trans, root, left);
+ clean_tree_block(trans, root->fs_info, left);
btrfs_mark_buffer_dirty(right);
@@ -3704,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (path->slots[0] >= left_nritems) {
path->slots[0] -= left_nritems;
if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(trans, root, path->nodes[0]);
+ clean_tree_block(trans, root->fs_info, path->nodes[0]);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3928,10 +3929,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
if (right_nritems)
btrfs_mark_buffer_dirty(right);
else
- clean_tree_block(trans, root, right);
+ clean_tree_block(trans, root->fs_info, right);
btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(root, path, &disk_key, 1);
+ fixup_low_keys(root->fs_info, path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -4168,6 +4169,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int mid;
int slot;
struct extent_buffer *right;
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
int wret;
int split;
@@ -4271,10 +4273,10 @@ again:
btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(right, root->root_key.objectid);
btrfs_set_header_level(right, 0);
- write_extent_buffer(right, root->fs_info->fsid,
+ write_extent_buffer(right, fs_info->fsid,
btrfs_header_fsid(), BTRFS_FSID_SIZE);
- write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+ write_extent_buffer(right, fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(right),
BTRFS_UUID_SIZE);
@@ -4297,7 +4299,7 @@ again:
path->nodes[0] = right;
path->slots[0] = 0;
if (path->slots[1] == 0)
- fixup_low_keys(root, path, &disk_key, 1);
+ fixup_low_keys(fs_info, path, &disk_key, 1);
}
btrfs_mark_buffer_dirty(right);
return ret;
@@ -4615,7 +4617,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
btrfs_set_item_key(leaf, &disk_key, slot);
if (slot == 0)
- fixup_low_keys(root, path, &disk_key, 1);
+ fixup_low_keys(root->fs_info, path, &disk_key, 1);
}
item = btrfs_item_nr(slot);
@@ -4716,7 +4718,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (path->slots[0] == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(root, path, &disk_key, 1);
+ fixup_low_keys(root->fs_info, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
@@ -4888,7 +4890,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(root, path, &disk_key, level + 1);
+ fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
}
@@ -4981,7 +4983,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
- clean_tree_block(trans, root, leaf);
+ clean_tree_block(trans, root->fs_info, leaf);
btrfs_del_leaf(trans, root, path, leaf);
}
} else {
@@ -4990,7 +4992,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(root, path, &disk_key, 1);
+ fixup_low_keys(root->fs_info, path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f9c89cae39ee..6f364e1d8d3d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1061,6 +1061,12 @@ struct btrfs_block_group_item {
__le64 flags;
} __attribute__ ((__packed__));
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+static inline u64 btrfs_qgroup_level(u64 qgroupid)
+{
+ return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
/*
* is subvolume quota turned on?
*/
@@ -1256,6 +1262,20 @@ struct btrfs_caching_control {
atomic_t count;
};
+struct btrfs_io_ctl {
+ void *cur, *orig;
+ struct page *page;
+ struct page **pages;
+ struct btrfs_root *root;
+ struct inode *inode;
+ unsigned long size;
+ int index;
+ int num_pages;
+ int entries;
+ int bitmaps;
+ unsigned check_crcs:1;
+};
+
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
@@ -1321,6 +1341,9 @@ struct btrfs_block_group_cache {
/* For dirty block groups */
struct list_head dirty_list;
+ struct list_head io_list;
+
+ struct btrfs_io_ctl io_ctl;
};
/* delayed seq elem */
@@ -1329,6 +1352,8 @@ struct seq_list {
u64 seq;
};
+#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
+
enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_STARTED = 1,
ORPHAN_CLEANUP_DONE = 2,
@@ -1472,6 +1497,12 @@ struct btrfs_fs_info {
struct mutex chunk_mutex;
struct mutex volume_mutex;
+ /*
+ * this is taken to make sure we don't set block groups ro after
+ * the free space cache has been allocated on them
+ */
+ struct mutex ro_block_group_mutex;
+
/* this is used during read/modify/write to make sure
* no two ios are trying to mod the same stripe at the same
* time
@@ -1513,6 +1544,7 @@ struct btrfs_fs_info {
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
+ struct rw_semaphore delayed_iput_sem;
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
@@ -3295,6 +3327,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
}
/* extent-tree.c */
+
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
+
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
@@ -3385,6 +3420,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset, int no_quota);
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@@ -3417,7 +3454,7 @@ enum btrfs_reserve_flush_enum {
BTRFS_RESERVE_FLUSH_ALL,
};
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@@ -3440,6 +3477,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
unsigned short type);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush);
@@ -3486,7 +3524,8 @@ int btrfs_previous_item(struct btrfs_root *root,
int type);
int btrfs_previous_extent_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid);
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -4180,7 +4219,8 @@ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
- (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+ ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
+ !btrfs_qgroup_level(rootid)))
return 1;
return 0;
}
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 82f0c7c95474..a2ae42720a6a 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1383,7 +1383,7 @@ out:
static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
- struct btrfs_root *root, int nr)
+ struct btrfs_fs_info *fs_info, int nr)
{
struct btrfs_async_delayed_work *async_work;
@@ -1399,7 +1399,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
btrfs_async_run_delayed_root, NULL, NULL);
async_work->nr = nr;
- btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
+ btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
return 0;
}
@@ -1426,6 +1426,7 @@ static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
delayed_root = btrfs_get_delayed_root(root);
@@ -1438,7 +1439,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
seq = atomic_read(&delayed_root->items_seq);
- ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
+ ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
if (ret)
return;
@@ -1447,7 +1448,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
return;
}
- btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
+ btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}
/* Will return 0 or -ENOMEM */
@@ -1801,6 +1802,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+ BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
+
inode->i_version = btrfs_stack_inode_sequence(inode_item);
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6d16bea94e1c..8f8ed7d20bac 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
* existing and update must have the same bytenr
*/
static noinline void
-update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
+update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_node *existing,
struct btrfs_delayed_ref_node *update)
{
struct btrfs_delayed_ref_head *existing_ref;
struct btrfs_delayed_ref_head *ref;
+ int old_ref_mod;
existing_ref = btrfs_delayed_node_to_head(existing);
ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
* only need the lock for this case cause we could be processing it
* currently, for refs we just added we know we're a-ok.
*/
+ old_ref_mod = existing_ref->total_ref_mod;
existing->ref_mod += update->ref_mod;
+ existing_ref->total_ref_mod += update->ref_mod;
+
+ /*
+ * If we are going from a positive ref mod to a negative or vice
+ * versa we need to make sure to adjust pending_csums accordingly.
+ */
+ if (existing_ref->is_data) {
+ if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
+ delayed_refs->pending_csums -= existing->num_bytes;
+ if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
+ delayed_refs->pending_csums += existing->num_bytes;
+ }
spin_unlock(&existing_ref->lock);
}
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
head_ref->is_data = is_data;
head_ref->ref_root = RB_ROOT;
head_ref->processing = 0;
+ head_ref->total_ref_mod = count_mod;
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
if (existing) {
- update_existing_head_ref(&existing->node, ref);
+ update_existing_head_ref(delayed_refs, &existing->node, ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
head_ref = existing;
} else {
+ if (is_data && count_mod < 0)
+ delayed_refs->pending_csums += num_bytes;
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a764e2340d48..5eb0892396d0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
struct rb_node href_node;
struct btrfs_delayed_extent_op *extent_op;
+
+ /*
+ * This is used to track the final ref_mod from all the refs associated
+ * with this head ref. It is not adjusted as delayed refs are run; it
+ * is meant to track whether we need to do the csum accounting or not.
+ */
+ int total_ref_mod;
+
/*
* when a new extent is allocated, it is just reserved in memory
* The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;
+ u64 pending_csums;
+
/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5ec03d999c37..0573848c7333 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
srcdev = dev_replace->srcdev;
- args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
- div64_u64(btrfs_device_get_total_bytes(srcdev), 1000));
+ args->status.progress_1000 = div_u64(dev_replace->cursor_left,
+ div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break;
}
btrfs_dev_replace_unlock(dev_replace);
@@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data)
btrfs_dev_replace_status(fs_info, status_args);
progress = status_args->status.progress_1000;
kfree(status_args);
- do_div(progress, 10);
+ progress = div_u64(progress, 10);
printk_in_rcu(KERN_INFO
"BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
dev_replace->srcdev->missing ? "<missing disk>" :
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 639f2663ed3f..2ef9a4b72d06 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -54,7 +54,7 @@
#include <asm/cpufeature.h>
#endif
-static struct extent_io_ops btree_extent_io_ops;
+static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result)
* compute the csum for a btree block, and either verify it or write it
* into the csum field of the block.
*/
-static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+static int csum_tree_block(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *buf,
int verify)
{
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
char *result = NULL;
unsigned long len;
unsigned long cur_len;
@@ -302,7 +303,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
offset += cur_len;
}
if (csum_size > sizeof(inline_result)) {
- result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
+ result = kzalloc(csum_size, GFP_NOFS);
if (!result)
return 1;
} else {
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
printk_ratelimited(KERN_WARNING
"BTRFS: %s checksum verify failed on %llu wanted %X found %X "
"level %d\n",
- root->fs_info->sb->s_id, buf->start,
+ fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
@@ -418,12 +419,6 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
if (memcmp(raw_disk_sb, result, csum_size))
ret = 1;
-
- if (ret && btrfs_super_generation(disk_sb) < 10) {
- printk(KERN_WARNING
- "BTRFS: super block crcs don't match, older mkfs detected\n");
- ret = 0;
- }
}
if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
@@ -501,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
* we only fill in the checksum field in the first page of a multi-page block
*/
-static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
{
u64 start = page_offset(page);
u64 found_start;
@@ -513,14 +508,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
found_start = btrfs_header_bytenr(eb);
if (WARN_ON(found_start != start || !PageUptodate(page)))
return 0;
- csum_tree_block(root, eb, 0);
+ csum_tree_block(fs_info, eb, 0);
return 0;
}
-static int check_tree_block_fsid(struct btrfs_root *root,
+static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
{
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
u8 fsid[BTRFS_UUID_SIZE];
int ret = 1;
@@ -640,7 +635,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
ret = -EIO;
goto err;
}
- if (check_tree_block_fsid(root, eb)) {
+ if (check_tree_block_fsid(root->fs_info, eb)) {
printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
eb->fs_info->sb->s_id, eb->start);
ret = -EIO;
@@ -657,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
- ret = csum_tree_block(root, eb, 1);
+ ret = csum_tree_block(root->fs_info, eb, 1);
if (ret) {
ret = -EIO;
goto err;
@@ -882,7 +877,7 @@ static int btree_csum_one_bio(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
root = BTRFS_I(bvec->bv_page->mapping->host)->root;
- ret = csum_dirty_buffer(root, bvec->bv_page);
+ ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
if (ret)
break;
}
@@ -1119,10 +1114,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
return 0;
}
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr)
{
- return find_extent_buffer(root->fs_info, bytenr);
+ return find_extent_buffer(fs_info, bytenr);
}
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
@@ -1165,11 +1160,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
}
-void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+void clean_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info,
struct extent_buffer *buf)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
-
if (btrfs_header_generation(buf) ==
fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
@@ -2146,6 +2140,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
}
}
+static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
+{
+ mutex_init(&fs_info->scrub_lock);
+ atomic_set(&fs_info->scrubs_running, 0);
+ atomic_set(&fs_info->scrub_pause_req, 0);
+ atomic_set(&fs_info->scrubs_paused, 0);
+ atomic_set(&fs_info->scrub_cancel_req, 0);
+ init_waitqueue_head(&fs_info->scrub_pause_wait);
+ fs_info->scrub_workers_refcnt = 0;
+}
+
+static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
+{
+ spin_lock_init(&fs_info->balance_lock);
+ mutex_init(&fs_info->balance_mutex);
+ atomic_set(&fs_info->balance_running, 0);
+ atomic_set(&fs_info->balance_pause_req, 0);
+ atomic_set(&fs_info->balance_cancel_req, 0);
+ fs_info->balance_ctl = NULL;
+ init_waitqueue_head(&fs_info->balance_wait_q);
+}
+
+static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *tree_root)
+{
+ fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+ set_nlink(fs_info->btree_inode, 1);
+ /*
+ * we set the i_size on the btree inode to the max possible int.
+ * the real end of the address space is determined by all of
+ * the devices in the system
+ */
+ fs_info->btree_inode->i_size = OFFSET_MAX;
+ fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
+
+ RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
+ extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
+ fs_info->btree_inode->i_mapping);
+ BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
+ extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
+
+ BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
+
+ BTRFS_I(fs_info->btree_inode)->root = tree_root;
+ memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+ sizeof(struct btrfs_key));
+ set_bit(BTRFS_INODE_DUMMY,
+ &BTRFS_I(fs_info->btree_inode)->runtime_flags);
+ btrfs_insert_inode_hash(fs_info->btree_inode);
+}
+
+static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
+{
+ fs_info->dev_replace.lock_owner = 0;
+ atomic_set(&fs_info->dev_replace.nesting_level, 0);
+ mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+ mutex_init(&fs_info->dev_replace.lock_management_lock);
+ mutex_init(&fs_info->dev_replace.lock);
+ init_waitqueue_head(&fs_info->replace_wait);
+}
+
+static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
+{
+ spin_lock_init(&fs_info->qgroup_lock);
+ mutex_init(&fs_info->qgroup_ioctl_lock);
+ fs_info->qgroup_tree = RB_ROOT;
+ fs_info->qgroup_op_tree = RB_ROOT;
+ INIT_LIST_HEAD(&fs_info->dirty_qgroups);
+ fs_info->qgroup_seq = 1;
+ fs_info->quota_enabled = 0;
+ fs_info->pending_quota_state = 0;
+ fs_info->qgroup_ulist = NULL;
+ mutex_init(&fs_info->qgroup_rescan_lock);
+}
+
+static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
+ struct btrfs_fs_devices *fs_devices)
+{
+ int max_active = fs_info->thread_pool_size;
+ unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
+
+ fs_info->workers =
+ btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
+ max_active, 16);
+
+ fs_info->delalloc_workers =
+ btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+
+ fs_info->flush_workers =
+ btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+
+ fs_info->caching_workers =
+ btrfs_alloc_workqueue("cache", flags, max_active, 0);
+
+ /*
+ * a higher idle thresh on the submit workers makes it much more
+ * likely that bios will be sent down in a sane order to the
+ * devices
+ */
+ fs_info->submit_workers =
+ btrfs_alloc_workqueue("submit", flags,
+ min_t(u64, fs_devices->num_devices,
+ max_active), 64);
+
+ fs_info->fixup_workers =
+ btrfs_alloc_workqueue("fixup", flags, 1, 0);
+
+ /*
+ * endios are largely parallel and should have a very
+ * low idle thresh
+ */
+ fs_info->endio_workers =
+ btrfs_alloc_workqueue("endio", flags, max_active, 4);
+ fs_info->endio_meta_workers =
+ btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+ fs_info->endio_meta_write_workers =
+ btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+ fs_info->endio_raid56_workers =
+ btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+ fs_info->endio_repair_workers =
+ btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+ fs_info->rmw_workers =
+ btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+ fs_info->endio_write_workers =
+ btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+ fs_info->endio_freespace_worker =
+ btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+ fs_info->delayed_workers =
+ btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+ fs_info->readahead_workers =
+ btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+ fs_info->qgroup_rescan_workers =
+ btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+ fs_info->extent_workers =
+ btrfs_alloc_workqueue("extent-refs", flags,
+ min_t(u64, fs_devices->num_devices,
+ max_active), 8);
+
+ if (!(fs_info->workers && fs_info->delalloc_workers &&
+ fs_info->submit_workers && fs_info->flush_workers &&
+ fs_info->endio_workers && fs_info->endio_meta_workers &&
+ fs_info->endio_meta_write_workers &&
+ fs_info->endio_repair_workers &&
+ fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
+ fs_info->endio_freespace_worker && fs_info->rmw_workers &&
+ fs_info->caching_workers && fs_info->readahead_workers &&
+ fs_info->fixup_workers && fs_info->delayed_workers &&
+ fs_info->extent_workers &&
+ fs_info->qgroup_rescan_workers)) {
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+ struct btrfs_fs_devices *fs_devices)
+{
+ int ret;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *log_tree_root;
+ struct btrfs_super_block *disk_super = fs_info->super_copy;
+ u64 bytenr = btrfs_super_log_root(disk_super);
+
+ if (fs_devices->rw_devices == 0) {
+ printk(KERN_WARNING "BTRFS: log replay required "
+ "on RO media\n");
+ return -EIO;
+ }
+
+ log_tree_root = btrfs_alloc_root(fs_info);
+ if (!log_tree_root)
+ return -ENOMEM;
+
+ __setup_root(tree_root->nodesize, tree_root->sectorsize,
+ tree_root->stripesize, log_tree_root, fs_info,
+ BTRFS_TREE_LOG_OBJECTID);
+
+ log_tree_root->node = read_tree_block(tree_root, bytenr,
+ fs_info->generation + 1);
+ if (!log_tree_root->node ||
+ !extent_buffer_uptodate(log_tree_root->node)) {
+ printk(KERN_ERR "BTRFS: failed to read log tree\n");
+ free_extent_buffer(log_tree_root->node);
+ kfree(log_tree_root);
+ return -EIO;
+ }
+ /* returns with log_tree_root freed on success */
+ ret = btrfs_recover_log_trees(log_tree_root);
+ if (ret) {
+ btrfs_error(tree_root->fs_info, ret,
+ "Failed to recover log tree");
+ free_extent_buffer(log_tree_root->node);
+ kfree(log_tree_root);
+ return ret;
+ }
+
+ if (fs_info->sb->s_flags & MS_RDONLY) {
+ ret = btrfs_commit_super(tree_root);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *tree_root)
+{
+ struct btrfs_root *root;
+ struct btrfs_key location;
+ int ret;
+
+ location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+ location.type = BTRFS_ROOT_ITEM_KEY;
+ location.offset = 0;
+
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->extent_root = root;
+
+ location.objectid = BTRFS_DEV_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->dev_root = root;
+ btrfs_init_devices_late(fs_info);
+
+ location.objectid = BTRFS_CSUM_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->csum_root = root;
+
+ location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (!IS_ERR(root)) {
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->quota_enabled = 1;
+ fs_info->pending_quota_state = 1;
+ fs_info->quota_root = root;
+ }
+
+ location.objectid = BTRFS_UUID_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ if (ret != -ENOENT)
+ return ret;
+ } else {
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->uuid_root = root;
+ }
+
+ return 0;
+}
+
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options)
@@ -2160,21 +2415,12 @@ int open_ctree(struct super_block *sb,
struct btrfs_super_block *disk_super;
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *tree_root;
- struct btrfs_root *extent_root;
- struct btrfs_root *csum_root;
struct btrfs_root *chunk_root;
- struct btrfs_root *dev_root;
- struct btrfs_root *quota_root;
- struct btrfs_root *uuid_root;
- struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
int num_backups_tried = 0;
int backup_index = 0;
int max_active;
- int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
- bool create_uuid_tree;
- bool check_uuid_tree;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2241,11 +2487,12 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
- mutex_init(&fs_info->unused_bg_unpin_mutex);
rwlock_init(&fs_info->tree_mod_log_lock);
+ mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
seqlock_init(&fs_info->profiles_lock);
+ init_rwsem(&fs_info->delayed_iput_sem);
init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2276,7 +2523,7 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
- fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
+ fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
@@ -2294,55 +2541,18 @@ int open_ctree(struct super_block *sb,
}
btrfs_init_delayed_root(fs_info->delayed_root);
- mutex_init(&fs_info->scrub_lock);
- atomic_set(&fs_info->scrubs_running, 0);
- atomic_set(&fs_info->scrub_pause_req, 0);
- atomic_set(&fs_info->scrubs_paused, 0);
- atomic_set(&fs_info->scrub_cancel_req, 0);
- init_waitqueue_head(&fs_info->replace_wait);
- init_waitqueue_head(&fs_info->scrub_pause_wait);
- fs_info->scrub_workers_refcnt = 0;
+ btrfs_init_scrub(fs_info);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
fs_info->check_integrity_print_mask = 0;
#endif
-
- spin_lock_init(&fs_info->balance_lock);
- mutex_init(&fs_info->balance_mutex);
- atomic_set(&fs_info->balance_running, 0);
- atomic_set(&fs_info->balance_pause_req, 0);
- atomic_set(&fs_info->balance_cancel_req, 0);
- fs_info->balance_ctl = NULL;
- init_waitqueue_head(&fs_info->balance_wait_q);
+ btrfs_init_balance(fs_info);
btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
sb->s_bdi = &fs_info->bdi;
- fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
- set_nlink(fs_info->btree_inode, 1);
- /*
- * we set the i_size on the btree inode to the max possible int.
- * the real end of the address space is determined by all of
- * the devices in the system
- */
- fs_info->btree_inode->i_size = OFFSET_MAX;
- fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
-
- RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
- extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
- fs_info->btree_inode->i_mapping);
- BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
- extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
-
- BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
-
- BTRFS_I(fs_info->btree_inode)->root = tree_root;
- memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
- sizeof(struct btrfs_key));
- set_bit(BTRFS_INODE_DUMMY,
- &BTRFS_I(fs_info->btree_inode)->runtime_flags);
- btrfs_insert_inode_hash(fs_info->btree_inode);
+ btrfs_init_btree_inode(fs_info, tree_root);
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
@@ -2363,26 +2573,14 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
+ mutex_init(&fs_info->ro_block_group_mutex);
init_rwsem(&fs_info->commit_root_sem);
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
sema_init(&fs_info->uuid_tree_rescan_sem, 1);
- fs_info->dev_replace.lock_owner = 0;
- atomic_set(&fs_info->dev_replace.nesting_level, 0);
- mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
- mutex_init(&fs_info->dev_replace.lock_management_lock);
- mutex_init(&fs_info->dev_replace.lock);
- spin_lock_init(&fs_info->qgroup_lock);
- mutex_init(&fs_info->qgroup_ioctl_lock);
- fs_info->qgroup_tree = RB_ROOT;
- fs_info->qgroup_op_tree = RB_ROOT;
- INIT_LIST_HEAD(&fs_info->dirty_qgroups);
- fs_info->qgroup_seq = 1;
- fs_info->quota_enabled = 0;
- fs_info->pending_quota_state = 0;
- fs_info->qgroup_ulist = NULL;
- mutex_init(&fs_info->qgroup_rescan_lock);
+ btrfs_init_dev_replace_locks(fs_info);
+ btrfs_init_qgroup(fs_info);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2554,75 +2752,9 @@ int open_ctree(struct super_block *sb,
max_active = fs_info->thread_pool_size;
- fs_info->workers =
- btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
- max_active, 16);
-
- fs_info->delalloc_workers =
- btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
-
- fs_info->flush_workers =
- btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
-
- fs_info->caching_workers =
- btrfs_alloc_workqueue("cache", flags, max_active, 0);
-
- /*
- * a higher idle thresh on the submit workers makes it much more
- * likely that bios will be send down in a sane order to the
- * devices
- */
- fs_info->submit_workers =
- btrfs_alloc_workqueue("submit", flags,
- min_t(u64, fs_devices->num_devices,
- max_active), 64);
-
- fs_info->fixup_workers =
- btrfs_alloc_workqueue("fixup", flags, 1, 0);
-
- /*
- * endios are largely parallel and should have a very
- * low idle thresh
- */
- fs_info->endio_workers =
- btrfs_alloc_workqueue("endio", flags, max_active, 4);
- fs_info->endio_meta_workers =
- btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
- fs_info->endio_meta_write_workers =
- btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
- fs_info->endio_raid56_workers =
- btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
- fs_info->endio_repair_workers =
- btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
- fs_info->rmw_workers =
- btrfs_alloc_workqueue("rmw", flags, max_active, 2);
- fs_info->endio_write_workers =
- btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
- fs_info->endio_freespace_worker =
- btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
- fs_info->delayed_workers =
- btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
- fs_info->readahead_workers =
- btrfs_alloc_workqueue("readahead", flags, max_active, 2);
- fs_info->qgroup_rescan_workers =
- btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
- fs_info->extent_workers =
- btrfs_alloc_workqueue("extent-refs", flags,
- min_t(u64, fs_devices->num_devices,
- max_active), 8);
-
- if (!(fs_info->workers && fs_info->delalloc_workers &&
- fs_info->submit_workers && fs_info->flush_workers &&
- fs_info->endio_workers && fs_info->endio_meta_workers &&
- fs_info->endio_meta_write_workers &&
- fs_info->endio_repair_workers &&
- fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
- fs_info->endio_freespace_worker && fs_info->rmw_workers &&
- fs_info->caching_workers && fs_info->readahead_workers &&
- fs_info->fixup_workers && fs_info->delayed_workers &&
- fs_info->extent_workers &&
- fs_info->qgroup_rescan_workers)) {
- err = -ENOMEM;
+ ret = btrfs_init_workqueues(fs_info, fs_devices);
+ if (ret) {
+ err = ret;
goto fail_sb_buffer;
}
@@ -2688,7 +2820,7 @@ int open_ctree(struct super_block *sb,
* keep the device that is marked to be the target device for the
* dev_replace procedure
*/
- btrfs_close_extra_devices(fs_info, fs_devices, 0);
+ btrfs_close_extra_devices(fs_devices, 0);
if (!fs_devices->latest_bdev) {
printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
@@ -2714,61 +2846,9 @@ retry_root_backup:
tree_root->commit_root = btrfs_root_node(tree_root);
btrfs_set_root_refs(&tree_root->root_item, 1);
- location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = 0;
-
- extent_root = btrfs_read_tree_root(tree_root, &location);
- if (IS_ERR(extent_root)) {
- ret = PTR_ERR(extent_root);
- goto recovery_tree_root;
- }
- set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
- fs_info->extent_root = extent_root;
-
- location.objectid = BTRFS_DEV_TREE_OBJECTID;
- dev_root = btrfs_read_tree_root(tree_root, &location);
- if (IS_ERR(dev_root)) {
- ret = PTR_ERR(dev_root);
- goto recovery_tree_root;
- }
- set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
- fs_info->dev_root = dev_root;
- btrfs_init_devices_late(fs_info);
-
- location.objectid = BTRFS_CSUM_TREE_OBJECTID;
- csum_root = btrfs_read_tree_root(tree_root, &location);
- if (IS_ERR(csum_root)) {
- ret = PTR_ERR(csum_root);
+ ret = btrfs_read_roots(fs_info, tree_root);
+ if (ret)
goto recovery_tree_root;
- }
- set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
- fs_info->csum_root = csum_root;
-
- location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
- quota_root = btrfs_read_tree_root(tree_root, &location);
- if (!IS_ERR(quota_root)) {
- set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
- fs_info->quota_enabled = 1;
- fs_info->pending_quota_state = 1;
- fs_info->quota_root = quota_root;
- }
-
- location.objectid = BTRFS_UUID_TREE_OBJECTID;
- uuid_root = btrfs_read_tree_root(tree_root, &location);
- if (IS_ERR(uuid_root)) {
- ret = PTR_ERR(uuid_root);
- if (ret != -ENOENT)
- goto recovery_tree_root;
- create_uuid_tree = true;
- check_uuid_tree = false;
- } else {
- set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
- fs_info->uuid_root = uuid_root;
- create_uuid_tree = false;
- check_uuid_tree =
- generation != btrfs_super_uuid_tree_generation(disk_super);
- }
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
@@ -2792,7 +2872,7 @@ retry_root_backup:
goto fail_block_groups;
}
- btrfs_close_extra_devices(fs_info, fs_devices, 1);
+ btrfs_close_extra_devices(fs_devices, 1);
ret = btrfs_sysfs_add_one(fs_info);
if (ret) {
@@ -2806,7 +2886,7 @@ retry_root_backup:
goto fail_sysfs;
}
- ret = btrfs_read_block_groups(extent_root);
+ ret = btrfs_read_block_groups(fs_info->extent_root);
if (ret) {
printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
goto fail_sysfs;
@@ -2864,48 +2944,11 @@ retry_root_backup:
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0) {
- u64 bytenr = btrfs_super_log_root(disk_super);
-
- if (fs_devices->rw_devices == 0) {
- printk(KERN_WARNING "BTRFS: log replay required "
- "on RO media\n");
- err = -EIO;
- goto fail_qgroup;
- }
-
- log_tree_root = btrfs_alloc_root(fs_info);
- if (!log_tree_root) {
- err = -ENOMEM;
- goto fail_qgroup;
- }
-
- __setup_root(nodesize, sectorsize, stripesize,
- log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
-
- log_tree_root->node = read_tree_block(tree_root, bytenr,
- generation + 1);
- if (!log_tree_root->node ||
- !extent_buffer_uptodate(log_tree_root->node)) {
- printk(KERN_ERR "BTRFS: failed to read log tree\n");
- free_extent_buffer(log_tree_root->node);
- kfree(log_tree_root);
- goto fail_qgroup;
- }
- /* returns with log_tree_root freed on success */
- ret = btrfs_recover_log_trees(log_tree_root);
+ ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
- btrfs_error(tree_root->fs_info, ret,
- "Failed to recover log tree");
- free_extent_buffer(log_tree_root->node);
- kfree(log_tree_root);
+ err = ret;
goto fail_qgroup;
}
-
- if (sb->s_flags & MS_RDONLY) {
- ret = btrfs_commit_super(tree_root);
- if (ret)
- goto fail_qgroup;
- }
}
ret = btrfs_find_orphan_roots(tree_root);
@@ -2966,7 +3009,7 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
- if (create_uuid_tree) {
+ if (!fs_info->uuid_root) {
pr_info("BTRFS: creating UUID tree\n");
ret = btrfs_create_uuid_tree(fs_info);
if (ret) {
@@ -2975,8 +3018,9 @@ retry_root_backup:
close_ctree(tree_root);
return ret;
}
- } else if (check_uuid_tree ||
- btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
+ } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+ fs_info->generation !=
+ btrfs_super_uuid_tree_generation(disk_super)) {
pr_info("BTRFS: checking UUID tree\n");
ret = btrfs_check_uuid_tree(fs_info);
if (ret) {
@@ -3668,7 +3712,7 @@ void close_ctree(struct btrfs_root *root)
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
- btrfs_err(root->fs_info, "commit super ret %d", ret);
+ btrfs_err(fs_info, "commit super ret %d", ret);
}
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
@@ -3680,10 +3724,10 @@ void close_ctree(struct btrfs_root *root)
fs_info->closing = 2;
smp_mb();
- btrfs_free_qgroup_config(root->fs_info);
+ btrfs_free_qgroup_config(fs_info);
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
- btrfs_info(root->fs_info, "at unmount delalloc count %lld",
+ btrfs_info(fs_info, "at unmount delalloc count %lld",
percpu_counter_sum(&fs_info->delalloc_bytes));
}
@@ -3723,7 +3767,7 @@ void close_ctree(struct btrfs_root *root)
btrfs_free_stripe_hash_table(fs_info);
- btrfs_free_block_rsv(root, root->orphan_block_rsv);
+ __btrfs_free_block_rsv(root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
lock_chunks(root);
@@ -4134,7 +4178,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
- eb = btrfs_find_tree_block(root, start);
+ eb = btrfs_find_tree_block(root->fs_info, start);
start += root->nodesize;
if (!eb)
continue;
@@ -4285,7 +4329,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
return 0;
}
-static struct extent_io_ops btree_extent_io_ops = {
+static const struct extent_io_ops btree_extent_io_ops = {
.readpage_end_io_hook = btree_readpage_end_io_hook,
.readpage_io_failed_hook = btree_io_failed_hook,
.submit_bio_hook = btree_submit_bio_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 27d44c0fd236..d4cbfeeeedd4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,7 +52,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr);
void clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf);
+ struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
@@ -61,7 +61,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr);
struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
struct btrfs_key *location);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 37d164540c3a..8d052209f473 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -152,7 +152,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
static struct dentry *btrfs_get_parent(struct dentry *child)
{
- struct inode *dir = child->d_inode;
+ struct inode *dir = d_inode(child);
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -220,8 +220,8 @@ fail:
static int btrfs_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
- struct inode *inode = child->d_inode;
- struct inode *dir = parent->d_inode;
+ struct inode *inode = d_inode(child);
+ struct inode *dir = d_inode(parent);
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_inode_ref *iref;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8b353ad02f03..0ec8e228b89f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* list before we release it.
*/
if (btrfs_delayed_ref_is_head(ref)) {
+ if (locked_ref->is_data &&
+ locked_ref->total_ref_mod < 0) {
+ spin_lock(&delayed_refs->lock);
+ delayed_refs->pending_csums -= ref->num_bytes;
+ spin_unlock(&delayed_refs->lock);
+ }
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
}
@@ -2561,8 +2567,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
*/
spin_lock(&delayed_refs->lock);
avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
- avg = div64_u64(avg, 4);
- fs_info->avg_delayed_ref_runtime = avg;
+ fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */
spin_unlock(&delayed_refs->lock);
}
return 0;
@@ -2624,7 +2629,26 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
* We don't ever fill up leaves all the way so multiply by 2 just to be
* closer to what we're really going to want to ouse.
*/
- return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+ return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+}
+
+/*
+ * Takes the number of bytes to be csumm'ed (one csum per sectorsize bytes) and
+ * returns how many leaves are needed to store those csums, rounded up.
+ */
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
+{
+ u64 csum_size;
+ u64 num_csums_per_leaf;
+ u64 num_csums;
+
+ csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); /* leaf data space minus one item header */
+ num_csums_per_leaf = div64_u64(csum_size,
+ (u64)btrfs_super_csum_size(root->fs_info->super_copy));
+ num_csums = div64_u64(csum_bytes, root->sectorsize);
+ num_csums += num_csums_per_leaf - 1; /* bias so the division below rounds up */
+ num_csums = div64_u64(num_csums, num_csums_per_leaf); /* from here on this is a leaf count */
+ return num_csums;
}
int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2632,7 +2656,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
{
struct btrfs_block_rsv *global_rsv;
u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
- u64 num_bytes;
+ u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
+ u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
+ u64 num_bytes, num_dirty_bgs_bytes;
int ret = 0;
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
@@ -2640,17 +2666,22 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
if (num_heads > 1)
num_bytes += (num_heads - 1) * root->nodesize;
num_bytes <<= 1;
+ num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
+ num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
+ num_dirty_bgs);
global_rsv = &root->fs_info->global_block_rsv;
/*
* If we can't allocate any more chunks lets make sure we have _lots_ of
* wiggle room since running delayed refs can create more delayed refs.
*/
- if (global_rsv->space_info->full)
+ if (global_rsv->space_info->full) {
+ num_dirty_bgs_bytes <<= 1;
num_bytes <<= 1;
+ }
spin_lock(&global_rsv->lock);
- if (global_rsv->reserved <= num_bytes)
+ if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
ret = 1;
spin_unlock(&global_rsv->lock);
return ret;
@@ -3147,8 +3178,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
fail:
+ btrfs_release_path(path);
if (ret)
btrfs_abort_transaction(trans, root, ret);
return ret;
@@ -3193,7 +3224,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
struct inode *inode = NULL;
u64 alloc_hint = 0;
int dcs = BTRFS_DC_ERROR;
- int num_pages = 0;
+ u64 num_pages = 0;
int retries = 0;
int ret = 0;
@@ -3267,15 +3298,14 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
if (ret)
goto out_put;
}
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
- !btrfs_test_opt(root, SPACE_CACHE) ||
- block_group->delalloc_bytes) {
+ !btrfs_test_opt(root, SPACE_CACHE)) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
@@ -3293,14 +3323,14 @@ again:
* taking up quite a bit since it's not folded into the other space
* cache.
*/
- num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
+ num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
if (!num_pages)
num_pages = 1;
num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE;
- ret = btrfs_check_data_free_space(inode, num_pages);
+ ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
if (ret)
goto out_put;
@@ -3351,16 +3381,166 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
return 0;
}
+/*
+ * transaction commit does final block group cache writeback during a
+ * critical section where nothing is allowed to change the FS. This keeps
+ * the cache matching the block group, but adds latency to the commit.
+ *
+ * So, btrfs_start_dirty_block_groups is here to kick off block group
+ * cache IO while others may still join the commit. Some of it may have
+ * to be redone if a block group changes again during the commit.
+ *
+ * Returns 0 when nothing is dirty, -ENOMEM when no path can be allocated,
+ * otherwise the result of the last btrfs_run_delayed_refs() call.
+ */
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_transaction *cur_trans = trans->transaction;
+ int ret = 0;
+ int should_put;
+ struct btrfs_path *path = NULL;
+ LIST_HEAD(dirty);
+ struct list_head *io = &cur_trans->io_bgs;
+ int num_started = 0; /* only incremented below; never read in this function */
+ int loops = 0;
+
+ spin_lock(&cur_trans->dirty_bgs_lock);
+ if (list_empty(&cur_trans->dirty_bgs)) {
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ return 0;
+ }
+ list_splice_init(&cur_trans->dirty_bgs, &dirty);
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+
+again:
+ /*
+ * make sure all the block groups on our dirty list actually
+ * exist
+ */
+ btrfs_create_pending_block_groups(trans, root);
+
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM; /* NOTE(review): entries already spliced onto 'dirty' are left there on this return - confirm intended */
+ }
+
+ /*
+ * cache_write_mutex is here only to save us from balance or automatic
+ * removal of empty block groups deleting this block group while we are
+ * writing out the cache
+ */
+ mutex_lock(&trans->transaction->cache_write_mutex);
+ while (!list_empty(&dirty)) {
+ cache = list_first_entry(&dirty,
+ struct btrfs_block_group_cache,
+ dirty_list);
+ /*
+ * this can happen if something re-dirties a block
+ * group that is already under IO. Just wait for it to
+ * finish and then do it all again
+ */
+ if (!list_empty(&cache->io_list)) {
+ list_del_init(&cache->io_list);
+ btrfs_wait_cache_io(root, trans, cache,
+ &cache->io_ctl, path,
+ cache->key.objectid);
+ btrfs_put_block_group(cache);
+ }
+
+
+ /*
+ * btrfs_wait_cache_io uses the cache->dirty_list to decide
+ * if it should update the cache_state. Don't delete
+ * until after we wait.
+ *
+ * Since we're not running in the commit critical section
+ * we need the dirty_bgs_lock to protect from update_block_group
+ */
+ spin_lock(&cur_trans->dirty_bgs_lock);
+ list_del_init(&cache->dirty_list);
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+
+ should_put = 1;
+
+ cache_save_setup(cache, trans, path);
+
+ if (cache->disk_cache_state == BTRFS_DC_SETUP) {
+ cache->io_ctl.inode = NULL; /* reset so the check below only sees an inode set by this write */
+ ret = btrfs_write_out_cache(root, trans, cache, path);
+ if (ret == 0 && cache->io_ctl.inode) {
+ num_started++;
+ should_put = 0;
+
+ /*
+ * the cache_write_mutex is protecting
+ * the io_list
+ */
+ list_add_tail(&cache->io_list, io);
+ } else {
+ /*
+ * if we failed to write the cache, the
+ * generation will be bad and life goes on
+ */
+ ret = 0;
+ }
+ }
+ if (!ret)
+ ret = write_one_cache_group(trans, root, path, cache);
+
+ /* if it's not on the io list, we need to put the block group */
+ if (should_put)
+ btrfs_put_block_group(cache);
+
+ if (ret)
+ break;
+
+ /*
+ * Avoid blocking other tasks for too long. It might even save
+ * us from writing caches for block groups that are going to be
+ * removed.
+ */
+ mutex_unlock(&trans->transaction->cache_write_mutex);
+ mutex_lock(&trans->transaction->cache_write_mutex);
+ }
+ mutex_unlock(&trans->transaction->cache_write_mutex);
+
+ /*
+ * go through delayed refs for everything we just kicked off, then loop
+ * back (just once). NOTE(review): overwrites a loop error in ret - confirm
+ */
+ ret = btrfs_run_delayed_refs(trans, root, 0);
+ if (!ret && loops == 0) {
+ loops++;
+ spin_lock(&cur_trans->dirty_bgs_lock);
+ list_splice_init(&cur_trans->dirty_bgs, &dirty);
+ /*
+ * dirty_bgs_lock protects us from concurrent block group
+ * deletes too (not just cache_write_mutex).
+ */
+ if (!list_empty(&dirty)) {
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ goto again;
+ }
+ spin_unlock(&cur_trans->dirty_bgs_lock);
+ }
+
+ btrfs_free_path(path);
+ return ret;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_block_group_cache *cache;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
+ int should_put;
struct btrfs_path *path;
-
- if (list_empty(&cur_trans->dirty_bgs))
- return 0;
+ struct list_head *io = &cur_trans->io_bgs;
+ int num_started = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3376,16 +3556,61 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
cache = list_first_entry(&cur_trans->dirty_bgs,
struct btrfs_block_group_cache,
dirty_list);
+
+ /*
+ * this can happen if cache_save_setup re-dirties a block
+ * group that is already under IO. Just wait for it to
+ * finish and then do it all again
+ */
+ if (!list_empty(&cache->io_list)) {
+ list_del_init(&cache->io_list);
+ btrfs_wait_cache_io(root, trans, cache,
+ &cache->io_ctl, path,
+ cache->key.objectid);
+ btrfs_put_block_group(cache);
+ }
+
+ /*
+ * don't remove from the dirty list until after we've waited
+ * on any pending IO
+ */
list_del_init(&cache->dirty_list);
- if (cache->disk_cache_state == BTRFS_DC_CLEAR)
- cache_save_setup(cache, trans, path);
+ should_put = 1;
+
+ cache_save_setup(cache, trans, path);
+
if (!ret)
- ret = btrfs_run_delayed_refs(trans, root,
- (unsigned long) -1);
- if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
- btrfs_write_out_cache(root, trans, cache, path);
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
+
+ if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
+ cache->io_ctl.inode = NULL;
+ ret = btrfs_write_out_cache(root, trans, cache, path);
+ if (ret == 0 && cache->io_ctl.inode) {
+ num_started++;
+ should_put = 0;
+ list_add_tail(&cache->io_list, io);
+ } else {
+ /*
+ * if we failed to write the cache, the
+ * generation will be bad and life goes on
+ */
+ ret = 0;
+ }
+ }
if (!ret)
ret = write_one_cache_group(trans, root, path, cache);
+
+ /* if its not on the io list, we need to put the block group */
+ if (should_put)
+ btrfs_put_block_group(cache);
+ }
+
+ while (!list_empty(io)) {
+ cache = list_first_entry(io, struct btrfs_block_group_cache,
+ io_list);
+ list_del_init(&cache->io_list);
+ btrfs_wait_cache_io(root, trans, cache,
+ &cache->io_ctl, path, cache->key.objectid);
btrfs_put_block_group(cache);
}
@@ -3635,19 +3860,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
{
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 used;
- int ret = 0, committed = 0, alloc_chunk = 1;
+ int ret = 0;
+ int need_commit = 2;
+ int have_pinned_space;
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
if (btrfs_is_free_space_inode(inode)) {
- committed = 1;
+ need_commit = 0;
ASSERT(current->journal_info);
}
@@ -3669,7 +3896,7 @@ again:
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
*/
- if (!data_sinfo->full && alloc_chunk) {
+ if (!data_sinfo->full) {
u64 alloc_target;
data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
@@ -3697,8 +3924,10 @@ alloc:
if (ret < 0) {
if (ret != -ENOSPC)
return ret;
- else
+ else {
+ have_pinned_space = 1;
goto commit_trans;
+ }
}
if (!data_sinfo)
@@ -3709,26 +3938,39 @@ alloc:
/*
* If we don't have enough pinned space to deal with this
- * allocation don't bother committing the transaction.
+ * allocation, and no removed chunk in current transaction,
+ * don't bother committing the transaction.
*/
- if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
- bytes) < 0)
- committed = 1;
+ have_pinned_space = percpu_counter_compare(
+ &data_sinfo->total_bytes_pinned,
+ used + bytes - data_sinfo->total_bytes);
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
commit_trans:
- if (!committed &&
+ if (need_commit &&
!atomic_read(&root->fs_info->open_ioctl_trans)) {
- committed = 1;
+ need_commit--;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- goto again;
+ if (have_pinned_space >= 0 ||
+ trans->transaction->have_free_bgs ||
+ need_commit > 0) {
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ /*
+ * make sure that all running delayed iput are
+ * done
+ */
+ down_write(&root->fs_info->delayed_iput_sem);
+ up_write(&root->fs_info->delayed_iput_sem);
+ goto again;
+ } else {
+ btrfs_end_transaction(trans, root);
+ }
}
trace_btrfs_space_reservation(root->fs_info,
@@ -3736,12 +3978,16 @@ commit_trans:
data_sinfo->flags, bytes, 1);
return -ENOSPC;
}
+ ret = btrfs_qgroup_reserve(root, write_bytes);
+ if (ret)
+ goto out;
data_sinfo->bytes_may_use += bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
data_sinfo->flags, bytes, 1);
+out:
spin_unlock(&data_sinfo->lock);
- return 0;
+ return ret;
}
/*
@@ -4298,8 +4544,13 @@ out:
static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
struct btrfs_fs_info *fs_info, u64 used)
{
- return (used >= div_factor_fine(space_info->total_bytes, 98) &&
- !btrfs_fs_closing(fs_info) &&
+ u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+
+ /* If we're just plain full then async reclaim just slows us down. */
+ if (space_info->bytes_used >= thresh)
+ return 0;
+
+ return (used >= thresh && !btrfs_fs_closing(fs_info) &&
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
@@ -4354,10 +4605,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
if (!btrfs_need_do_async_reclaim(space_info, fs_info,
flush_state))
return;
- } while (flush_state <= COMMIT_TRANS);
-
- if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state))
- queue_work(system_unbound_wq, work);
+ } while (flush_state < COMMIT_TRANS);
}
void btrfs_init_async_reclaim_work(struct work_struct *work)
@@ -4700,6 +4948,11 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
kfree(rsv);
}
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
+{
+ kfree(rsv); /* just kfree()s the rsv; takes no root argument */
+}
+
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush)
@@ -4812,10 +5065,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
csum_size * 2;
- num_bytes += div64_u64(data_used + meta_used, 50);
+ num_bytes += div_u64(data_used + meta_used, 50);
if (num_bytes * 3 > meta_used)
- num_bytes = div64_u64(meta_used, 3);
+ num_bytes = div_u64(meta_used, 3);
return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
}
@@ -4998,8 +5251,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
u64 qgroup_reserved)
{
btrfs_block_rsv_release(root, rsv, (u64)-1);
- if (qgroup_reserved)
- btrfs_qgroup_free(root, qgroup_reserved);
}
/**
@@ -5066,30 +5317,18 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
int reserve)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 csum_size;
- int num_csums_per_leaf;
- int num_csums;
- int old_csums;
+ u64 old_csums, num_csums;
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
BTRFS_I(inode)->csum_bytes == 0)
return 0;
- old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
+ old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
if (reserve)
BTRFS_I(inode)->csum_bytes += num_bytes;
else
BTRFS_I(inode)->csum_bytes -= num_bytes;
- csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
- num_csums_per_leaf = (int)div64_u64(csum_size,
- sizeof(struct btrfs_csum_item) +
- sizeof(struct btrfs_disk_key));
- num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
- num_csums = num_csums + num_csums_per_leaf - 1;
- num_csums = num_csums / num_csums_per_leaf;
-
- old_csums = old_csums + num_csums_per_leaf - 1;
- old_csums = old_csums / num_csums_per_leaf;
+ num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
/* No change, no need to reserve more */
if (old_csums == num_csums)
@@ -5163,8 +5402,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
spin_unlock(&BTRFS_I(inode)->lock);
if (root->fs_info->quota_enabled) {
- ret = btrfs_qgroup_reserve(root, num_bytes +
- nr_extents * root->nodesize);
+ ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
if (ret)
goto out_fail;
}
@@ -5172,8 +5410,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (unlikely(ret)) {
if (root->fs_info->quota_enabled)
- btrfs_qgroup_free(root, num_bytes +
- nr_extents * root->nodesize);
+ btrfs_qgroup_free(root, nr_extents * root->nodesize);
goto out_fail;
}
@@ -5290,10 +5527,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0);
- if (root->fs_info->quota_enabled) {
- btrfs_qgroup_free(root, num_bytes +
- dropped * root->nodesize);
- }
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
@@ -5318,7 +5551,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
{
int ret;
- ret = btrfs_check_data_free_space(inode, num_bytes);
+ ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
if (ret)
return ret;
@@ -5390,14 +5623,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
if (!alloc && cache->cached == BTRFS_CACHE_NO)
cache_block_group(cache, 1);
- spin_lock(&trans->transaction->dirty_bgs_lock);
- if (list_empty(&cache->dirty_list)) {
- list_add_tail(&cache->dirty_list,
- &trans->transaction->dirty_bgs);
- btrfs_get_block_group(cache);
- }
- spin_unlock(&trans->transaction->dirty_bgs_lock);
-
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
@@ -5446,6 +5671,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&info->unused_bgs_lock);
}
}
+
+ spin_lock(&trans->transaction->dirty_bgs_lock);
+ if (list_empty(&cache->dirty_list)) {
+ list_add_tail(&cache->dirty_list,
+ &trans->transaction->dirty_bgs);
+ trans->transaction->num_dirty_bgs++;
+ btrfs_get_block_group(cache);
+ }
+ spin_unlock(&trans->transaction->dirty_bgs_lock);
+
btrfs_put_block_group(cache);
total -= num_bytes;
bytenr += num_bytes;
@@ -6956,15 +7191,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
return -ENOSPC;
}
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_discard_extent(root, start, len, NULL);
-
if (pin)
pin_down_extent(root, cache, start, len, 1);
else {
+ if (btrfs_test_opt(root, DISCARD))
+ ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
}
+
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
@@ -7095,9 +7330,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
if (ret) {
+ btrfs_free_path(path);
btrfs_free_and_pin_reserved_extent(root, ins->objectid,
root->nodesize);
- btrfs_free_path(path);
return ret;
}
@@ -7217,7 +7452,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
- clean_tree_block(trans, root, buf);
+ clean_tree_block(trans, root->fs_info, buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking(buf);
@@ -7311,7 +7546,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
* returns the key for the extent through ins, and a tree buffer for
* the first block of the extent through buf.
*
- * returns the tree buffer or NULL.
+ * returns the tree buffer or an ERR_PTR on error.
*/
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -7322,6 +7557,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
+ struct btrfs_delayed_extent_op *extent_op;
u64 flags = 0;
int ret;
u32 blocksize = root->nodesize;
@@ -7342,13 +7578,14 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
ret = btrfs_reserve_extent(root, blocksize, blocksize,
empty_size, hint, &ins, 0, 0);
- if (ret) {
- unuse_block_rsv(root->fs_info, block_rsv, blocksize);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto out_unuse;
buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
- BUG_ON(IS_ERR(buf)); /* -ENOMEM */
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto out_free_reserved;
+ }
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (parent == 0)
@@ -7358,9 +7595,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
- struct btrfs_delayed_extent_op *extent_op;
extent_op = btrfs_alloc_delayed_extent_op();
- BUG_ON(!extent_op); /* -ENOMEM */
+ if (!extent_op) {
+ ret = -ENOMEM;
+ goto out_free_buf;
+ }
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
@@ -7375,13 +7614,24 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->level = level;
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
- ins.objectid,
- ins.offset, parent, root_objectid,
- level, BTRFS_ADD_DELAYED_EXTENT,
- extent_op, 0);
- BUG_ON(ret); /* -ENOMEM */
+ ins.objectid, ins.offset,
+ parent, root_objectid, level,
+ BTRFS_ADD_DELAYED_EXTENT,
+ extent_op, 0);
+ if (ret)
+ goto out_free_delayed;
}
return buf;
+
+out_free_delayed:
+ btrfs_free_delayed_extent_op(extent_op);
+out_free_buf:
+ free_extent_buffer(buf);
+out_free_reserved:
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
+out_unuse:
+ unuse_block_rsv(root->fs_info, block_rsv, blocksize);
+ return ERR_PTR(ret);
}
struct walk_control {
@@ -7815,7 +8065,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
blocksize = root->nodesize;
- next = btrfs_find_tree_block(root, bytenr);
+ next = btrfs_find_tree_block(root->fs_info, bytenr);
if (!next) {
next = btrfs_find_create_tree_block(root, bytenr);
if (!next)
@@ -8016,7 +8266,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_set_lock_blocking(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
- clean_tree_block(trans, root, eb);
+ clean_tree_block(trans, root->fs_info, eb);
}
if (eb == root->node) {
@@ -8533,10 +8783,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
BUG_ON(cache->ro);
+again:
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
+ /*
+ * we're not allowed to set block groups readonly after the dirty
+ * block groups cache has started writing. If it already started,
+ * back off and let this transaction commit
+ */
+ mutex_lock(&root->fs_info->ro_block_group_mutex);
+ if (trans->transaction->dirty_bg_run) {
+ u64 transid = trans->transid;
+
+ mutex_unlock(&root->fs_info->ro_block_group_mutex);
+ btrfs_end_transaction(trans, root);
+
+ ret = btrfs_wait_for_commit(root, transid);
+ if (ret)
+ return ret;
+ goto again;
+ }
+
+
ret = set_block_group_ro(cache, 0);
if (!ret)
goto out;
@@ -8551,6 +8821,7 @@ out:
alloc_flags = update_block_group_flags(root, cache->flags);
check_system_chunk(trans, root, alloc_flags);
}
+ mutex_unlock(&root->fs_info->ro_block_group_mutex);
btrfs_end_transaction(trans, root);
return ret;
@@ -8720,7 +8991,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
min_free <<= 1;
} else if (index == BTRFS_RAID_RAID0) {
dev_min = fs_devices->rw_devices;
- do_div(min_free, dev_min);
+ min_free = div64_u64(min_free, dev_min);
}
/* We need to do this so that we can look at pending chunks */
@@ -8992,6 +9263,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
INIT_LIST_HEAD(&cache->bg_list);
INIT_LIST_HEAD(&cache->ro_list);
INIT_LIST_HEAD(&cache->dirty_list);
+ INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
atomic_set(&cache->trimming, 0);
@@ -9355,7 +9627,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
goto out;
}
+ /*
+ * get the inode first so any iput calls done for the io_list
+ * aren't the final iput (no unlinks allowed now)
+ */
inode = lookup_free_space_inode(tree_root, block_group, path);
+
+ mutex_lock(&trans->transaction->cache_write_mutex);
+ /*
+ * make sure our free space cache IO is done before removing the
+ * free space inode
+ */
+ spin_lock(&trans->transaction->dirty_bgs_lock);
+ if (!list_empty(&block_group->io_list)) {
+ list_del_init(&block_group->io_list);
+
+ WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
+
+ spin_unlock(&trans->transaction->dirty_bgs_lock);
+ btrfs_wait_cache_io(root, trans, block_group,
+ &block_group->io_ctl, path,
+ block_group->key.objectid);
+ btrfs_put_block_group(block_group);
+ spin_lock(&trans->transaction->dirty_bgs_lock);
+ }
+
+ if (!list_empty(&block_group->dirty_list)) {
+ list_del_init(&block_group->dirty_list);
+ btrfs_put_block_group(block_group);
+ }
+ spin_unlock(&trans->transaction->dirty_bgs_lock);
+ mutex_unlock(&trans->transaction->cache_write_mutex);
+
if (!IS_ERR(inode)) {
ret = btrfs_orphan_add(trans, inode);
if (ret) {
@@ -9448,18 +9751,29 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_lock(&trans->transaction->dirty_bgs_lock);
if (!list_empty(&block_group->dirty_list)) {
- list_del_init(&block_group->dirty_list);
- btrfs_put_block_group(block_group);
+ WARN_ON(1);
+ }
+ if (!list_empty(&block_group->io_list)) {
+ WARN_ON(1);
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
-
btrfs_remove_free_space_cache(block_group);
spin_lock(&block_group->space_info->lock);
list_del_init(&block_group->ro_list);
+
+ if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ WARN_ON(block_group->space_info->total_bytes
+ < block_group->key.offset);
+ WARN_ON(block_group->space_info->bytes_readonly
+ < block_group->key.offset);
+ WARN_ON(block_group->space_info->disk_total
+ < block_group->key.offset * factor);
+ }
block_group->space_info->total_bytes -= block_group->key.offset;
block_group->space_info->bytes_readonly -= block_group->key.offset;
block_group->space_info->disk_total -= block_group->key.offset * factor;
+
spin_unlock(&block_group->space_info->lock);
memcpy(&key, &block_group->key, sizeof(key));
@@ -9647,8 +9961,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
/* Reset pinned so btrfs_put_block_group doesn't complain */
+ spin_lock(&space_info->lock);
+ spin_lock(&block_group->lock);
+
+ space_info->bytes_pinned -= block_group->pinned;
+ space_info->bytes_readonly += block_group->pinned;
+ percpu_counter_add(&space_info->total_bytes_pinned,
+ -block_group->pinned);
block_group->pinned = 0;
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+
/*
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d688cfe5d496..43af5a61ad25 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4514,8 +4514,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
em_len, flags);
- if (ret)
+ if (ret) {
+ if (ret == 1)
+ ret = 0;
goto out_free;
+ }
}
out_free:
free_extent_map(em);
@@ -4557,36 +4560,37 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
do {
index--;
page = eb->pages[index];
- if (page && mapped) {
+ if (!page)
+ continue;
+ if (mapped)
spin_lock(&page->mapping->private_lock);
+ /*
+ * We do this since we'll remove the pages after we've
+ * removed the eb from the radix tree, so we could race
+ * and have this page now attached to the new eb. So
+ * only clear page_private if it's still connected to
+ * this eb.
+ */
+ if (PagePrivate(page) &&
+ page->private == (unsigned long)eb) {
+ BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(PageDirty(page));
+ BUG_ON(PageWriteback(page));
/*
- * We do this since we'll remove the pages after we've
- * removed the eb from the radix tree, so we could race
- * and have this page now attached to the new eb. So
- * only clear page_private if it's still connected to
- * this eb.
+ * We need to make sure we haven't been attached
+ * to a new eb.
*/
- if (PagePrivate(page) &&
- page->private == (unsigned long)eb) {
- BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
- BUG_ON(PageDirty(page));
- BUG_ON(PageWriteback(page));
- /*
- * We need to make sure we haven't be attached
- * to a new eb.
- */
- ClearPagePrivate(page);
- set_page_private(page, 0);
- /* One for the page private */
- page_cache_release(page);
- }
- spin_unlock(&page->mapping->private_lock);
-
- }
- if (page) {
- /* One for when we alloced the page */
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ /* One for the page private */
page_cache_release(page);
}
+
+ if (mapped)
+ spin_unlock(&page->mapping->private_lock);
+
+ /* One for when we alloced the page */
+ page_cache_release(page);
} while (index != 0);
}
@@ -4867,6 +4871,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
mark_extent_buffer_accessed(exists, p);
goto free_eb;
}
+ exists = NULL;
/*
* Do this so attach doesn't complain and we need to
@@ -4930,12 +4935,12 @@ again:
return eb;
free_eb:
+ WARN_ON(!atomic_dec_and_test(&eb->refs));
for (i = 0; i < num_pages; i++) {
if (eb->pages[i])
unlock_page(eb->pages[i]);
}
- WARN_ON(!atomic_dec_and_test(&eb->refs));
btrfs_release_extent_buffer(eb);
return exists;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 695b0ccfb755..c668f36898d3 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -97,7 +97,7 @@ struct extent_io_tree {
u64 dirty_bytes;
int track_uptodate;
spinlock_t lock;
- struct extent_io_ops *ops;
+ const struct extent_io_ops *ops;
};
struct extent_state {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 84a2d1868271..58ece6558430 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
- btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
- GFP_NOFS);
+ btrfs_bio->csum_allocated = kmalloc_array(nblocks,
+ csum_size, GFP_NOFS);
if (!btrfs_bio->csum_allocated) {
btrfs_free_path(path);
return -ENOMEM;
@@ -553,7 +553,7 @@ static noinline void truncate_one_csum(struct btrfs_root *root,
btrfs_truncate_item(root, path, new_size, 0);
key->offset = end_byte;
- btrfs_set_item_key_safe(root, path, key);
+ btrfs_set_item_key_safe(root->fs_info, path, key);
} else {
BUG();
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 30982bbd31c3..b072e17479aa 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
@@ -32,6 +31,7 @@
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
+#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
defrag = rb_entry(node, struct inode_defrag, rb_node);
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
- if (need_resched()) {
- spin_unlock(&fs_info->defrag_inodes_lock);
- cond_resched();
- spin_lock(&fs_info->defrag_inodes_lock);
- }
+ cond_resched_lock(&fs_info->defrag_inodes_lock);
node = rb_first(&fs_info->defrag_inodes);
}
@@ -868,7 +864,7 @@ next_slot:
memcpy(&new_key, &key, sizeof(new_key));
new_key.offset = end;
- btrfs_set_item_key_safe(root, path, &new_key);
+ btrfs_set_item_key_safe(root->fs_info, path, &new_key);
extent_offset += end - key.offset;
btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1126,7 +1122,7 @@ again:
ino, bytenr, orig_offset,
&other_start, &other_end)) {
new_key.offset = end;
- btrfs_set_item_key_safe(root, path, &new_key);
+ btrfs_set_item_key_safe(root->fs_info, path, &new_key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, fi,
@@ -1160,7 +1156,7 @@ again:
trans->transid);
path->slots[0]++;
new_key.offset = start;
- btrfs_set_item_key_safe(root, path, &new_key);
+ btrfs_set_item_key_safe(root->fs_info, path, &new_key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
PAGE_CACHE_SIZE / (sizeof(struct page *)));
nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
nrptrs = max(nrptrs, 8);
- pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+ pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return -ENOMEM;
@@ -1514,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
- ret = btrfs_check_data_free_space(inode, reserve_bytes);
+ ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
if (ret == -ENOSPC &&
(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC))) {
@@ -1635,8 +1631,8 @@ again:
btrfs_end_write_no_snapshoting(root);
if (only_release_metadata && copied > 0) {
- u64 lockstart = round_down(pos, root->sectorsize);
- u64 lockend = lockstart +
+ lockstart = round_down(pos, root->sectorsize);
+ lockend = lockstart +
(dirty_pages << PAGE_CACHE_SHIFT) - 1;
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -1739,27 +1735,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
- ssize_t err = 0;
- size_t count = iov_iter_count(from);
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
- loff_t pos = iocb->ki_pos;
+ ssize_t err;
+ loff_t pos;
+ size_t count;
mutex_lock(&inode->i_mutex);
-
- current->backing_dev_info = inode_to_bdi(inode);
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err) {
+ err = generic_write_checks(iocb, from);
+ if (err <= 0) {
mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- if (count == 0) {
- mutex_unlock(&inode->i_mutex);
- goto out;
+ return err;
}
- iov_iter_truncate(from, count);
-
+ current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_suid(file);
if (err) {
mutex_unlock(&inode->i_mutex);
@@ -1786,6 +1774,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
+ pos = iocb->ki_pos;
+ count = iov_iter_count(from);
start_pos = round_down(pos, root->sectorsize);
if (start_pos > i_size_read(inode)) {
/* Expand hole size to cover write data, preventing empty gap */
@@ -1800,7 +1790,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
if (sync)
atomic_inc(&BTRFS_I(inode)->sync_writers);
- if (file->f_flags & O_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT) {
num_written = __btrfs_direct_write(iocb, from, pos);
} else {
num_written = __btrfs_buffered_write(file, from, pos);
@@ -1815,7 +1805,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
* otherwise subsequent syncs to a file that's been synced in this
* transaction will appear to have already occured.
*/
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->last_sub_trans = root->log_transid;
+ spin_unlock(&BTRFS_I(inode)->lock);
if (num_written > 0) {
err = generic_write_sync(file, pos, num_written);
if (err < 0)
@@ -1870,7 +1862,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
@@ -2168,7 +2160,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
u64 num_bytes;
key.offset = offset;
- btrfs_set_item_key_safe(root, path, &key);
+ btrfs_set_item_key_safe(root->fs_info, path, &key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -2551,7 +2543,6 @@ static long btrfs_fallocate(struct file *file, int mode,
{
struct inode *inode = file_inode(file);
struct extent_state *cached_state = NULL;
- struct btrfs_root *root = BTRFS_I(inode)->root;
u64 cur_offset;
u64 last_byte;
u64 alloc_start;
@@ -2576,14 +2567,9 @@ static long btrfs_fallocate(struct file *file, int mode,
* Make sure we have enough space before we do the
* allocation.
*/
- ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+ ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
if (ret)
return ret;
- if (root->fs_info->quota_enabled) {
- ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
- if (ret)
- goto out_reserve_fail;
- }
mutex_lock(&inode->i_mutex);
ret = inode_newsize_ok(inode, alloc_end);
@@ -2673,23 +2659,35 @@ static long btrfs_fallocate(struct file *file, int mode,
1 << inode->i_blkbits,
offset + len,
&alloc_hint);
-
- if (ret < 0) {
- free_extent_map(em);
- break;
- }
} else if (actual_end > inode->i_size &&
!(mode & FALLOC_FL_KEEP_SIZE)) {
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
/*
* We didn't need to allocate any more space, but we
* still extended the size of the file so we need to
- * update i_size.
+ * update i_size and the inode item.
*/
- inode->i_ctime = CURRENT_TIME;
- i_size_write(inode, actual_end);
- btrfs_ordered_update_i_size(inode, actual_end, NULL);
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ } else {
+ inode->i_ctime = CURRENT_TIME;
+ i_size_write(inode, actual_end);
+ btrfs_ordered_update_i_size(inode, actual_end,
+ NULL);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret)
+ btrfs_end_transaction(trans, root);
+ else
+ ret = btrfs_end_transaction(trans,
+ root);
+ }
}
free_extent_map(em);
+ if (ret < 0)
+ break;
cur_offset = last_byte;
if (cur_offset >= alloc_end) {
@@ -2701,9 +2699,6 @@ static long btrfs_fallocate(struct file *file, int mode,
&cached_state, GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
- if (root->fs_info->quota_enabled)
- btrfs_qgroup_free(root, alloc_end - alloc_start);
-out_reserve_fail:
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
return ret;
@@ -2806,8 +2801,6 @@ out:
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a71978578fa7..41c510b7cc11 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,7 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
}
mapping_set_gfp_mask(inode->i_mapping,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ mapping_gfp_mask(inode->i_mapping) &
+ ~(GFP_NOFS & ~__GFP_HIGHMEM));
return inode;
}
@@ -170,13 +171,13 @@ static int __create_free_space_inode(struct btrfs_root *root,
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
key.type = 0;
-
ret = btrfs_insert_empty_item(trans, root, path, &key,
sizeof(struct btrfs_free_space_header));
if (ret < 0) {
btrfs_release_path(path);
return ret;
}
+
leaf = path->nodes[0];
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
@@ -225,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
struct inode *inode)
{
int ret = 0;
+ struct btrfs_path *path = btrfs_alloc_path();
+
+ if (!path) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ if (block_group) {
+ mutex_lock(&trans->transaction->cache_write_mutex);
+ if (!list_empty(&block_group->io_list)) {
+ list_del_init(&block_group->io_list);
+
+ btrfs_wait_cache_io(root, trans, block_group,
+ &block_group->io_ctl, path,
+ block_group->key.objectid);
+ btrfs_put_block_group(block_group);
+ }
+
+ /*
+ * now that we've truncated the cache away, it's no longer
+ * set up or written
+ */
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_CLEAR;
+ spin_unlock(&block_group->lock);
+ }
+ btrfs_free_path(path);
btrfs_i_size_write(inode, 0);
truncate_pagecache(inode, 0);
@@ -235,15 +264,23 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
/*
* We don't need an orphan item because truncating the free space cache
* will never be split across transactions.
+ * We don't need to check for -EAGAIN because we're a free space
+ * cache inode
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
if (ret) {
+ mutex_unlock(&trans->transaction->cache_write_mutex);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
ret = btrfs_update_inode(trans, root, inode);
+
+ if (block_group)
+ mutex_unlock(&trans->transaction->cache_write_mutex);
+
+fail:
if (ret)
btrfs_abort_transaction(trans, root, ret);
@@ -269,18 +306,7 @@ static int readahead_cache(struct inode *inode)
return 0;
}
-struct io_ctl {
- void *cur, *orig;
- struct page *page;
- struct page **pages;
- struct btrfs_root *root;
- unsigned long size;
- int index;
- int num_pages;
- unsigned check_crcs:1;
-};
-
-static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
+static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
struct btrfs_root *root, int write)
{
int num_pages;
@@ -296,45 +322,46 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
(num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
return -ENOSPC;
- memset(io_ctl, 0, sizeof(struct io_ctl));
+ memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
- io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+ io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
if (!io_ctl->pages)
return -ENOMEM;
io_ctl->num_pages = num_pages;
io_ctl->root = root;
io_ctl->check_crcs = check_crcs;
+ io_ctl->inode = inode;
return 0;
}
-static void io_ctl_free(struct io_ctl *io_ctl)
+static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
kfree(io_ctl->pages);
+ io_ctl->pages = NULL;
}
-static void io_ctl_unmap_page(struct io_ctl *io_ctl)
+static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
if (io_ctl->cur) {
- kunmap(io_ctl->page);
io_ctl->cur = NULL;
io_ctl->orig = NULL;
}
}
-static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
+static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
ASSERT(io_ctl->index < io_ctl->num_pages);
io_ctl->page = io_ctl->pages[io_ctl->index++];
- io_ctl->cur = kmap(io_ctl->page);
+ io_ctl->cur = page_address(io_ctl->page);
io_ctl->orig = io_ctl->cur;
io_ctl->size = PAGE_CACHE_SIZE;
if (clear)
memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}
-static void io_ctl_drop_pages(struct io_ctl *io_ctl)
+static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
int i;
@@ -349,7 +376,7 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
}
}
-static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
+static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
int uptodate)
{
struct page *page;
@@ -383,7 +410,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
return 0;
}
-static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
+static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
__le64 *val;
@@ -406,7 +433,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
io_ctl->cur += sizeof(u64);
}
-static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
+static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
__le64 *gen;
@@ -435,7 +462,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
return 0;
}
-static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
+static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
u32 *tmp;
u32 crc = ~(u32)0;
@@ -453,13 +480,12 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
io_ctl_unmap_page(io_ctl);
- tmp = kmap(io_ctl->pages[0]);
+ tmp = page_address(io_ctl->pages[0]);
tmp += index;
*tmp = crc;
- kunmap(io_ctl->pages[0]);
}
-static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
+static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
u32 *tmp, val;
u32 crc = ~(u32)0;
@@ -473,10 +499,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
- tmp = kmap(io_ctl->pages[0]);
+ tmp = page_address(io_ctl->pages[0]);
tmp += index;
val = *tmp;
- kunmap(io_ctl->pages[0]);
io_ctl_map_page(io_ctl, 0);
crc = btrfs_csum_data(io_ctl->orig + offset, crc,
@@ -492,7 +517,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
return 0;
}
-static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
+static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
void *bitmap)
{
struct btrfs_free_space_entry *entry;
@@ -522,7 +547,7 @@ static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
return 0;
}
-static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
+static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
if (!io_ctl->cur)
return -ENOSPC;
@@ -545,7 +570,7 @@ static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
return 0;
}
-static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
+static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
/*
* If we're not on the boundary we know we've modified the page and we
@@ -562,7 +587,7 @@ static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
}
}
-static int io_ctl_read_entry(struct io_ctl *io_ctl,
+static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
struct btrfs_free_space *entry, u8 *type)
{
struct btrfs_free_space_entry *e;
@@ -589,7 +614,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
return 0;
}
-static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
+static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
struct btrfs_free_space *entry)
{
int ret;
@@ -648,7 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
{
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
- struct io_ctl io_ctl;
+ struct btrfs_io_ctl io_ctl;
struct btrfs_key key;
struct btrfs_free_space *e, *n;
LIST_HEAD(bitmaps);
@@ -877,7 +902,7 @@ out:
}
static noinline_for_stack
-int write_cache_extent_entries(struct io_ctl *io_ctl,
+int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
struct btrfs_free_space_ctl *ctl,
struct btrfs_block_group_cache *block_group,
int *entries, int *bitmaps,
@@ -885,6 +910,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
{
int ret;
struct btrfs_free_cluster *cluster = NULL;
+ struct btrfs_free_cluster *cluster_locked = NULL;
struct rb_node *node = rb_first(&ctl->free_space_offset);
struct btrfs_trim_range *trim_entry;
@@ -896,6 +922,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
}
if (!node && cluster) {
+ cluster_locked = cluster;
+ spin_lock(&cluster_locked->lock);
node = rb_first(&cluster->root);
cluster = NULL;
}
@@ -919,9 +947,15 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
node = rb_next(node);
if (!node && cluster) {
node = rb_first(&cluster->root);
+ cluster_locked = cluster;
+ spin_lock(&cluster_locked->lock);
cluster = NULL;
}
}
+ if (cluster_locked) {
+ spin_unlock(&cluster_locked->lock);
+ cluster_locked = NULL;
+ }
/*
* Make sure we don't miss any range that was removed from our rbtree
@@ -939,6 +973,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
return 0;
fail:
+ if (cluster_locked)
+ spin_unlock(&cluster_locked->lock);
return -ENOSPC;
}
@@ -1000,7 +1036,7 @@ fail:
static noinline_for_stack int
write_pinned_extent_entries(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
- struct io_ctl *io_ctl,
+ struct btrfs_io_ctl *io_ctl,
int *entries)
{
u64 start, extent_start, extent_end, len;
@@ -1050,7 +1086,7 @@ write_pinned_extent_entries(struct btrfs_root *root,
}
static noinline_for_stack int
-write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list)
+write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
struct list_head *pos, *n;
int ret;
@@ -1083,10 +1119,7 @@ static int flush_dirty_cache(struct inode *inode)
}
static void noinline_for_stack
-cleanup_write_cache_enospc(struct inode *inode,
- struct io_ctl *io_ctl,
- struct extent_state **cached_state,
- struct list_head *bitmap_list)
+cleanup_bitmap_list(struct list_head *bitmap_list)
{
struct list_head *pos, *n;
@@ -1095,12 +1128,85 @@ cleanup_write_cache_enospc(struct inode *inode,
list_entry(pos, struct btrfs_free_space, list);
list_del_init(&entry->list);
}
+}
+
+static void noinline_for_stack
+cleanup_write_cache_enospc(struct inode *inode,
+ struct btrfs_io_ctl *io_ctl,
+ struct extent_state **cached_state,
+ struct list_head *bitmap_list)
+{
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, cached_state,
GFP_NOFS);
}
+int btrfs_wait_cache_io(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group, /* may be NULL; the block-group-only steps below are then skipped */
+ struct btrfs_io_ctl *io_ctl, /* supplies the inode plus the entries/bitmaps counts */
+ struct btrfs_path *path, u64 offset)
+{ /* Flush the cache inode held in io_ctl and update its cache item; returns 0 or the error from the flush/update */
+ int ret;
+ struct inode *inode = io_ctl->inode;
+
+ if (!inode)
+ return 0; /* no inode recorded in io_ctl: nothing to wait for */
+
+ if (block_group)
+ root = root->fs_info->tree_root; /* block group caches use the tree root from here on */
+
+ /* Flush the dirty pages in the cache file. */
+ ret = flush_dirty_cache(inode);
+ if (ret)
+ goto out; /* flush failed: skip the cache item update */
+
+ /* Update the cache item to tell everyone this cache file is valid. */
+ ret = update_cache_item(trans, root, inode, path, offset,
+ io_ctl->entries, io_ctl->bitmaps);
+out:
+ io_ctl_free(io_ctl); /* reached on both the success and the failure path */
+ if (ret) { /* on error: invalidate the mapping's pages and zero the inode generation */
+ invalidate_inode_pages2(inode->i_mapping);
+ BTRFS_I(inode)->generation = 0;
+ if (block_group) {
+#ifdef DEBUG
+ btrfs_err(root->fs_info,
+ "failed to write free space cache for block group %llu",
+ block_group->key.objectid);
+#endif
+ }
+ }
+ btrfs_update_inode(trans, root, inode); /* called regardless of ret; its return value is not checked */
+
+ if (block_group) {
+ /* the dirty list is protected by the dirty_bgs_lock */
+ spin_lock(&trans->transaction->dirty_bgs_lock);
+
+ /* the disk_cache_state is protected by the block group lock */
+ spin_lock(&block_group->lock);
+
+ /*
+ * Only mark this as written if we didn't get put back on
+ * the dirty list while waiting for IO. Otherwise our
+ * cache state won't be right, and we won't get written again.
+ */
+ if (!ret && list_empty(&block_group->dirty_list))
+ block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+ else if (ret) /* success but back on the dirty list: state is left unchanged */
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+
+ spin_unlock(&block_group->lock);
+ spin_unlock(&trans->transaction->dirty_bgs_lock);
+ io_ctl->inode = NULL; /* only cleared on the block group path */
+ iput(inode); /* put the inode; not done here when block_group is NULL */
+ }
+
+ return ret;
+
+}
+
/**
* __btrfs_write_out_cache - write out cached info to an inode
* @root - the root the inode belongs to
@@ -1112,27 +1218,29 @@ cleanup_write_cache_enospc(struct inode *inode,
*
* This function writes out a free space cache struct to disk for quick recovery
* on mount. This will return 0 if it was successfull in writing the cache out,
- * and -1 if it was not.
+ * or an errno if it was not.
*/
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_block_group_cache *block_group,
+ struct btrfs_io_ctl *io_ctl,
struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 offset)
{
struct extent_state *cached_state = NULL;
- struct io_ctl io_ctl;
LIST_HEAD(bitmap_list);
int entries = 0;
int bitmaps = 0;
int ret;
+ int must_iput = 0;
if (!i_size_read(inode))
- return -1;
+ return -EIO;
- ret = io_ctl_init(&io_ctl, inode, root, 1);
+ WARN_ON(io_ctl->pages);
+ ret = io_ctl_init(io_ctl, inode, root, 1);
if (ret)
- return -1;
+ return ret;
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
down_write(&block_group->data_rwsem);
@@ -1143,55 +1251,59 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
up_write(&block_group->data_rwsem);
BTRFS_I(inode)->generation = 0;
ret = 0;
+ must_iput = 1;
goto out;
}
spin_unlock(&block_group->lock);
}
/* Lock all pages first so we can lock the extent safely. */
- io_ctl_prepare_pages(&io_ctl, inode, 0);
+ ret = io_ctl_prepare_pages(io_ctl, inode, 0);
+ if (ret)
+ goto out;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
0, &cached_state);
- io_ctl_set_generation(&io_ctl, trans->transid);
+ io_ctl_set_generation(io_ctl, trans->transid);
mutex_lock(&ctl->cache_writeout_mutex);
/* Write out the extent entries in the free space cache */
- ret = write_cache_extent_entries(&io_ctl, ctl,
+ spin_lock(&ctl->tree_lock);
+ ret = write_cache_extent_entries(io_ctl, ctl,
block_group, &entries, &bitmaps,
&bitmap_list);
- if (ret) {
- mutex_unlock(&ctl->cache_writeout_mutex);
- goto out_nospc;
- }
+ if (ret)
+ goto out_nospc_locked;
/*
* Some spaces that are freed in the current transaction are pinned,
* they will be added into free space cache after the transaction is
* committed, we shouldn't lose them.
+ *
+ * If this changes while we are working we'll get added back to
+ * the dirty list and redo it. No locking needed
*/
- ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
- if (ret) {
- mutex_unlock(&ctl->cache_writeout_mutex);
- goto out_nospc;
- }
+ ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
+ if (ret)
+ goto out_nospc_locked;
/*
* At last, we write out all the bitmaps and keep cache_writeout_mutex
* locked while doing it because a concurrent trim can be manipulating
* or freeing the bitmap.
*/
- ret = write_bitmap_entries(&io_ctl, &bitmap_list);
+ ret = write_bitmap_entries(io_ctl, &bitmap_list);
+ spin_unlock(&ctl->tree_lock);
mutex_unlock(&ctl->cache_writeout_mutex);
if (ret)
goto out_nospc;
/* Zero out the rest of the pages just to make sure */
- io_ctl_zero_remaining_pages(&io_ctl);
+ io_ctl_zero_remaining_pages(io_ctl);
/* Everything is written out, now we dirty the pages in the file. */
- ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
+ ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
0, i_size_read(inode), &cached_state);
if (ret)
goto out_nospc;
@@ -1202,30 +1314,44 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* Release the pages and unlock the extent, we will flush
* them out later
*/
- io_ctl_drop_pages(&io_ctl);
+ io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS);
- /* Flush the dirty pages in the cache file. */
- ret = flush_dirty_cache(inode);
+ /*
+ * at this point the pages are under IO and we're happy,
+ * The caller is responsible for waiting on them and updating
+ * the cache and the inode
+ */
+ io_ctl->entries = entries;
+ io_ctl->bitmaps = bitmaps;
+
+ ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
if (ret)
goto out;
- /* Update the cache item to tell everyone this cache file is valid. */
- ret = update_cache_item(trans, root, inode, path, offset,
- entries, bitmaps);
+ return 0;
+
out:
- io_ctl_free(&io_ctl);
+ io_ctl->inode = NULL;
+ io_ctl_free(io_ctl);
if (ret) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
}
btrfs_update_inode(trans, root, inode);
+ if (must_iput)
+ iput(inode);
return ret;
+out_nospc_locked:
+ cleanup_bitmap_list(&bitmap_list);
+ spin_unlock(&ctl->tree_lock);
+ mutex_unlock(&ctl->cache_writeout_mutex);
+
out_nospc:
- cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
+ cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
up_write(&block_group->data_rwsem);
@@ -1241,7 +1367,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct inode *inode;
int ret = 0;
- enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
root = root->fs_info->tree_root;
@@ -1250,34 +1375,34 @@ int btrfs_write_out_cache(struct btrfs_root *root,
spin_unlock(&block_group->lock);
return 0;
}
-
- if (block_group->delalloc_bytes) {
- block_group->disk_cache_state = BTRFS_DC_WRITTEN;
- spin_unlock(&block_group->lock);
- return 0;
- }
spin_unlock(&block_group->lock);
inode = lookup_free_space_inode(root, block_group, path);
if (IS_ERR(inode))
return 0;
- ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
+ ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
+ &block_group->io_ctl, trans,
path, block_group->key.objectid);
if (ret) {
- dcs = BTRFS_DC_ERROR;
- ret = 0;
#ifdef DEBUG
btrfs_err(root->fs_info,
"failed to write free space cache for block group %llu",
block_group->key.objectid);
#endif
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+ spin_unlock(&block_group->lock);
+
+ block_group->io_ctl.inode = NULL;
+ iput(inode);
}
- spin_lock(&block_group->lock);
- block_group->disk_cache_state = dcs;
- spin_unlock(&block_group->lock);
- iput(inode);
+ /*
+ * if ret == 0 the caller is expected to call btrfs_wait_cache_io
+ * to wait for IO and put the inode
+ */
+
return ret;
}
@@ -1298,11 +1423,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
u64 offset)
{
u64 bitmap_start;
- u64 bytes_per_bitmap;
+ u32 bytes_per_bitmap;
bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
bitmap_start = offset - ctl->start;
- bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
+ bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
bitmap_start *= bytes_per_bitmap;
bitmap_start += ctl->start;
@@ -1521,10 +1646,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
u64 bitmap_bytes;
u64 extent_bytes;
u64 size = block_group->key.offset;
- u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
- int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
+ u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+ u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
- max_bitmaps = max(max_bitmaps, 1);
+ max_bitmaps = max_t(u32, max_bitmaps, 1);
ASSERT(ctl->total_bitmaps <= max_bitmaps);
@@ -1537,7 +1662,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
max_bytes = MAX_CACHE_BYTES_PER_GIG;
else
max_bytes = MAX_CACHE_BYTES_PER_GIG *
- div64_u64(size, 1024 * 1024 * 1024);
+ div_u64(size, 1024 * 1024 * 1024);
/*
* we want to account for 1 more bitmap than what we have so we can make
@@ -1552,14 +1677,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
}
/*
- * we want the extent entry threshold to always be at most 1/2 the maxw
+ * we want the extent entry threshold to always be at most 1/2 the max
* bytes we can have, or whatever is less than that.
*/
extent_bytes = max_bytes - bitmap_bytes;
- extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
+ extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
ctl->extents_thresh =
- div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
+ div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}
static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1673,7 +1798,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
*/
if (*bytes >= align) {
tmp = entry->offset - ctl->start + align - 1;
- do_div(tmp, align);
+ tmp = div64_u64(tmp, align);
tmp = tmp * align + ctl->start;
align_off = tmp - entry->offset;
} else {
@@ -2402,11 +2527,8 @@ static void __btrfs_remove_free_space_cache_locked(
} else {
free_bitmap(ctl, info);
}
- if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
- cond_resched();
- spin_lock(&ctl->tree_lock);
- }
+
+ cond_resched_lock(&ctl->tree_lock);
}
}
@@ -2431,11 +2553,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
WARN_ON(cluster->block_group != block_group);
__btrfs_return_cluster_to_free_space(block_group, cluster);
- if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
- cond_resched();
- spin_lock(&ctl->tree_lock);
- }
+
+ cond_resched_lock(&ctl->tree_lock);
}
__btrfs_remove_free_space_cache_locked(ctl);
spin_unlock(&ctl->tree_lock);
@@ -3346,11 +3465,17 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
int ret;
+ struct btrfs_io_ctl io_ctl;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
- ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
+ memset(&io_ctl, 0, sizeof(io_ctl));
+ ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
+ trans, path, 0);
+ if (!ret)
+ ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+
if (ret) {
btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 88b2238a0aed..a16a029ad3b1 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -48,6 +48,8 @@ struct btrfs_free_space_op {
struct btrfs_free_space *info);
};
+struct btrfs_io_ctl;
+
struct inode *lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path);
@@ -60,14 +62,19 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
+int btrfs_wait_cache_io(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_io_ctl *io_ctl,
+ struct btrfs_path *path, u64 offset);
int btrfs_write_out_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
-
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
struct btrfs_path *path);
int create_free_ino_inode(struct btrfs_root *root,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 74faea3a516e..f6a596d5a637 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -456,7 +456,7 @@ again:
}
if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2e732d7af52..8bb013672aee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
-#include <linux/aio.h>
#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -59,6 +59,7 @@
#include "backref.h"
#include "hash.h"
#include "props.h"
+#include "qgroup.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -470,7 +471,7 @@ again:
*/
if (inode_need_compress(inode)) {
WARN_ON(pages);
- pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+ pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
/* just bail out to the uncompressed code */
goto cont;
@@ -752,7 +753,6 @@ retry:
}
goto out_free;
}
-
/*
* here we're doing allocation and writeback of the
* compressed pages
@@ -3110,6 +3110,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
if (empty)
return;
+ down_read(&fs_info->delayed_iput_sem);
+
spin_lock(&fs_info->delayed_iput_lock);
list_splice_init(&fs_info->delayed_iputs, &list);
spin_unlock(&fs_info->delayed_iput_lock);
@@ -3120,6 +3122,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
iput(delayed->inode);
kfree(delayed);
}
+
+ up_read(&root->fs_info->delayed_iput_sem);
}
/*
@@ -3628,25 +3632,28 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
+ inode->i_version = btrfs_inode_sequence(leaf, inode_item);
+ inode->i_generation = BTRFS_I(inode)->generation;
+ inode->i_rdev = 0;
+ rdev = btrfs_inode_rdev(leaf, inode_item);
+
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+ BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
/*
* If we were modified in the current generation and evicted from memory
* and then re-read we need to do a full sync since we don't have any
* idea about which extents were modified before we were evicted from
* cache.
+ *
+ * This is required for both inode re-read from disk and delayed inode
+ * in delayed_nodes_tree.
*/
if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- inode->i_version = btrfs_inode_sequence(leaf, inode_item);
- inode->i_generation = BTRFS_I(inode)->generation;
- inode->i_rdev = 0;
- rdev = btrfs_inode_rdev(leaf, inode_item);
-
- BTRFS_I(inode)->index_cnt = (u64)-1;
- BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
-cache_index:
path->slots[0]++;
if (inode->i_nlink != 1 ||
path->slots[0] >= btrfs_header_nritems(leaf))
@@ -4016,16 +4023,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
int ret;
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
return PTR_ERR(trans);
- btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+ btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
- ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+ ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
dentry->d_name.name, dentry->d_name.len);
if (ret)
goto out;
@@ -4124,7 +4131,7 @@ out:
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
@@ -4151,7 +4158,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
goto out;
/* now the directory is empty */
- err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+ err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
dentry->d_name.name, dentry->d_name.len);
if (!err)
btrfs_i_size_write(inode, 0);
@@ -4162,6 +4169,21 @@ out:
return err;
}
+static int truncate_space_check(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytes_deleted)
+{ /* Try to add reservation to trans_block_rsv for a deleted range; returns btrfs_block_rsv_add()'s result */
+ int ret;
+
+ bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); /* reuses the parameter for the converted amount; NOTE(review): presumably csum leaf space for the range - confirm against the helper */
+ ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
+ bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
+ if (!ret)
+ trans->bytes_reserved += bytes_deleted; /* account on the handle only when the add succeeded */
+ return ret; /* the caller in this patch treats non-zero as a signal to end the transaction */
+
+}
+
/*
* this can truncate away extent items, csum items and directory items.
* It starts at a high offset and removes keys until it can't find
@@ -4197,9 +4219,21 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int ret;
int err = 0;
u64 ino = btrfs_ino(inode);
+ u64 bytes_deleted = 0;
+ bool be_nice = 0;
+ bool should_throttle = 0;
+ bool should_end = 0;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
+ /*
+ * for non-free space inodes and ref cows, we want to back off from
+ * time to time
+ */
+ if (!btrfs_is_free_space_inode(inode) &&
+ test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+ be_nice = 1;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -4229,6 +4263,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
key.type = (u8)-1;
search_again:
+ /*
+ * with a 16K leaf size and 128MB extents, you can actually queue
+ * up a huge file in a single leaf. Most of the time that
+ * bytes_deleted is > 0, it will be huge by the time we get here
+ */
+ if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+ if (btrfs_should_end_transaction(trans, root)) {
+ err = -EAGAIN;
+ goto error;
+ }
+ }
+
+
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0) {
@@ -4371,22 +4418,39 @@ delete:
} else {
break;
}
+ should_throttle = 0;
+
if (found_extent &&
(test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
root == root->fs_info->tree_root)) {
btrfs_set_path_blocking(path);
+ bytes_deleted += extent_num_bytes;
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
btrfs_header_owner(leaf),
ino, extent_offset, 0);
BUG_ON(ret);
+ if (btrfs_should_throttle_delayed_refs(trans, root))
+ btrfs_async_run_delayed_refs(root,
+ trans->delayed_ref_updates * 2, 0);
+ if (be_nice) {
+ if (truncate_space_check(trans, root,
+ extent_num_bytes)) {
+ should_end = 1;
+ }
+ if (btrfs_should_throttle_delayed_refs(trans,
+ root)) {
+ should_throttle = 1;
+ }
+ }
}
if (found_type == BTRFS_INODE_ITEM_KEY)
break;
if (path->slots[0] == 0 ||
- path->slots[0] != pending_del_slot) {
+ path->slots[0] != pending_del_slot ||
+ should_throttle || should_end) {
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
@@ -4399,6 +4463,23 @@ delete:
pending_del_nr = 0;
}
btrfs_release_path(path);
+ if (should_throttle) {
+ unsigned long updates = trans->delayed_ref_updates;
+ if (updates) {
+ trans->delayed_ref_updates = 0;
+ ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+ if (ret && !err)
+ err = ret;
+ }
+ }
+ /*
+ * if we failed to refill our space rsv, bail out
+ * and let the transaction restart
+ */
+ if (should_end) {
+ err = -EAGAIN;
+ goto error;
+ }
goto search_again;
} else {
path->slots[0]--;
@@ -4415,7 +4496,18 @@ error:
if (last_size != (u64)-1 &&
root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
btrfs_ordered_update_i_size(inode, last_size, NULL);
+
btrfs_free_path(path);
+
+ if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+ unsigned long updates = trans->delayed_ref_updates;
+ if (updates) {
+ trans->delayed_ref_updates = 0;
+ ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+ if (ret && !err)
+ err = ret;
+ }
+ }
return err;
}
@@ -4826,7 +4918,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
int err;
@@ -4924,6 +5016,7 @@ void btrfs_evict_inode(struct inode *inode)
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv, *global_rsv;
+ int steal_from_global = 0;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
int ret;
@@ -4991,9 +5084,20 @@ void btrfs_evict_inode(struct inode *inode)
* hard as possible to get this to work.
*/
if (ret)
- ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
+ steal_from_global++;
+ else
+ steal_from_global = 0;
+ ret = 0;
- if (ret) {
+ /*
+ * steal_from_global == 0: we reserved stuff, hooray!
+ * steal_from_global == 1: we didn't reserve stuff, boo!
+ * steal_from_global == 2: we've committed, still not a lot of
+ * room but maybe we'll have room in the global reserve this
+ * time.
+ * steal_from_global == 3: abandon all hope!
+ */
+ if (steal_from_global > 2) {
btrfs_warn(root->fs_info,
"Could not get space for a delete, will truncate on mount %d",
ret);
@@ -5009,10 +5113,40 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
+ /*
+ * We can't just steal from the global reserve, we need to make
+ * sure there is room to do it, if not we need to commit and try
+ * again.
+ */
+ if (steal_from_global) {
+ if (!btrfs_check_space_for_delayed_refs(trans, root))
+ ret = btrfs_block_rsv_migrate(global_rsv, rsv,
+ min_size);
+ else
+ ret = -ENOSPC;
+ }
+
+ /*
+ * Couldn't steal from the global reserve, we have too much
+ * pending stuff built up, commit the transaction and try it
+ * again.
+ */
+ if (ret) {
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret) {
+ btrfs_orphan_del(NULL, inode);
+ btrfs_free_block_rsv(root, rsv);
+ goto no_delete;
+ }
+ continue;
+ } else {
+ steal_from_global = 0;
+ }
+
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- if (ret != -ENOSPC)
+ if (ret != -ENOSPC && ret != -EAGAIN)
break;
trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -5416,10 +5550,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
if (!inode && !IS_ROOT(dentry))
- inode = dentry->d_parent->d_inode;
+ inode = d_inode(dentry->d_parent);
if (inode) {
root = BTRFS_I(inode)->root;
@@ -6226,7 +6360,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
- struct inode *inode = old_dentry->d_inode;
+ struct inode *inode = d_inode(old_dentry);
u64 index;
int err;
int drop_inode = 0;
@@ -8081,7 +8215,7 @@ free_ordered:
bio_endio(dio_bio, ret);
}
-static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
+static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
const struct iov_iter *iter, loff_t offset)
{
int seg;
@@ -8096,7 +8230,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
goto out;
/* If this is a write we don't need to check anymore */
- if (rw & WRITE)
+ if (iov_iter_rw(iter) == WRITE)
return 0;
/*
* Check to make sure we don't have duplicate iov_base's in this
@@ -8114,8 +8248,8 @@ out:
return retval;
}
-static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -8126,10 +8260,10 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
bool relock = false;
ssize_t ret;
- if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
+ if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
return 0;
- atomic_inc(&inode->i_dio_count);
+ inode_dio_begin(inode);
smp_mb__after_atomic();
/*
@@ -8144,7 +8278,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
filemap_fdatawrite_range(inode->i_mapping, offset,
offset + count - 1);
- if (rw & WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
/*
* If the write DIO is beyond the EOF, we need update
* the isize, but it is protected by i_mutex. So we can
@@ -8169,16 +8303,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
current->journal_info = &outstanding_extents;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
- inode_dio_done(inode);
+ inode_dio_end(inode);
flags = DIO_LOCKING | DIO_SKIP_HOLES;
wakeup = false;
}
- ret = __blockdev_direct_IO(rw, iocb, inode,
- BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
- iter, offset, btrfs_get_blocks_direct, NULL,
- btrfs_submit_direct, flags);
- if (rw & WRITE) {
+ ret = __blockdev_direct_IO(iocb, inode,
+ BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+ iter, offset, btrfs_get_blocks_direct, NULL,
+ btrfs_submit_direct, flags);
+ if (iov_iter_rw(iter) == WRITE) {
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED)
btrfs_delalloc_release_space(inode, count);
@@ -8188,7 +8322,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
}
out:
if (wakeup)
- inode_dio_done(inode);
+ inode_dio_end(inode);
if (relock)
mutex_lock(&inode->i_mutex);
@@ -8581,7 +8715,7 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
- if (ret != -ENOSPC) {
+ if (ret != -ENOSPC && ret != -EAGAIN) {
err = ret;
break;
}
@@ -8875,7 +9009,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
struct dentry *dentry, struct kstat *stat)
{
u64 delalloc_bytes;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
u32 blocksize = inode->i_sb->s_blocksize;
generic_fillattr(inode, stat);
@@ -8896,8 +9030,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(old_dir)->root;
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
- struct inode *new_inode = new_dentry->d_inode;
- struct inode *old_inode = old_dentry->d_inode;
+ struct inode *new_inode = d_inode(new_dentry);
+ struct inode *old_inode = d_inode(old_dentry);
struct timespec ctime = CURRENT_TIME;
u64 index = 0;
u64 root_objectid;
@@ -9009,7 +9143,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry->d_name.len);
} else {
ret = __btrfs_unlink_inode(trans, root, old_dir,
- old_dentry->d_inode,
+ d_inode(old_dentry),
old_dentry->d_name.name,
old_dentry->d_name.len);
if (!ret)
@@ -9033,12 +9167,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BUG_ON(new_inode->i_nlink == 0);
} else {
ret = btrfs_unlink_inode(trans, dest, new_dir,
- new_dentry->d_inode,
+ d_inode(new_dentry),
new_dentry->d_name.name,
new_dentry->d_name.len);
}
if (!ret && new_inode->i_nlink == 0)
- ret = btrfs_orphan_add(trans, new_dentry->d_inode);
+ ret = btrfs_orphan_add(trans, d_inode(new_dentry));
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
@@ -9451,6 +9585,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
btrfs_end_transaction(trans, root);
break;
}
+
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 74609b931ba5..1c22c6518504 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -456,6 +456,13 @@ static noinline int create_subvol(struct inode *dir,
if (ret)
return ret;
+ /*
+ * Don't create a subvolume whose level is not zero, or qgroup will be
+ * screwed up since it assumes the subvolume qgroup's level to be 0.
+ */
+ if (btrfs_qgroup_level(objectid))
+ return -ENOSPC;
+
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* The same as the snapshot creation, please see the comment
@@ -717,7 +724,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
goto fail;
- inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+ inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto fail;
@@ -761,10 +768,10 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
{
int error;
- if (!victim->d_inode)
+ if (d_really_is_negative(victim))
return -ENOENT;
- BUG_ON(victim->d_parent->d_inode != dir);
+ BUG_ON(d_inode(victim->d_parent) != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
@@ -772,8 +779,8 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+ if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
+ IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
return -EPERM;
if (isdir) {
if (!d_is_dir(victim))
@@ -792,7 +799,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
- if (child->d_inode)
+ if (d_really_is_positive(child))
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
@@ -810,7 +817,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit *inherit)
{
- struct inode *dir = parent->dentry->d_inode;
+ struct inode *dir = d_inode(parent->dentry);
struct dentry *dentry;
int error;
@@ -824,7 +831,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_unlock;
error = -EEXIST;
- if (dentry->d_inode)
+ if (d_really_is_positive(dentry))
goto out_dput;
error = btrfs_may_create(dir, dentry);
@@ -1564,7 +1571,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
goto out_free;
}
- do_div(new_size, root->sectorsize);
+ new_size = div_u64(new_size, root->sectorsize);
new_size *= root->sectorsize;
printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
@@ -2294,7 +2301,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
{
struct dentry *parent = file->f_path.dentry;
struct dentry *dentry;
- struct inode *dir = parent->d_inode;
+ struct inode *dir = d_inode(parent);
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *dest = NULL;
@@ -2333,12 +2340,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_unlock_dir;
}
- if (!dentry->d_inode) {
+ if (d_really_is_negative(dentry)) {
err = -ENOENT;
goto out_dput;
}
- inode = dentry->d_inode;
+ inode = d_inode(dentry);
dest = BTRFS_I(inode)->root;
if (!capable(CAP_SYS_ADMIN)) {
/*
@@ -2403,7 +2410,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
"Attempt to delete subvolume %llu during send",
dest->root_key.objectid);
err = -EPERM;
- goto out_dput;
+ goto out_unlock_inode;
}
d_invalidate(dentry);
@@ -2498,6 +2505,7 @@ out_up_write:
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
}
+out_unlock_inode:
mutex_unlock(&inode->i_mutex);
if (!err) {
shrink_dcache_sb(root->fs_info->sb);
@@ -2897,6 +2905,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
if (src == dst)
return -EINVAL;
+ if (len == 0)
+ return 0;
+
btrfs_double_lock(src, loff, dst, dst_loff, len);
ret = extent_same_check_offsets(src, loff, len);
@@ -3039,7 +3050,7 @@ out:
static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 disko)
{
- struct seq_list tree_mod_seq_elem = {};
+ struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
struct ulist *roots;
struct ulist_iterator uiter;
struct ulist_node *root_node = NULL;
@@ -3202,6 +3213,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
key.offset = off;
while (1) {
+ u64 next_key_min_offset = key.offset + 1;
+
/*
* note the key will change type as we walk through the
* tree.
@@ -3282,7 +3295,7 @@ process_slot:
} else if (key.offset >= off + len) {
break;
}
-
+ next_key_min_offset = key.offset + datal;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
btrfs_item_ptr_offset(leaf, slot),
@@ -3497,7 +3510,7 @@ process_slot:
break;
}
btrfs_release_path(path);
- key.offset++;
+ key.offset = next_key_min_offset;
}
ret = 0;
@@ -3626,6 +3639,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
if (off + len == src->i_size)
len = ALIGN(src->i_size, bs) - off;
+ if (len == 0) {
+ ret = 0;
+ goto out_unlock;
+ }
+
/* verify the end result is block aligned */
if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
!IS_ALIGNED(destoff, bs))
@@ -4624,6 +4642,11 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
sa->src, sa->dst);
}
+ /* update qgroup status and info */
+ err = btrfs_run_qgroups(trans, root->fs_info);
+ if (err < 0)
+ btrfs_error(root->fs_info, ret,
+ "failed to update qgroup status and info\n");
err = btrfs_end_transaction(trans, root);
if (err && !ret)
ret = err;
@@ -4669,8 +4692,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
/* FIXME: check if the IDs really exist */
if (sa->create) {
- ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
- NULL);
+ ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid);
} else {
ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
}
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 617553cdb7d3..a2f051347731 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -434,7 +434,7 @@ out:
return ret;
}
-struct btrfs_compress_op btrfs_lzo_compress = {
+const struct btrfs_compress_op btrfs_lzo_compress = {
.alloc_workspace = lzo_alloc_workspace,
.free_workspace = lzo_free_workspace,
.compress_pages = lzo_compress_pages,
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h
index b7816cefbd13..1b10a3cd1195 100644
--- a/fs/btrfs/math.h
+++ b/fs/btrfs/math.h
@@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor)
if (factor == 10)
return num;
num *= factor;
- do_div(num, 10);
- return num;
+ return div_u64(num, 10);
}
static inline u64 div_factor_fine(u64 num, int factor)
@@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor)
if (factor == 100)
return num;
num *= factor;
- do_div(num, 100);
- return num;
+ return div_u64(num, 100);
}
#endif
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 129b1dd28527..dca137b04095 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -425,3 +425,5 @@ static const char *prop_compression_extract(struct inode *inode)
return NULL;
}
+
+
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 058c79eecbfb..3d6546581bb9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -644,9 +644,8 @@ out:
}
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 qgroupid,
- u64 flags, u64 max_rfer, u64 max_excl,
- u64 rsv_rfer, u64 rsv_excl)
+ struct btrfs_root *root,
+ struct btrfs_qgroup *qgroup)
{
struct btrfs_path *path;
struct btrfs_key key;
@@ -657,7 +656,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
key.objectid = 0;
key.type = BTRFS_QGROUP_LIMIT_KEY;
- key.offset = qgroupid;
+ key.offset = qgroup->qgroupid;
path = btrfs_alloc_path();
if (!path)
@@ -673,11 +672,11 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
l = path->nodes[0];
slot = path->slots[0];
qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
- btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
- btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
- btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
- btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
- btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
+ btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
+ btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
+ btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
+ btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
+ btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
btrfs_mark_buffer_dirty(l);
@@ -967,6 +966,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
fs_info->pending_quota_state = 0;
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
spin_unlock(&fs_info->qgroup_lock);
btrfs_free_qgroup_config(fs_info);
@@ -982,7 +982,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
list_del(&quota_root->dirty_list);
btrfs_tree_lock(quota_root->node);
- clean_tree_block(trans, tree_root, quota_root->node);
+ clean_tree_block(trans, tree_root->fs_info, quota_root->node);
btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
@@ -1001,6 +1001,110 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
+/*
+ * The easy accounting: if we are adding/removing the only ref for an extent
+ * then this qgroup and all of the parent qgroups get their reference and
+ * exclusive counts adjusted.
+ *
+ * @fs_info:   filesystem whose qgroup tree is searched for @ref_root
+ * @tmp:       caller-supplied ulist used as the work queue of ancestor
+ *             qgroups; it is only appended to here, so presumably the caller
+ *             passes it in empty (TODO confirm at call sites)
+ * @ref_root:  id of the qgroup whose counters are adjusted first
+ * @num_bytes: size of the adjustment
+ * @sign:      multiplied into @num_bytes for every counter update; the code
+ *             distinguishes sign > 0 (add) from sign < 0 (remove)
+ *
+ * For the qgroup itself and for every ancestor reached through its groups
+ * list, rfer, rfer_cmpr, excl and excl_cmpr are changed by sign * num_bytes
+ * and the qgroup is marked dirty.  When sign > 0, num_bytes is additionally
+ * subtracted from the reserved count.  A removal larger than the current
+ * excl count triggers WARN_ON() but is still applied.
+ *
+ * Returns 0 on success, including when no qgroup exists for @ref_root (in
+ * which case nothing is changed), or the negative value returned by
+ * ulist_add() if queueing an ancestor fails.  Counters updated before such
+ * a failure are not rolled back.
+ *
+ * Caller should hold fs_info->qgroup_lock.
+ */
+static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
+ struct ulist *tmp, u64 ref_root,
+ u64 num_bytes, int sign)
+{
+ struct btrfs_qgroup *qgroup;
+ struct btrfs_qgroup_list *glist;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ int ret = 0;
+
+ qgroup = find_qgroup_rb(fs_info, ref_root);
+ if (!qgroup)
+ goto out;
+
+ qgroup->rfer += sign * num_bytes;
+ qgroup->rfer_cmpr += sign * num_bytes;
+
+ WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+ qgroup->excl += sign * num_bytes;
+ qgroup->excl_cmpr += sign * num_bytes;
+ if (sign > 0)
+ qgroup->reserved -= num_bytes;
+
+ qgroup_dirty(fs_info, qgroup);
+
+ /* Queue every direct parent of this qgroup on tmp, keyed by its qgroupid */
+ list_for_each_entry(glist, &qgroup->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ ptr_to_u64(glist->group), GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
+ }
+
+ /* Walk the queue and apply the same adjustment to each queued parent */
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(tmp, &uiter))) {
+ qgroup = u64_to_ptr(unode->aux);
+ qgroup->rfer += sign * num_bytes;
+ qgroup->rfer_cmpr += sign * num_bytes;
+ WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+ qgroup->excl += sign * num_bytes;
+ if (sign > 0)
+ qgroup->reserved -= num_bytes;
+ qgroup->excl_cmpr += sign * num_bytes;
+ qgroup_dirty(fs_info, qgroup);
+
+ /* Add the parents of this parent to the queue as well */
+ list_for_each_entry(glist, &qgroup->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ ptr_to_u64(glist->group), GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+
+/*
+ * Quick path for updating qgroup accounting when the src qgroup holds only
+ * exclusive refs (its excl count equals its rfer count).
+ *
+ * In that case it is enough to apply src's excl count, with the given sign,
+ * to dst and all of dst's parents.  Otherwise a full rescan is needed.
+ * Caller should also hold fs_info->qgroup_lock.
+ *
+ * @tmp is handed unchanged to __qgroup_excl_accounting().
+ *
+ * Return 0 if the quick update was done.
+ * Return > 0 if a full rescan is needed; this is also the result when no
+ * qgroup is found for src.
+ * Return < 0 for other errors.
+ * Every non-zero return also sets BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT in
+ * fs_info->qgroup_flags.
+ */
+static int quick_update_accounting(struct btrfs_fs_info *fs_info,
+ struct ulist *tmp, u64 src, u64 dst,
+ int sign)
+{
+ struct btrfs_qgroup *qgroup;
+ int ret = 1;
+ int err = 0;
+
+ qgroup = find_qgroup_rb(fs_info, src);
+ if (!qgroup)
+ goto out;
+ if (qgroup->excl == qgroup->rfer) {
+ ret = 0;
+ err = __qgroup_excl_accounting(fs_info, tmp, dst,
+ qgroup->excl, sign);
+ if (err < 0) {
+ ret = err;
+ goto out;
+ }
+ }
+out:
+ if (ret)
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ return ret;
+}
+
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
@@ -1008,8 +1112,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *parent;
struct btrfs_qgroup *member;
struct btrfs_qgroup_list *list;
+ struct ulist *tmp;
int ret = 0;
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+
+ /* Check the level of src and dst first */
+ if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
+ return -EINVAL;
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
if (!quota_root) {
@@ -1043,23 +1156,33 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->qgroup_lock);
ret = add_relation_rb(quota_root->fs_info, src, dst);
+ if (ret < 0) {
+ spin_unlock(&fs_info->qgroup_lock);
+ goto out;
+ }
+ ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
spin_unlock(&fs_info->qgroup_lock);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ ulist_free(tmp);
return ret;
}
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+int __del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *parent;
struct btrfs_qgroup *member;
struct btrfs_qgroup_list *list;
+ struct ulist *tmp;
int ret = 0;
int err;
- mutex_lock(&fs_info->qgroup_ioctl_lock);
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+
quota_root = fs_info->quota_root;
if (!quota_root) {
ret = -EINVAL;
@@ -1088,14 +1211,27 @@ exist:
spin_lock(&fs_info->qgroup_lock);
del_relation_rb(fs_info, src, dst);
+ ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
spin_unlock(&fs_info->qgroup_lock);
out:
+ ulist_free(tmp);
+ return ret;
+}
+
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+{
+ int ret = 0;
+
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ ret = __del_qgroup_relation(trans, fs_info, src, dst);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
return ret;
}
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
+ struct btrfs_fs_info *fs_info, u64 qgroupid)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@@ -1133,6 +1269,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
+ struct btrfs_qgroup_list *list;
int ret = 0;
mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -1147,15 +1284,24 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
ret = -ENOENT;
goto out;
} else {
- /* check if there are no relations to this qgroup */
- if (!list_empty(&qgroup->groups) ||
- !list_empty(&qgroup->members)) {
+ /* check if there are no children of this qgroup */
+ if (!list_empty(&qgroup->members)) {
ret = -EBUSY;
goto out;
}
}
ret = del_qgroup_item(trans, quota_root, qgroupid);
+ while (!list_empty(&qgroup->groups)) {
+ list = list_first_entry(&qgroup->groups,
+ struct btrfs_qgroup_list, next_group);
+ ret = __del_qgroup_relation(trans, fs_info,
+ qgroupid,
+ list->group->qgroupid);
+ if (ret)
+ goto out;
+ }
+
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
@@ -1184,23 +1330,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
ret = -ENOENT;
goto out;
}
- ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
- limit->flags, limit->max_rfer,
- limit->max_excl, limit->rsv_rfer,
- limit->rsv_excl);
+
+ spin_lock(&fs_info->qgroup_lock);
+ if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
+ qgroup->max_rfer = limit->max_rfer;
+ if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+ qgroup->max_excl = limit->max_excl;
+ if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
+ qgroup->rsv_rfer = limit->rsv_rfer;
+ if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
+ qgroup->rsv_excl = limit->rsv_excl;
+ qgroup->lim_flags |= limit->flags;
+
+ spin_unlock(&fs_info->qgroup_lock);
+
+ ret = update_qgroup_limit_item(trans, quota_root, qgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_info(fs_info, "unable to update quota limit for %llu",
qgroupid);
}
- spin_lock(&fs_info->qgroup_lock);
- qgroup->lim_flags = limit->flags;
- qgroup->max_rfer = limit->max_rfer;
- qgroup->max_excl = limit->max_excl;
- qgroup->rsv_rfer = limit->rsv_rfer;
- qgroup->rsv_excl = limit->rsv_excl;
- spin_unlock(&fs_info->qgroup_lock);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
@@ -1256,14 +1406,14 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
return -1;
if (oper1->bytenr > oper2->bytenr)
return 1;
- if (oper1->seq < oper2->seq)
- return -1;
- if (oper1->seq > oper2->seq)
- return 1;
if (oper1->ref_root < oper2->ref_root)
return -1;
if (oper1->ref_root > oper2->ref_root)
return 1;
+ if (oper1->seq < oper2->seq)
+ return -1;
+ if (oper1->seq > oper2->seq)
+ return 1;
if (oper1->type < oper2->type)
return -1;
if (oper1->type > oper2->type)
@@ -1372,19 +1522,10 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
return 0;
}
-/*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their refrence and
- * exclusive counts adjusted.
- */
static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_operation *oper)
{
- struct btrfs_qgroup *qgroup;
struct ulist *tmp;
- struct btrfs_qgroup_list *glist;
- struct ulist_node *unode;
- struct ulist_iterator uiter;
int sign = 0;
int ret = 0;
@@ -1395,9 +1536,7 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->qgroup_lock);
if (!fs_info->quota_root)
goto out;
- qgroup = find_qgroup_rb(fs_info, oper->ref_root);
- if (!qgroup)
- goto out;
+
switch (oper->type) {
case BTRFS_QGROUP_OPER_ADD_EXCL:
sign = 1;
@@ -1408,43 +1547,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
default:
ASSERT(0);
}
- qgroup->rfer += sign * oper->num_bytes;
- qgroup->rfer_cmpr += sign * oper->num_bytes;
-
- WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
- qgroup->excl += sign * oper->num_bytes;
- qgroup->excl_cmpr += sign * oper->num_bytes;
-
- qgroup_dirty(fs_info, qgroup);
-
- /* Get all of the parent groups that contain this qgroup */
- list_for_each_entry(glist, &qgroup->groups, next_group) {
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- goto out;
- }
-
- /* Iterate all of the parents and adjust their reference counts */
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- qgroup = u64_to_ptr(unode->aux);
- qgroup->rfer += sign * oper->num_bytes;
- qgroup->rfer_cmpr += sign * oper->num_bytes;
- WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
- qgroup->excl += sign * oper->num_bytes;
- qgroup->excl_cmpr += sign * oper->num_bytes;
- qgroup_dirty(fs_info, qgroup);
-
- /* Add any parents of the parents */
- list_for_each_entry(glist, &qgroup->groups, next_group) {
- ret = ulist_add(tmp, glist->group->qgroupid,
- ptr_to_u64(glist->group), GFP_ATOMIC);
- if (ret < 0)
- goto out;
- }
- }
- ret = 0;
+ ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
+ oper->num_bytes, sign);
out:
spin_unlock(&fs_info->qgroup_lock);
ulist_free(tmp);
@@ -1845,7 +1949,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
struct ulist *roots = NULL;
struct ulist *qgroups, *tmp;
struct btrfs_qgroup *qgroup;
- struct seq_list elem = {};
+ struct seq_list elem = SEQ_LIST_INIT(elem);
u64 seq;
int old_roots = 0;
int new_roots = 0;
@@ -1967,7 +2071,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
int err;
struct btrfs_qgroup *qg;
u64 root_obj = 0;
- struct seq_list elem = {};
+ struct seq_list elem = SEQ_LIST_INIT(elem);
parents = ulist_alloc(GFP_NOFS);
if (!parents)
@@ -2156,6 +2260,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+ if (ret)
+ fs_info->qgroup_flags |=
+ BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
spin_lock(&fs_info->qgroup_lock);
}
if (fs_info->quota_enabled)
@@ -2219,6 +2327,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
ret = -EINVAL;
goto out;
}
+
+ if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
+ ret = -EINVAL;
+ goto out;
+ }
++i_qgroups;
}
}
@@ -2230,17 +2343,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
- ret = update_qgroup_limit_item(trans, quota_root, objectid,
- inherit->lim.flags,
- inherit->lim.max_rfer,
- inherit->lim.max_excl,
- inherit->lim.rsv_rfer,
- inherit->lim.rsv_excl);
- if (ret)
- goto out;
- }
-
if (srcid) {
struct btrfs_root *srcroot;
struct btrfs_key srckey;
@@ -2286,6 +2388,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
goto unlock;
}
+ if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
+ dstgroup->lim_flags = inherit->lim.flags;
+ dstgroup->max_rfer = inherit->lim.max_rfer;
+ dstgroup->max_excl = inherit->lim.max_excl;
+ dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
+ dstgroup->rsv_excl = inherit->lim.rsv_excl;
+
+ ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
+ if (ret) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ btrfs_info(fs_info, "unable to update quota limit for %llu",
+ dstgroup->qgroupid);
+ goto unlock;
+ }
+ }
+
if (srcid) {
srcgroup = find_qgroup_rb(fs_info, srcid);
if (!srcgroup)
@@ -2302,6 +2420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
dstgroup->excl_cmpr = level_size;
srcgroup->excl = level_size;
srcgroup->excl_cmpr = level_size;
+
+ /* inherit the limit info */
+ dstgroup->lim_flags = srcgroup->lim_flags;
+ dstgroup->max_rfer = srcgroup->max_rfer;
+ dstgroup->max_excl = srcgroup->max_excl;
+ dstgroup->rsv_rfer = srcgroup->rsv_rfer;
+ dstgroup->rsv_excl = srcgroup->rsv_excl;
+
qgroup_dirty(fs_info, dstgroup);
qgroup_dirty(fs_info, srcgroup);
}
@@ -2358,12 +2484,6 @@ out:
return ret;
}
-/*
- * reserve some space for a qgroup and all its parents. The reservation takes
- * place with start_transaction or dealloc_reserve, similar to ENOSPC
- * accounting. If not enough space is available, EDQUOT is returned.
- * We assume that the requested space is new for all qgroups.
- */
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
struct btrfs_root *quota_root;
@@ -2513,7 +2633,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
/*
* returns < 0 on error, 0 when more leafs are to be scanned.
- * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ * returns 1 when done.
*/
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
@@ -2522,7 +2642,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
{
struct btrfs_key found;
struct ulist *roots = NULL;
- struct seq_list tree_mod_seq_elem = {};
+ struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
u64 num_bytes;
u64 seq;
int new_roots;
@@ -2618,6 +2738,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
struct ulist *tmp = NULL, *qgroups = NULL;
struct extent_buffer *scratch_leaf = NULL;
int err = -ENOMEM;
+ int ret = 0;
path = btrfs_alloc_path();
if (!path)
@@ -2660,7 +2781,7 @@ out:
mutex_lock(&fs_info->qgroup_rescan_lock);
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
- if (err == 2 &&
+ if (err > 0 &&
fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
} else if (err < 0) {
@@ -2668,13 +2789,33 @@ out:
}
mutex_unlock(&fs_info->qgroup_rescan_lock);
+ /*
+ * only update status, since the previous part has already updated the
+ * qgroup info.
+ */
+ trans = btrfs_start_transaction(fs_info->quota_root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ btrfs_err(fs_info,
+ "fail to start transaction for status update: %d\n",
+ err);
+ goto done;
+ }
+ ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+ if (ret < 0) {
+ err = ret;
+ btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
+ }
+ btrfs_end_transaction(trans, fs_info->quota_root);
+
if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s",
- err == 2 ? " (inconsistency flag cleared)" : "");
+ err > 0 ? " (inconsistency flag cleared)" : "");
} else {
btrfs_err(fs_info, "qgroup scan failed with %d", err);
}
+done:
complete_all(&fs_info->qgroup_rescan_completion);
}
@@ -2709,7 +2850,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
mutex_unlock(&fs_info->qgroup_rescan_lock);
goto err;
}
-
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 18cc68ca3090..c5242aa9a4b2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -70,8 +70,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
- char *name);
+ struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5264858ed768..fa72068bd256 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -237,12 +237,8 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
}
x = cmpxchg(&info->stripe_hash_table, NULL, table);
- if (x) {
- if (is_vmalloc_addr(x))
- vfree(x);
- else
- kfree(x);
- }
+ if (x)
+ kvfree(x);
return 0;
}
@@ -453,10 +449,7 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
if (!info->stripe_hash_table)
return;
btrfs_clear_rbio_cache(info);
- if (is_vmalloc_addr(info->stripe_hash_table))
- vfree(info->stripe_hash_table);
- else
- kfree(info->stripe_hash_table);
+ kvfree(info->stripe_hash_table);
info->stripe_hash_table = NULL;
}
@@ -1807,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
int err;
int i;
- pointers = kzalloc(rbio->real_stripes * sizeof(void *),
- GFP_NOFS);
+ pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
if (!pointers) {
err = -ENOMEM;
goto cleanup_io;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d83085381bcc..74b24b01d574 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
mutex_lock(&inode->i_mutex);
ret = btrfs_check_data_free_space(inode, cluster->end +
- 1 - cluster->start);
+ 1 - cluster->start, 0);
if (ret)
goto out;
@@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc,
}
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
- struct inode *inode, u64 ino)
+ struct btrfs_block_group_cache *block_group,
+ struct inode *inode,
+ u64 ino)
{
struct btrfs_key key;
struct btrfs_root *root = fs_info->tree_root;
@@ -3463,7 +3465,7 @@ truncate:
goto out;
}
- ret = btrfs_truncate_free_space_cache(root, trans, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
@@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc,
*/
if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
ret = delete_block_group_cache(rc->extent_root->fs_info,
+ rc->block_group,
NULL, ref_objectid);
if (ret != -ENOENT)
return ret;
@@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
btrfs_free_path(path);
if (!IS_ERR(inode))
- ret = delete_block_group_cache(fs_info, inode, 0);
+ ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
else
ret = PTR_ERR(inode);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ec57687c9a4d..ab5811545a98 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* the statistics.
*/
- sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
- sizeof(*sblocks_for_recheck),
- GFP_NOFS);
+ sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
+ sizeof(*sblocks_for_recheck), GFP_NOFS);
if (!sblocks_for_recheck) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
unsigned long *bitmap,
u64 start, u64 len)
{
- int offset;
+ u32 offset;
int nsectors;
int sectorsize = sparity->sctx->dev_root->sectorsize;
@@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
}
start -= sparity->logic_start;
- offset = (int)do_div(start, sparity->stripe_len);
+ start = div_u64_rem(start, sparity->stripe_len, &offset);
offset /= sectorsize;
nsectors = (int)len / sectorsize;
@@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num,
int j = 0;
u64 stripe_nr;
u64 last_offset;
- int stripe_index;
- int rot;
+ u32 stripe_index;
+ u32 rot;
last_offset = (physical - map->stripes[num].physical) *
nr_data_stripes(map);
@@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num,
for (i = 0; i < nr_data_stripes(map); i++) {
*offset = last_offset + i * map->stripe_len;
- stripe_nr = *offset;
- do_div(stripe_nr, map->stripe_len);
- do_div(stripe_nr, nr_data_stripes(map));
+ stripe_nr = div_u64(*offset, map->stripe_len);
+ stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
/* Work out the disk rotation on this stripe-set */
- rot = do_div(stripe_nr, map->num_stripes);
+ stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
/* calculate which stripe this data locates */
rot += i;
stripe_index = rot % map->num_stripes;
@@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int extent_mirror_num;
int stop_loop = 0;
- nstripes = length;
physical = map->stripes[num].physical;
offset = 0;
- do_div(nstripes, map->stripe_len);
+ nstripes = div_u64(length, map->stripe_len);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
offset = map->stripe_len * num;
increment = map->stripe_len * map->num_stripes;
@@ -3563,7 +3560,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
int ret = 0;
- int flags = WQ_FREEZABLE | WQ_UNBOUND;
+ unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
if (fs_info->scrub_workers_refcnt == 0) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d6033f540cc7..a1216f9b4917 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3067,48 +3067,6 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
return NULL;
}
-static int path_loop(struct send_ctx *sctx, struct fs_path *name,
- u64 ino, u64 gen, u64 *ancestor_ino)
-{
- int ret = 0;
- u64 parent_inode = 0;
- u64 parent_gen = 0;
- u64 start_ino = ino;
-
- *ancestor_ino = 0;
- while (ino != BTRFS_FIRST_FREE_OBJECTID) {
- fs_path_reset(name);
-
- if (is_waiting_for_rm(sctx, ino))
- break;
- if (is_waiting_for_move(sctx, ino)) {
- if (*ancestor_ino == 0)
- *ancestor_ino = ino;
- ret = get_first_ref(sctx->parent_root, ino,
- &parent_inode, &parent_gen, name);
- } else {
- ret = __get_cur_name_and_parent(sctx, ino, gen,
- &parent_inode,
- &parent_gen, name);
- if (ret > 0) {
- ret = 0;
- break;
- }
- }
- if (ret < 0)
- break;
- if (parent_inode == start_ino) {
- ret = 1;
- if (*ancestor_ino == 0)
- *ancestor_ino = ino;
- break;
- }
- ino = parent_inode;
- gen = parent_gen;
- }
- return ret;
-}
-
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
struct fs_path *from_path = NULL;
@@ -3120,7 +3078,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
struct waiting_dir_move *dm = NULL;
u64 rmdir_ino = 0;
int ret;
- u64 ancestor = 0;
name = fs_path_alloc();
from_path = fs_path_alloc();
@@ -3152,22 +3109,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
goto out;
sctx->send_progress = sctx->cur_ino + 1;
- ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
- if (ret) {
- LIST_HEAD(deleted_refs);
- ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
- ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
- &pm->update_refs, &deleted_refs,
- pm->is_orphan);
- if (ret < 0)
- goto out;
- if (rmdir_ino) {
- dm = get_waiting_dir_move(sctx, pm->ino);
- ASSERT(dm);
- dm->rmdir_ino = rmdir_ino;
- }
- goto out;
- }
fs_path_reset(name);
to_path = name;
name = NULL;
@@ -3610,10 +3551,27 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
if (ret < 0)
goto out;
if (ret) {
+ struct name_cache_entry *nce;
+
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
goto out;
+ /*
+ * Make sure we clear our orphanized inode's
+ * name from the name cache. This is because the
+ * inode ow_inode might be an ancestor of some
+ * other inode that will be orphanized as well
+ * later and has an inode number greater than
+ * sctx->send_progress. We need to prevent
+ * future name lookups from using the old name
+ * and get instead the orphan name.
+ */
+ nce = name_cache_search(sctx, ow_inode, ow_gen);
+ if (nce) {
+ name_cache_delete(sctx, nce);
+ kfree(nce);
+ }
} else {
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
@@ -5852,19 +5810,20 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
ret = PTR_ERR(clone_root);
goto out;
}
- clone_sources_to_rollback = i + 1;
spin_lock(&clone_root->root_item_lock);
- clone_root->send_in_progress++;
- if (!btrfs_root_readonly(clone_root)) {
+ if (!btrfs_root_readonly(clone_root) ||
+ btrfs_root_dead(clone_root)) {
spin_unlock(&clone_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = -EPERM;
goto out;
}
+ clone_root->send_in_progress++;
spin_unlock(&clone_root->root_item_lock);
srcu_read_unlock(&fs_info->subvol_srcu, index);
sctx->clone_roots[i].root = clone_root;
+ clone_sources_to_rollback = i + 1;
}
vfree(clone_sources_tmp);
clone_sources_tmp = NULL;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 05fef198ff94..9e66f5e724db 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -901,6 +901,15 @@ find_root:
if (IS_ERR(new_root))
return ERR_CAST(new_root);
+ if (!(sb->s_flags & MS_RDONLY)) {
+ int ret;
+ down_read(&fs_info->cleanup_work_sem);
+ ret = btrfs_orphan_cleanup(new_root);
+ up_read(&fs_info->cleanup_work_sem);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
dir_id = btrfs_root_dirid(&new_root->root_item);
setup_root:
location.objectid = dir_id;
@@ -916,7 +925,7 @@ setup_root:
* a reference to the dentry. We will have already gotten a reference
* to the inode in btrfs_fill_super so we're good to go.
*/
- if (!new && sb->s_root->d_inode == inode) {
+ if (!new && d_inode(sb->s_root) == inode) {
iput(inode);
return dget(sb->s_root);
}
@@ -1221,7 +1230,7 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
root = mount_subtree(mnt, subvol_name);
- if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
+ if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
struct super_block *s = root->d_sb;
dput(root);
root = ERR_PTR(-EINVAL);
@@ -1714,7 +1723,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
avail_space = device->total_bytes - device->bytes_used;
/* align with stripe_len */
- do_div(avail_space, BTRFS_STRIPE_LEN);
+ avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
avail_space *= BTRFS_STRIPE_LEN;
/*
@@ -1886,8 +1895,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
/* Mask in the root object ID too, to disambiguate subvols */
- buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
- buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
+ buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
+ buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
return 0;
}
@@ -1908,6 +1917,17 @@ static struct file_system_type btrfs_fs_type = {
};
MODULE_ALIAS_FS("btrfs");
+static int btrfs_control_open(struct inode *inode, struct file *file)
+{
+ /*
+ * The control file's private_data is used to hold the
+ * transaction when it is started and is used to keep
+ * track of whether a transaction is already in progress.
+ */
+ file->private_data = NULL;
+ return 0;
+}
+
/*
* used by btrfsctl to scan devices when no FS is mounted
*/
@@ -2009,6 +2029,7 @@ static const struct super_operations btrfs_super_ops = {
};
static const struct file_operations btrfs_ctl_fops = {
+ .open = btrfs_control_open,
.unlocked_ioctl = btrfs_control_ioctl,
.compat_ioctl = btrfs_control_ioctl,
.owner = THIS_MODULE,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 94edb0a2a026..e8a4c86d274d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -459,7 +459,7 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
-static u64 supported_feature_masks[3] = {
+static const u64 supported_feature_masks[3] = {
[FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
[FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index f7dd298b3cf6..3a4bbed723fd 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
/* convert from attribute */
-#define to_btrfs_feature_attr(a) \
- container_of(a, struct btrfs_feature_attr, kobj_attr)
-#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
-#define attr_to_btrfs_feature_attr(a) \
- to_btrfs_feature_attr(attr_to_btrfs_attr(a))
+static inline struct btrfs_feature_attr *
+to_btrfs_feature_attr(struct kobj_attribute *a)
+{
+ return container_of(a, struct btrfs_feature_attr, kobj_attr);
+}
+
+static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr)
+{
+ return container_of(attr, struct kobj_attribute, attr);
+}
+
+static inline struct btrfs_feature_attr *
+attr_to_btrfs_feature_attr(struct attribute *attr)
+{
+ return to_btrfs_feature_attr(attr_to_btrfs_attr(attr));
+}
+
char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 73f299ebdabb..c32a7ba76bca 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -232,7 +232,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
init_dummy_trans(&trans);
test_msg("Qgroup basic add\n");
- ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL);
+ ret = btrfs_create_qgroup(NULL, fs_info, 5);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
@@ -301,7 +301,7 @@ static int test_multiple_refs(struct btrfs_root *root)
test_msg("Qgroup multiple refs test\n");
/* We have 5 created already from the previous test */
- ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL);
+ ret = btrfs_create_qgroup(NULL, fs_info, 256);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8be4278e25e8..5628e25250c0 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,7 +35,7 @@
#define BTRFS_ROOT_TRANS_TAG 0
-static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
+static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_RUNNING] = 0U,
[TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE |
__TRANS_START),
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
+ if (transaction->delayed_refs.pending_csums)
+ printk(KERN_ERR "pending csums is %llu\n",
+ transaction->delayed_refs.pending_csums);
while (!list_empty(&transaction->pending_chunks)) {
struct extent_map *em;
@@ -93,11 +96,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
*/
ASSERT(!waitqueue_active(&state->wq));
free_extent_state(state);
- if (need_resched()) {
- spin_unlock(&tree->lock);
- cond_resched();
- spin_lock(&tree->lock);
- }
+
+ cond_resched_lock(&tree->lock);
}
spin_unlock(&tree->lock);
}
@@ -222,10 +222,12 @@ loop:
atomic_set(&cur_trans->use_count, 2);
cur_trans->have_free_bgs = 0;
cur_trans->start_time = get_seconds();
+ cur_trans->dirty_bg_run = 0;
cur_trans->delayed_refs.href_root = RB_ROOT;
atomic_set(&cur_trans->delayed_refs.num_entries, 0);
cur_trans->delayed_refs.num_heads_ready = 0;
+ cur_trans->delayed_refs.pending_csums = 0;
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
@@ -250,6 +252,9 @@ loop:
INIT_LIST_HEAD(&cur_trans->switch_commits);
INIT_LIST_HEAD(&cur_trans->pending_ordered);
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
+ INIT_LIST_HEAD(&cur_trans->io_bgs);
+ mutex_init(&cur_trans->cache_write_mutex);
+ cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
@@ -721,7 +726,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates) {
- err = btrfs_run_delayed_refs(trans, root, updates);
+ err = btrfs_run_delayed_refs(trans, root, updates * 2);
if (err) /* Error code will also eval true */
return err;
}
@@ -1057,6 +1062,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
+ struct list_head *io_bgs = &trans->transaction->io_bgs;
struct list_head *next;
struct extent_buffer *eb;
int ret;
@@ -1110,7 +1116,7 @@ again:
return ret;
}
- while (!list_empty(dirty_bgs)) {
+ while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
ret = btrfs_write_dirty_block_groups(trans, root);
if (ret)
return ret;
@@ -1810,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
return ret;
}
+ if (!cur_trans->dirty_bg_run) {
+ int run_it = 0;
+
+ /* this mutex is also taken before trying to set
+ * block groups readonly. We need to make sure
+ * that nobody has set a block group readonly
+ * after extents from that block group have been
+ * allocated for cache files. btrfs_set_block_group_ro
+ * will wait for the transaction to commit if it
+ * finds dirty_bg_run = 1
+ *
+ * The dirty_bg_run flag is also used to make sure only
+ * one process starts all the block group IO. It wouldn't
+ * hurt to have more than one go through, but there's no
+ * real advantage to it either.
+ */
+ mutex_lock(&root->fs_info->ro_block_group_mutex);
+ if (!cur_trans->dirty_bg_run) {
+ run_it = 1;
+ cur_trans->dirty_bg_run = 1;
+ }
+ mutex_unlock(&root->fs_info->ro_block_group_mutex);
+
+ if (run_it)
+ ret = btrfs_start_dirty_block_groups(trans, root);
+ }
+ if (ret) {
+ btrfs_end_transaction(trans, root);
+ return ret;
+ }
+
spin_lock(&root->fs_info->trans_lock);
list_splice(&trans->ordered, &cur_trans->pending_ordered);
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
@@ -2003,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
assert_qgroups_uptodate(trans);
ASSERT(list_empty(&cur_trans->dirty_bgs));
+ ASSERT(list_empty(&cur_trans->io_bgs));
update_super_roots(root);
btrfs_set_super_log_root(root->fs_info->super_copy, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 937050a2b68e..0b24755596ba 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -64,9 +64,19 @@ struct btrfs_transaction {
struct list_head pending_ordered;
struct list_head switch_commits;
struct list_head dirty_bgs;
+ struct list_head io_bgs;
+ u64 num_dirty_bgs;
+
+ /*
+ * we need to make sure block group deletion doesn't race with
+ * free space cache writeout. This mutex keeps them from stomping
+ * on each other
+ */
+ struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
+ int dirty_bg_run;
};
#define __TRANS_FREEZABLE (1U << 0)
@@ -136,9 +146,11 @@ struct btrfs_pending_snapshot {
static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
struct inode *inode)
{
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+ spin_unlock(&BTRFS_I(inode)->lock);
}
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c5b8ba37f88e..d04968374e9d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
if (btrfs_inode_generation(eb, src_item) == 0) {
struct extent_buffer *dst_eb = path->nodes[0];
+ const u64 ino_size = btrfs_inode_size(eb, src_item);
+ /*
+ * For regular files an ino_size == 0 is used only when
+ * logging that an inode exists, as part of a directory
+ * fsync, and the inode wasn't fsynced before. In this
+ * case don't set the size of the inode in the fs/subvol
+ * tree, otherwise we would be throwing valid data away.
+ */
if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
- S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
+ S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
+ ino_size != 0) {
struct btrfs_map_token token;
- u64 ino_size = btrfs_inode_size(eb, src_item);
btrfs_init_map_token(&token);
btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -1951,6 +1959,104 @@ out:
return ret;
}
+static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_root *log,
+ struct btrfs_path *path,
+ const u64 ino)
+{
+ struct btrfs_key search_key;
+ struct btrfs_path *log_path;
+ int i;
+ int nritems;
+ int ret;
+
+ log_path = btrfs_alloc_path();
+ if (!log_path)
+ return -ENOMEM;
+
+ search_key.objectid = ino;
+ search_key.type = BTRFS_XATTR_ITEM_KEY;
+ search_key.offset = 0;
+again:
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+process_leaf:
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ for (i = path->slots[0]; i < nritems; i++) {
+ struct btrfs_key key;
+ struct btrfs_dir_item *di;
+ struct btrfs_dir_item *log_di;
+ u32 total_size;
+ u32 cur;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, i);
+ if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
+ ret = 0;
+ goto out;
+ }
+
+ di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
+ total_size = btrfs_item_size_nr(path->nodes[0], i);
+ cur = 0;
+ while (cur < total_size) {
+ u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
+ u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
+ u32 this_len = sizeof(*di) + name_len + data_len;
+ char *name;
+
+ name = kmalloc(name_len, GFP_NOFS);
+ if (!name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ read_extent_buffer(path->nodes[0], name,
+ (unsigned long)(di + 1), name_len);
+
+ log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
+ name, name_len, 0);
+ btrfs_release_path(log_path);
+ if (!log_di) {
+ /* Doesn't exist in log tree, so delete it. */
+ btrfs_release_path(path);
+ di = btrfs_lookup_xattr(trans, root, path, ino,
+ name, name_len, -1);
+ kfree(name);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+ ASSERT(di);
+ ret = btrfs_delete_one_dir_name(trans, root,
+ path, di);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ search_key = key;
+ goto again;
+ }
+ kfree(name);
+ if (IS_ERR(log_di)) {
+ ret = PTR_ERR(log_di);
+ goto out;
+ }
+ cur += this_len;
+ di = (struct btrfs_dir_item *)((char *)di + this_len);
+ }
+ }
+ ret = btrfs_next_leaf(root, path);
+ if (ret > 0)
+ ret = 0;
+ else if (ret == 0)
+ goto process_leaf;
+out:
+ btrfs_free_path(log_path);
+ btrfs_release_path(path);
+ return ret;
+}
+
+
/*
* deletion replay happens before we copy any new directory items
* out of the log or out of backreferences from inodes. It
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
inode_item = btrfs_item_ptr(eb, i,
struct btrfs_inode_item);
+ ret = replay_xattr_deletes(wc->trans, root, log,
+ path, key.objectid);
+ if (ret)
+ break;
mode = btrfs_inode_mode(eb, inode_item);
if (S_ISDIR(mode)) {
ret = replay_dir_deletes(wc->trans,
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
+ clean_tree_block(trans, root->fs_info,
+ next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
+ clean_tree_block(trans, root->fs_info,
+ next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- clean_tree_block(trans, log, next);
+ clean_tree_block(trans, log->fs_info, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
}
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_path *path,
struct btrfs_path *dst_path, int key_type,
+ struct btrfs_log_ctx *ctx,
u64 min_offset, u64 *last_offset_ret)
{
struct btrfs_key min_key;
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
src = path->nodes[0];
nritems = btrfs_header_nritems(src);
for (i = path->slots[0]; i < nritems; i++) {
+ struct btrfs_dir_item *di;
+
btrfs_item_key_to_cpu(src, &min_key, i);
if (min_key.objectid != ino || min_key.type != key_type)
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
err = ret;
goto done;
}
+
+ /*
+ * We must make sure that when we log a directory entry,
+ * the corresponding inode, after log replay, has a
+ * matching link count. For example:
+ *
+ * touch foo
+ * mkdir mydir
+ * sync
+ * ln foo mydir/bar
+ * xfs_io -c "fsync" mydir
+ * <crash>
+ * <mount fs and log replay>
+ *
+ * Would result in a fsync log that when replayed, our
+ * file inode would have a link count of 1, but we get
+ * two directory entries pointing to the same inode.
+ * After removing one of the names, it would not be
+ * possible to remove the other name, which always
+ * resulted in stale file handle errors, and would not
+ * be possible to rmdir the parent directory, since
+ * its i_size could never decrement to the value
+ * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
+ */
+ di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
+ btrfs_dir_item_key_to_cpu(src, di, &tmp);
+ if (ctx &&
+ (btrfs_dir_transid(src, di) == trans->transid ||
+ btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
+ tmp.type != BTRFS_ROOT_ITEM_KEY)
+ ctx->log_new_dentries = true;
}
path->slots[0] = nritems;
@@ -3175,7 +3321,8 @@ done:
static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_path *path,
- struct btrfs_path *dst_path)
+ struct btrfs_path *dst_path,
+ struct btrfs_log_ctx *ctx)
{
u64 min_key;
u64 max_key;
@@ -3187,7 +3334,7 @@ again:
max_key = 0;
while (1) {
ret = log_dir_items(trans, root, inode, path,
- dst_path, key_type, min_key,
+ dst_path, key_type, ctx, min_key,
&max_key);
if (ret)
return ret;
@@ -3963,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
if (ret < 0) {
return ret;
} else if (ret > 0) {
- *size_ret = i_size_read(inode);
+ *size_ret = 0;
} else {
struct btrfs_inode_item *item;
@@ -4070,10 +4217,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (S_ISDIR(inode->i_mode)) {
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
- if (inode_only == LOG_INODE_EXISTS) {
- max_key_type = BTRFS_INODE_EXTREF_KEY;
- max_key.type = max_key_type;
- }
+ if (inode_only == LOG_INODE_EXISTS)
+ max_key_type = BTRFS_XATTR_ITEM_KEY;
ret = drop_objectid_items(trans, log, path, ino, max_key_type);
} else {
if (inode_only == LOG_INODE_EXISTS) {
@@ -4098,7 +4243,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags)) {
if (inode_only == LOG_INODE_EXISTS) {
- max_key.type = BTRFS_INODE_EXTREF_KEY;
+ max_key.type = BTRFS_XATTR_ITEM_KEY;
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
@@ -4106,20 +4251,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
&BTRFS_I(inode)->runtime_flags);
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
- ret = btrfs_truncate_inode_items(trans, log,
- inode, 0, 0);
+ while(1) {
+ ret = btrfs_truncate_inode_items(trans,
+ log, inode, 0, 0);
+ if (ret != -EAGAIN)
+ break;
+ }
}
- } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags) ||
+ } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+ &BTRFS_I(inode)->runtime_flags) ||
inode_only == LOG_INODE_EXISTS) {
- if (inode_only == LOG_INODE_ALL) {
- clear_bit(BTRFS_INODE_COPY_EVERYTHING,
- &BTRFS_I(inode)->runtime_flags);
+ if (inode_only == LOG_INODE_ALL)
fast_search = true;
- max_key.type = BTRFS_XATTR_ITEM_KEY;
- } else {
- max_key.type = BTRFS_INODE_EXTREF_KEY;
- }
+ max_key.type = BTRFS_XATTR_ITEM_KEY;
ret = drop_objectid_items(trans, log, path, ino,
max_key.type);
} else {
@@ -4277,15 +4421,18 @@ log_extents:
}
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
- ret = log_directory_changes(trans, root, inode, path, dst_path);
+ ret = log_directory_changes(trans, root, inode, path, dst_path,
+ ctx);
if (ret) {
err = ret;
goto out_unlock;
}
}
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->logged_trans = trans->transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
+ spin_unlock(&BTRFS_I(inode)->lock);
out_unlock:
if (unlikely(err))
btrfs_put_logged_extents(&logged_list);
@@ -4327,9 +4474,9 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
goto out;
if (!S_ISDIR(inode->i_mode)) {
- if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+ if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
goto out;
- inode = parent->d_inode;
+ inode = d_inode(parent);
}
while (1) {
@@ -4355,7 +4502,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
break;
}
- if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+ if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
break;
if (IS_ROOT(parent))
@@ -4364,7 +4511,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
parent = dget_parent(parent);
dput(old_parent);
old_parent = parent;
- inode = parent->d_inode;
+ inode = d_inode(parent);
}
dput(old_parent);
@@ -4372,6 +4519,181 @@ out:
return ret;
}
+struct btrfs_dir_list {
+ u64 ino;
+ struct list_head list;
+};
+
+/*
+ * Log the inodes of the new dentries of a directory. See log_dir_items() for
+ * details about why it is needed.
+ * This is a recursive operation - if an existing dentry corresponds to a
+ * directory, that directory's new entries are logged too (same behaviour as
+ * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
+ * the dentries point to we do not lock their i_mutex, otherwise lockdep
+ * complains about the following circular lock dependency / possible deadlock:
+ *
+ * CPU0 CPU1
+ * ---- ----
+ * lock(&type->i_mutex_dir_key#3/2);
+ * lock(sb_internal#2);
+ * lock(&type->i_mutex_dir_key#3/2);
+ * lock(&sb->s_type->i_mutex_key#14);
+ *
+ * Where sb_internal is the lock (a counter that works as a lock) acquired by
+ * sb_start_intwrite() in btrfs_start_transaction().
+ * Not locking i_mutex of the inodes is still safe because:
+ *
+ * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
+ * that while logging the inode new references (names) are added or removed
+ * from the inode, leaving the logged inode item with a link count that does
+ * not match the number of logged inode reference items. This is fine because
+ * at log replay time we compute the real number of links and correct the
+ * link count in the inode item (see replay_one_buffer() and
+ * link_to_fixup_dir());
+ *
+ * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
+ * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
+ * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
+ * has a size that doesn't match the sum of the lengths of all the logged
+ * names. This does not result in a problem because if a dir_item key is
+ * logged but its matching dir_index key is not logged, at log replay time we
+ * don't use it to replay the respective name (see replay_one_name()). On the
+ * other hand if only the dir_index key ends up being logged, the respective
+ * name is added to the fs/subvol tree with both the dir_item and dir_index
+ * keys created (see replay_one_name()).
+ * The directory's inode item with a wrong i_size is not a problem as well,
+ * since we don't use it at log replay time to set the i_size in the inode
+ * item of the fs/subvol tree (see overwrite_item()).
+ */
+static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *start_inode,
+ struct btrfs_log_ctx *ctx)
+{
+ struct btrfs_root *log = root->log_root;
+ struct btrfs_path *path;
+ LIST_HEAD(dir_list);
+ struct btrfs_dir_list *dir_elem;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
+ if (!dir_elem) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
+ dir_elem->ino = btrfs_ino(start_inode);
+ list_add_tail(&dir_elem->list, &dir_list);
+
+ while (!list_empty(&dir_list)) {
+ struct extent_buffer *leaf;
+ struct btrfs_key min_key;
+ int nritems;
+ int i;
+
+ dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
+ list);
+ if (ret)
+ goto next_dir_inode;
+
+ min_key.objectid = dir_elem->ino;
+ min_key.type = BTRFS_DIR_ITEM_KEY;
+ min_key.offset = 0;
+again:
+ btrfs_release_path(path);
+ ret = btrfs_search_forward(log, &min_key, path, trans->transid);
+ if (ret < 0) {
+ goto next_dir_inode;
+ } else if (ret > 0) {
+ ret = 0;
+ goto next_dir_inode;
+ }
+
+process_leaf:
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ for (i = path->slots[0]; i < nritems; i++) {
+ struct btrfs_dir_item *di;
+ struct btrfs_key di_key;
+ struct inode *di_inode;
+ struct btrfs_dir_list *new_dir_elem;
+ int log_mode = LOG_INODE_EXISTS;
+ int type;
+
+ btrfs_item_key_to_cpu(leaf, &min_key, i);
+ if (min_key.objectid != dir_elem->ino ||
+ min_key.type != BTRFS_DIR_ITEM_KEY)
+ goto next_dir_inode;
+
+ di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
+ type = btrfs_dir_type(leaf, di);
+ if (btrfs_dir_transid(leaf, di) < trans->transid &&
+ type != BTRFS_FT_DIR)
+ continue;
+ btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
+ if (di_key.type == BTRFS_ROOT_ITEM_KEY)
+ continue;
+
+ di_inode = btrfs_iget(root->fs_info->sb, &di_key,
+ root, NULL);
+ if (IS_ERR(di_inode)) {
+ ret = PTR_ERR(di_inode);
+ goto next_dir_inode;
+ }
+
+ if (btrfs_inode_in_log(di_inode, trans->transid)) {
+ iput(di_inode);
+ continue;
+ }
+
+ ctx->log_new_dentries = false;
+ if (type == BTRFS_FT_DIR)
+ log_mode = LOG_INODE_ALL;
+ btrfs_release_path(path);
+ ret = btrfs_log_inode(trans, root, di_inode,
+ log_mode, 0, LLONG_MAX, ctx);
+ iput(di_inode);
+ if (ret)
+ goto next_dir_inode;
+ if (ctx->log_new_dentries) {
+ new_dir_elem = kmalloc(sizeof(*new_dir_elem),
+ GFP_NOFS);
+ if (!new_dir_elem) {
+ ret = -ENOMEM;
+ goto next_dir_inode;
+ }
+ new_dir_elem->ino = di_key.objectid;
+ list_add_tail(&new_dir_elem->list, &dir_list);
+ }
+ break;
+ }
+ if (i == nritems) {
+ ret = btrfs_next_leaf(log, path);
+ if (ret < 0) {
+ goto next_dir_inode;
+ } else if (ret > 0) {
+ ret = 0;
+ goto next_dir_inode;
+ }
+ goto process_leaf;
+ }
+ if (min_key.offset < (u64)-1) {
+ min_key.offset++;
+ goto again;
+ }
+next_dir_inode:
+ list_del(&dir_elem->list);
+ kfree(dir_elem);
+ }
+
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
@@ -4394,6 +4716,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
const struct dentry * const first_parent = parent;
const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
last_committed);
+ bool log_dentries = false;
+ struct inode *orig_inode = inode;
sb = inode->i_sb;
@@ -4449,11 +4773,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_trans;
}
+ if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
+ log_dentries = true;
+
while (1) {
- if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+ if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
break;
- inode = parent->d_inode;
+ inode = d_inode(parent);
if (root != BTRFS_I(inode)->root)
break;
@@ -4485,7 +4812,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
dput(old_parent);
old_parent = parent;
}
- ret = 0;
+ if (log_dentries)
+ ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+ else
+ ret = 0;
end_trans:
dput(old_parent);
if (ret < 0) {
@@ -4515,7 +4845,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct dentry *parent = dget_parent(dentry);
int ret;
- ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
+ ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent,
start, end, 0, ctx);
dput(parent);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 154990c26dcb..6916a781ea02 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -29,6 +29,7 @@ struct btrfs_log_ctx {
int log_ret;
int log_transid;
int io_err;
+ bool log_new_dentries;
struct list_head list;
};
@@ -37,6 +38,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
ctx->log_ret = 0;
ctx->log_transid = 0;
ctx->io_err = 0;
+ ctx->log_new_dentries = false;
INIT_LIST_HEAD(&ctx->list);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8222f6f74147..96aebf3bcd5b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -366,8 +366,8 @@ loop_lock:
btrfsic_submit_bio(cur->bi_rw, cur);
num_run++;
batch_run++;
- if (need_resched())
- cond_resched();
+
+ cond_resched();
/*
* we made progress, there is more work to do and the bdi
@@ -400,8 +400,7 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
- if (need_resched())
- cond_resched();
+ cond_resched();
continue;
}
spin_lock(&device->io_lock);
@@ -609,8 +608,7 @@ error:
return ERR_PTR(-ENOMEM);
}
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
- struct btrfs_fs_devices *fs_devices, int step)
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
{
struct btrfs_device *device, *next;
struct btrfs_device *latest_dev = NULL;
@@ -1060,6 +1058,7 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
struct extent_map *em;
struct list_head *search_list = &trans->transaction->pending_chunks;
int ret = 0;
+ u64 physical_start = *start;
again:
list_for_each_entry(em, search_list, list) {
@@ -1070,9 +1069,9 @@ again:
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev != device)
continue;
- if (map->stripes[i].physical >= *start + len ||
+ if (map->stripes[i].physical >= physical_start + len ||
map->stripes[i].physical + em->orig_block_len <=
- *start)
+ physical_start)
continue;
*start = map->stripes[i].physical +
em->orig_block_len;
@@ -1136,11 +1135,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
-again:
+
max_hole_start = search_start;
max_hole_size = 0;
- hole_size = 0;
+again:
if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
ret = -ENOSPC;
goto out;
@@ -1195,8 +1194,14 @@ again:
*/
if (contains_pending_extent(trans, device,
&search_start,
- hole_size))
- hole_size = 0;
+ hole_size)) {
+ if (key.offset >= search_start) {
+ hole_size = key.offset - search_start;
+ } else {
+ WARN_ON_ONCE(1);
+ hole_size = 0;
+ }
+ }
if (hole_size > max_hole_size) {
max_hole_start = search_start;
@@ -1233,21 +1238,23 @@ next:
* allocated dev extents, and when shrinking the device,
* search_end may be smaller than search_start.
*/
- if (search_end > search_start)
+ if (search_end > search_start) {
hole_size = search_end - search_start;
- if (hole_size > max_hole_size) {
- max_hole_start = search_start;
- max_hole_size = hole_size;
- }
+ if (contains_pending_extent(trans, device, &search_start,
+ hole_size)) {
+ btrfs_release_path(path);
+ goto again;
+ }
- if (contains_pending_extent(trans, device, &search_start, hole_size)) {
- btrfs_release_path(path);
- goto again;
+ if (hole_size > max_hole_size) {
+ max_hole_start = search_start;
+ max_hole_size = hole_size;
+ }
}
/* See above. */
- if (hole_size < num_bytes)
+ if (max_hole_size < num_bytes)
ret = -ENOSPC;
else
ret = 0;
@@ -2487,8 +2494,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
}
static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 chunk_tree, u64 chunk_objectid,
+ struct btrfs_root *root, u64 chunk_objectid,
u64 chunk_offset)
{
int ret;
@@ -2580,7 +2586,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
struct map_lookup *map;
u64 dev_extent_len = 0;
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- u64 chunk_tree = root->fs_info->chunk_root->objectid;
int i, ret = 0;
/* Just in case */
@@ -2634,8 +2639,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
}
}
- ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
- chunk_offset);
+ ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out;
@@ -2664,8 +2668,8 @@ out:
}
static int btrfs_relocate_chunk(struct btrfs_root *root,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset)
+ u64 chunk_objectid,
+ u64 chunk_offset)
{
struct btrfs_root *extent_root;
struct btrfs_trans_handle *trans;
@@ -2707,7 +2711,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
struct btrfs_chunk *chunk;
struct btrfs_key key;
struct btrfs_key found_key;
- u64 chunk_tree = chunk_root->root_key.objectid;
u64 chunk_type;
bool retried = false;
int failed = 0;
@@ -2744,7 +2747,7 @@ again:
btrfs_release_path(path);
if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
- ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
+ ret = btrfs_relocate_chunk(chunk_root,
found_key.objectid,
found_key.offset);
if (ret == -ENOSPC)
@@ -3022,7 +3025,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
stripe_offset = btrfs_stripe_offset(leaf, stripe);
stripe_length = btrfs_chunk_length(leaf, chunk);
- do_div(stripe_length, factor);
+ stripe_length = div_u64(stripe_length, factor);
if (stripe_offset < bargs->pend &&
stripe_offset + stripe_length > bargs->pstart)
@@ -3255,7 +3258,6 @@ again:
}
ret = btrfs_relocate_chunk(chunk_root,
- chunk_root->root_key.objectid,
found_key.objectid,
found_key.offset);
if (ret && ret != -ENOSPC)
@@ -3957,7 +3959,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
struct btrfs_dev_extent *dev_extent = NULL;
struct btrfs_path *path;
u64 length;
- u64 chunk_tree;
u64 chunk_objectid;
u64 chunk_offset;
int ret;
@@ -4027,13 +4028,11 @@ again:
break;
}
- chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
btrfs_release_path(path);
- ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
- chunk_offset);
+ ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
if (ret && ret != -ENOSPC)
goto done;
if (ret == -ENOSPC)
@@ -4131,7 +4130,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
return 0;
}
-static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
.sub_stripes = 2,
.dev_stripes = 1,
@@ -4289,7 +4288,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
max_chunk_size);
- devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+ devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
GFP_NOFS);
if (!devices_info)
return -ENOMEM;
@@ -4400,8 +4399,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
*/
if (stripe_size * data_stripes > max_chunk_size) {
u64 mask = (1ULL << 24) - 1;
- stripe_size = max_chunk_size;
- do_div(stripe_size, data_stripes);
+
+ stripe_size = div_u64(max_chunk_size, data_stripes);
/* bump the answer up to a 16MB boundary */
stripe_size = (stripe_size + mask) & ~mask;
@@ -4413,10 +4412,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
stripe_size = devices_info[ndevs-1].max_avail;
}
- do_div(stripe_size, dev_stripes);
+ stripe_size = div_u64(stripe_size, dev_stripes);
/* align to BTRFS_STRIPE_LEN */
- do_div(stripe_size, raid_stripe_len);
+ stripe_size = div_u64(stripe_size, raid_stripe_len);
stripe_size *= raid_stripe_len;
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4954,7 +4953,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 stripe_nr_orig;
u64 stripe_nr_end;
u64 stripe_len;
- int stripe_index;
+ u32 stripe_index;
int i;
int ret = 0;
int num_stripes;
@@ -4995,7 +4994,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
* stripe_nr counts the total number of stripes we have to stride
* to get to this block
*/
- do_div(stripe_nr, stripe_len);
+ stripe_nr = div64_u64(stripe_nr, stripe_len);
stripe_offset = stripe_nr * stripe_len;
BUG_ON(offset < stripe_offset);
@@ -5011,7 +5010,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* allow a write of a full stripe, but make sure we don't
* allow straddling of stripes
*/
- do_div(raid56_full_stripe_start, full_stripe_len);
+ raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
+ full_stripe_len);
raid56_full_stripe_start *= full_stripe_len;
}
@@ -5136,7 +5136,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_index = 0;
stripe_nr_orig = stripe_nr;
stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
- do_div(stripe_nr_end, map->stripe_len);
+ stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
stripe_end_offset = stripe_nr_end * map->stripe_len -
(offset + *length);
@@ -5144,7 +5144,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (rw & REQ_DISCARD)
num_stripes = min_t(u64, map->num_stripes,
stripe_nr_end - stripe_nr_orig);
- stripe_index = do_div(stripe_nr, map->num_stripes);
+ stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+ &stripe_index);
if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
@@ -5170,9 +5171,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- int factor = map->num_stripes / map->sub_stripes;
+ u32 factor = map->num_stripes / map->sub_stripes;
- stripe_index = do_div(stripe_nr, factor);
+ stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
stripe_index *= map->sub_stripes;
if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
@@ -5198,8 +5199,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
mirror_num > 1)) {
/* push stripe_nr back to the start of the full stripe */
- stripe_nr = raid56_full_stripe_start;
- do_div(stripe_nr, stripe_len * nr_data_stripes(map));
+ stripe_nr = div_u64(raid56_full_stripe_start,
+ stripe_len * nr_data_stripes(map));
/* RAID[56] write or recovery. Return all stripes */
num_stripes = map->num_stripes;
@@ -5209,32 +5210,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_index = 0;
stripe_offset = 0;
} else {
- u64 tmp;
-
/*
* Mirror #0 or #1 means the original data block.
* Mirror #2 is RAID5 parity block.
* Mirror #3 is RAID6 Q block.
*/
- stripe_index = do_div(stripe_nr, nr_data_stripes(map));
+ stripe_nr = div_u64_rem(stripe_nr,
+ nr_data_stripes(map), &stripe_index);
if (mirror_num > 1)
stripe_index = nr_data_stripes(map) +
mirror_num - 2;
/* We distribute the parity blocks across stripes */
- tmp = stripe_nr + stripe_index;
- stripe_index = do_div(tmp, map->num_stripes);
+ div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
+ &stripe_index);
if (!(rw & (REQ_WRITE | REQ_DISCARD |
REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
mirror_num = 1;
}
} else {
/*
- * after this do_div call, stripe_nr is the number of stripes
- * on this device we have to walk to find the data, and
- * stripe_index is the number of our device in the stripe array
+ * after this, stripe_nr is the number of stripes on this
+ * device we have to walk to find the data, and stripe_index is
+ * the number of our device in the stripe array
*/
- stripe_index = do_div(stripe_nr, map->num_stripes);
+ stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+ &stripe_index);
mirror_num = stripe_index + 1;
}
BUG_ON(stripe_index >= map->num_stripes);
@@ -5261,7 +5262,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
mirror_num > 1)) {
u64 tmp;
- int i, rot;
+ unsigned rot;
bbio->raid_map = (u64 *)((void *)bbio->stripes +
sizeof(struct btrfs_bio_stripe) *
@@ -5269,8 +5270,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
sizeof(int) * tgtdev_indexes);
/* Work out the disk rotation on this stripe-set */
- tmp = stripe_nr;
- rot = do_div(tmp, num_stripes);
+ div_u64_rem(stripe_nr, num_stripes, &rot);
/* Fill in the logical address of each stripe */
tmp = stripe_nr * nr_data_stripes(map);
@@ -5285,8 +5285,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
if (rw & REQ_DISCARD) {
- int factor = 0;
- int sub_stripes = 0;
+ u32 factor = 0;
+ u32 sub_stripes = 0;
u64 stripes_per_dev = 0;
u32 remaining_stripes = 0;
u32 last_stripe = 0;
@@ -5437,9 +5437,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
if (found) {
- u64 length = map->stripe_len;
-
- if (physical_of_found + length <=
+ if (physical_of_found + map->stripe_len <=
dev_replace->cursor_left) {
struct btrfs_bio_stripe *tgtdev_stripe =
bbio->stripes + num_stripes;
@@ -5535,15 +5533,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
rmap_len = map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
- do_div(length, map->num_stripes / map->sub_stripes);
+ length = div_u64(length, map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
- do_div(length, map->num_stripes);
+ length = div_u64(length, map->num_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- do_div(length, nr_data_stripes(map));
+ length = div_u64(length, nr_data_stripes(map));
rmap_len = map->stripe_len * nr_data_stripes(map);
}
- buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+ buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
BUG_ON(!buf); /* -ENOMEM */
for (i = 0; i < map->num_stripes; i++) {
@@ -5554,11 +5552,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
continue;
stripe_nr = physical - map->stripes[i].physical;
- do_div(stripe_nr, map->stripe_len);
+ stripe_nr = div_u64(stripe_nr, map->stripe_len);
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
stripe_nr = stripe_nr * map->num_stripes + i;
- do_div(stripe_nr, map->sub_stripes);
+ stripe_nr = div_u64(stripe_nr, map->sub_stripes);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_nr = stripe_nr * map->num_stripes + i;
} /* else if RAID[56], multiply by nr_data_stripes().
@@ -5835,8 +5833,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
u64 length = 0;
u64 map_length;
int ret;
- int dev_nr = 0;
- int total_devs = 1;
+ int dev_nr;
+ int total_devs;
struct btrfs_bio *bbio = NULL;
length = bio->bi_iter.bi_size;
@@ -5877,11 +5875,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
BUG();
}
- while (dev_nr < total_devs) {
+ for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
dev = bbio->stripes[dev_nr].dev;
if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
bbio_error(bbio, first_bio, logical);
- dev_nr++;
continue;
}
@@ -5894,7 +5891,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
ret = breakup_stripe_bio(root, bbio, first_bio, dev,
dev_nr, rw, async_submit);
BUG_ON(ret);
- dev_nr++;
continue;
}
@@ -5909,7 +5905,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
submit_stripe_bio(root, bbio, bio,
bbio->stripes[dev_nr].physical, dev_nr, rw,
async_submit);
- dev_nr++;
}
btrfs_bio_counter_dec(root->fs_info);
return 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 83069dec6898..ebc31331a837 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -421,8 +421,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
- struct btrfs_fs_devices *fs_devices, int step);
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path,
struct btrfs_device **device);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 883b93623bc5..6f518c90e1c1 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -261,7 +261,7 @@ out:
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct btrfs_key key, found_key;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -364,22 +364,42 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
/*
* Check if the attribute is in a supported namespace.
*
- * This applied after the check for the synthetic attributes in the system
+ * This is applied after the check for the synthetic attributes in the system
* namespace.
*/
-static bool btrfs_is_valid_xattr(const char *name)
+static int btrfs_is_valid_xattr(const char *name)
{
- return !strncmp(name, XATTR_SECURITY_PREFIX,
- XATTR_SECURITY_PREFIX_LEN) ||
- !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
- !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
- !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
- !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
+ int len = strlen(name);
+ int prefixlen = 0;
+
+ if (!strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN))
+ prefixlen = XATTR_SECURITY_PREFIX_LEN;
+ else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ prefixlen = XATTR_SYSTEM_PREFIX_LEN;
+ else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+ prefixlen = XATTR_TRUSTED_PREFIX_LEN;
+ else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ prefixlen = XATTR_USER_PREFIX_LEN;
+ else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+ prefixlen = XATTR_BTRFS_PREFIX_LEN;
+ else
+ return -EOPNOTSUPP;
+
+ /*
+ * The name cannot consist of just prefix
+ */
+ if (len <= prefixlen)
+ return -EINVAL;
+
+ return 0;
}
ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
+ int ret;
+
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
@@ -388,15 +408,17 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_getxattr(dentry, name, buffer, size);
- if (!btrfs_is_valid_xattr(name))
- return -EOPNOTSUPP;
- return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
+ ret = btrfs_is_valid_xattr(name);
+ if (ret)
+ return ret;
+ return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
}
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
- struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
+ int ret;
/*
* The permission on security.* and system.* is not checked
@@ -413,23 +435,25 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_setxattr(dentry, name, value, size, flags);
- if (!btrfs_is_valid_xattr(name))
- return -EOPNOTSUPP;
+ ret = btrfs_is_valid_xattr(name);
+ if (ret)
+ return ret;
if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(dentry->d_inode, name,
+ return btrfs_set_prop(d_inode(dentry), name,
value, size, flags);
if (size == 0)
value = ""; /* empty EA, do not remove */
- return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
+ return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
- struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+ struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
+ int ret;
/*
* The permission on security.* and system.* is not checked
@@ -446,14 +470,15 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_removexattr(dentry, name);
- if (!btrfs_is_valid_xattr(name))
- return -EOPNOTSUPP;
+ ret = btrfs_is_valid_xattr(name);
+ if (ret)
+ return ret;
if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(dentry->d_inode, name,
+ return btrfs_set_prop(d_inode(dentry), name,
NULL, 0, XATTR_REPLACE);
- return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
+ return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
XATTR_REPLACE);
}
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index fb22fd8d8fb8..82990b8f872b 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -403,7 +403,7 @@ next:
return ret;
}
-struct btrfs_compress_op btrfs_zlib_compress = {
+const struct btrfs_compress_op btrfs_zlib_compress = {
.alloc_workspace = zlib_alloc_workspace,
.free_workspace = zlib_free_workspace,
.compress_pages = zlib_compress_pages,
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
- if (ret)
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ if (ret && TestClearPageDirty(page))
+ account_page_cleaned(page, mapping);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index fbb08e97438d..6af790fc3df8 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -123,11 +123,11 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
/* check parameters */
ret = -EOPNOTSUPP;
- if (!root->d_inode ||
- !root->d_inode->i_op->lookup ||
- !root->d_inode->i_op->mkdir ||
- !root->d_inode->i_op->setxattr ||
- !root->d_inode->i_op->getxattr ||
+ if (d_is_negative(root) ||
+ !d_backing_inode(root)->i_op->lookup ||
+ !d_backing_inode(root)->i_op->mkdir ||
+ !d_backing_inode(root)->i_op->setxattr ||
+ !d_backing_inode(root)->i_op->getxattr ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs)
goto error_unsupported;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 232426214fdd..afa023dded5b 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -441,12 +441,12 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
fscache_set_store_limit(&object->fscache, ni_size);
- oi_size = i_size_read(object->backer->d_inode);
+ oi_size = i_size_read(d_backing_inode(object->backer));
if (oi_size == ni_size)
return 0;
cachefiles_begin_secure(cache, &saved_cred);
- mutex_lock(&object->backer->d_inode->i_mutex);
+ mutex_lock(&d_inode(object->backer)->i_mutex);
/* if there's an extension to a partial page at the end of the backing
* file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
ret = notify_change(object->backer, &newattrs, NULL);
truncate_failed:
- mutex_unlock(&object->backer->d_inode->i_mutex);
+ mutex_unlock(&d_inode(object->backer)->i_mutex);
cachefiles_end_secure(cache, saved_cred);
if (ret == -EIO) {
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1e51714eb33e..ab857ab9f40d 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -286,13 +286,13 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
} else {
- ret = vfs_unlink(dir->d_inode, rep, NULL);
+ ret = vfs_unlink(d_inode(dir), rep, NULL);
if (preemptive)
cachefiles_mark_object_buried(cache, rep);
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
if (ret == -EIO)
cachefiles_io_error(cache, "Unlink failed");
@@ -303,7 +303,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
/* directories have to be moved to the graveyard */
_debug("move stale object to graveyard");
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
try_again:
/* first step is to make up a grave dentry in the graveyard */
@@ -355,7 +355,7 @@ try_again:
return -EIO;
}
- if (grave->d_inode) {
+ if (d_is_positive(grave)) {
unlock_rename(cache->graveyard, dir);
dput(grave);
grave = NULL;
@@ -387,8 +387,8 @@ try_again:
if (ret < 0) {
cachefiles_io_error(cache, "Rename security error %d", ret);
} else {
- ret = vfs_rename(dir->d_inode, rep,
- cache->graveyard->d_inode, grave, NULL, 0);
+ ret = vfs_rename(d_inode(dir), rep,
+ d_inode(cache->graveyard), grave, NULL, 0);
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache,
"Rename failed with error %d", ret);
@@ -415,18 +415,18 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
_enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
ASSERT(object->dentry);
- ASSERT(object->dentry->d_inode);
+ ASSERT(d_backing_inode(object->dentry));
ASSERT(object->dentry->d_parent);
dir = dget_parent(object->dentry);
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
/* object allocation for the same key preemptively deleted this
* object's file so that it could create its own file */
_debug("object preemptively buried");
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
ret = 0;
} else {
/* we need to check that our parent is _still_ our parent - it
@@ -438,7 +438,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
/* it got moved, presumably by cachefilesd culling it,
* so it's no longer in the key path and we can ignore
* it */
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
ret = 0;
}
}
@@ -473,7 +473,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
path.mnt = cache->mnt;
ASSERT(parent->dentry);
- ASSERT(parent->dentry->d_inode);
+ ASSERT(d_backing_inode(parent->dentry));
if (!(d_is_dir(parent->dentry))) {
// TODO: convert file to dir
@@ -497,7 +497,7 @@ lookup_again:
/* search the current directory for the element name */
_debug("lookup '%s'", name);
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
start = jiffies;
next = lookup_one_len(name, dir, nlen);
@@ -505,21 +505,21 @@ lookup_again:
if (IS_ERR(next))
goto lookup_error;
- _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
+ _debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative");
if (!key)
- object->new = !next->d_inode;
+ object->new = !d_backing_inode(next);
/* if this element of the path doesn't exist, then the lookup phase
* failed, and we can release any readers in the certain knowledge that
* there's nothing for them to actually read */
- if (!next->d_inode)
+ if (d_is_negative(next))
fscache_object_lookup_negative(&object->fscache);
/* we need to create the object if it's negative */
if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
/* index objects and intervening tree levels must be subdirs */
- if (!next->d_inode) {
+ if (d_is_negative(next)) {
ret = cachefiles_has_space(cache, 1, 0);
if (ret < 0)
goto create_error;
@@ -529,26 +529,26 @@ lookup_again:
if (ret < 0)
goto create_error;
start = jiffies;
- ret = vfs_mkdir(dir->d_inode, next, 0);
+ ret = vfs_mkdir(d_inode(dir), next, 0);
cachefiles_hist(cachefiles_mkdir_histogram, start);
if (ret < 0)
goto create_error;
- ASSERT(next->d_inode);
+ ASSERT(d_backing_inode(next));
_debug("mkdir -> %p{%p{ino=%lu}}",
- next, next->d_inode, next->d_inode->i_ino);
+ next, d_backing_inode(next), d_backing_inode(next)->i_ino);
} else if (!d_can_lookup(next)) {
pr_err("inode %lu is not a directory\n",
- next->d_inode->i_ino);
+ d_backing_inode(next)->i_ino);
ret = -ENOBUFS;
goto error;
}
} else {
/* non-index objects start out life as files */
- if (!next->d_inode) {
+ if (d_is_negative(next)) {
ret = cachefiles_has_space(cache, 1, 0);
if (ret < 0)
goto create_error;
@@ -558,21 +558,21 @@ lookup_again:
if (ret < 0)
goto create_error;
start = jiffies;
- ret = vfs_create(dir->d_inode, next, S_IFREG, true);
+ ret = vfs_create(d_inode(dir), next, S_IFREG, true);
cachefiles_hist(cachefiles_create_histogram, start);
if (ret < 0)
goto create_error;
- ASSERT(next->d_inode);
+ ASSERT(d_backing_inode(next));
_debug("create -> %p{%p{ino=%lu}}",
- next, next->d_inode, next->d_inode->i_ino);
+ next, d_backing_inode(next), d_backing_inode(next)->i_ino);
} else if (!d_can_lookup(next) &&
!d_is_reg(next)
) {
pr_err("inode %lu is not a file or directory\n",
- next->d_inode->i_ino);
+ d_backing_inode(next)->i_ino);
ret = -ENOBUFS;
goto error;
}
@@ -581,7 +581,7 @@ lookup_again:
/* process the next component */
if (key) {
_debug("advance");
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(dir);
dir = next;
next = NULL;
@@ -617,7 +617,7 @@ lookup_again:
/* note that we're now using this object */
ret = cachefiles_mark_object_active(cache, object);
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(dir);
dir = NULL;
@@ -646,7 +646,7 @@ lookup_again:
const struct address_space_operations *aops;
ret = -EPERM;
- aops = object->dentry->d_inode->i_mapping->a_ops;
+ aops = d_backing_inode(object->dentry)->i_mapping->a_ops;
if (!aops->bmap)
goto check_error;
@@ -659,7 +659,7 @@ lookup_again:
object->new = 0;
fscache_obtained_object(&object->fscache);
- _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
+ _leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino);
return 0;
create_error:
@@ -695,7 +695,7 @@ lookup_error:
cachefiles_io_error(cache, "Lookup failed");
next = NULL;
error:
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(next);
error_out2:
dput(dir);
@@ -719,7 +719,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
_enter(",,%s", dirname);
/* search the current directory for the element name */
- mutex_lock(&dir->d_inode->i_mutex);
+ mutex_lock(&d_inode(dir)->i_mutex);
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -731,10 +731,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
}
_debug("subdir -> %p %s",
- subdir, subdir->d_inode ? "positive" : "negative");
+ subdir, d_backing_inode(subdir) ? "positive" : "negative");
/* we need to create the subdir if it doesn't exist yet */
- if (!subdir->d_inode) {
+ if (d_is_negative(subdir)) {
ret = cachefiles_has_space(cache, 1, 0);
if (ret < 0)
goto mkdir_error;
@@ -746,22 +746,22 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
ret = security_path_mkdir(&path, subdir, 0700);
if (ret < 0)
goto mkdir_error;
- ret = vfs_mkdir(dir->d_inode, subdir, 0700);
+ ret = vfs_mkdir(d_inode(dir), subdir, 0700);
if (ret < 0)
goto mkdir_error;
- ASSERT(subdir->d_inode);
+ ASSERT(d_backing_inode(subdir));
_debug("mkdir -> %p{%p{ino=%lu}}",
subdir,
- subdir->d_inode,
- subdir->d_inode->i_ino);
+ d_backing_inode(subdir),
+ d_backing_inode(subdir)->i_ino);
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
/* we need to make sure the subdir is a directory */
- ASSERT(subdir->d_inode);
+ ASSERT(d_backing_inode(subdir));
if (!d_can_lookup(subdir)) {
pr_err("%s is not a directory\n", dirname);
@@ -770,18 +770,18 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
}
ret = -EPERM;
- if (!subdir->d_inode->i_op->setxattr ||
- !subdir->d_inode->i_op->getxattr ||
- !subdir->d_inode->i_op->lookup ||
- !subdir->d_inode->i_op->mkdir ||
- !subdir->d_inode->i_op->create ||
- (!subdir->d_inode->i_op->rename &&
- !subdir->d_inode->i_op->rename2) ||
- !subdir->d_inode->i_op->rmdir ||
- !subdir->d_inode->i_op->unlink)
+ if (!d_backing_inode(subdir)->i_op->setxattr ||
+ !d_backing_inode(subdir)->i_op->getxattr ||
+ !d_backing_inode(subdir)->i_op->lookup ||
+ !d_backing_inode(subdir)->i_op->mkdir ||
+ !d_backing_inode(subdir)->i_op->create ||
+ (!d_backing_inode(subdir)->i_op->rename &&
+ !d_backing_inode(subdir)->i_op->rename2) ||
+ !d_backing_inode(subdir)->i_op->rmdir ||
+ !d_backing_inode(subdir)->i_op->unlink)
goto check_error;
- _leave(" = [%lu]", subdir->d_inode->i_ino);
+ _leave(" = [%lu]", d_backing_inode(subdir)->i_ino);
return subdir;
check_error:
@@ -790,19 +790,19 @@ check_error:
return ERR_PTR(ret);
mkdir_error:
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(subdir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
lookup_error:
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
ret = PTR_ERR(subdir);
pr_err("Lookup %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
nomem_d_alloc:
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
@@ -827,7 +827,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
// dir, filename);
/* look up the victim */
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
start = jiffies;
victim = lookup_one_len(filename, dir, strlen(filename));
@@ -836,13 +836,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
goto lookup_error;
//_debug("victim -> %p %s",
- // victim, victim->d_inode ? "positive" : "negative");
+ // victim, d_backing_inode(victim) ? "positive" : "negative");
/* if the object is no longer there then we probably retired the object
* at the netfs's request whilst the cull was in progress
*/
- if (!victim->d_inode) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ if (d_is_negative(victim)) {
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(victim);
_leave(" = -ENOENT [absent]");
return ERR_PTR(-ENOENT);
@@ -871,13 +871,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
object_in_use:
read_unlock(&cache->active_lock);
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(victim);
//_leave(" = -EBUSY [in use]");
return ERR_PTR(-EBUSY);
lookup_error:
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
ret = PTR_ERR(victim);
if (ret == -ENOENT) {
/* file or dir now absent - probably retired by netfs */
@@ -913,7 +913,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
return PTR_ERR(victim);
_debug("victim -> %p %s",
- victim, victim->d_inode ? "positive" : "negative");
+ victim, d_backing_inode(victim) ? "positive" : "negative");
/* okay... the victim is not being used so we can cull it
* - start by marking it as stale
@@ -936,7 +936,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
return 0;
error_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
error:
dput(victim);
if (ret == -ENOENT) {
@@ -971,7 +971,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
if (IS_ERR(victim))
return PTR_ERR(victim);
- mutex_unlock(&dir->d_inode->i_mutex);
+ mutex_unlock(&d_inode(dir)->i_mutex);
dput(victim);
//_leave(" = 0");
return 0;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c6cd8d7a4eef..3cbb0e834694 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -74,12 +74,12 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
static int cachefiles_read_reissue(struct cachefiles_object *object,
struct cachefiles_one_read *monitor)
{
- struct address_space *bmapping = object->backer->d_inode->i_mapping;
+ struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
struct page *backpage = monitor->back_page, *backpage2;
int ret;
_enter("{ino=%lx},{%lx,%lx}",
- object->backer->d_inode->i_ino,
+ d_backing_inode(object->backer)->i_ino,
backpage->index, backpage->flags);
/* skip if the page was truncated away completely */
@@ -157,7 +157,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
object = container_of(op->op.object,
struct cachefiles_object, fscache);
- _enter("{ino=%lu}", object->backer->d_inode->i_ino);
+ _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino);
max = 8;
spin_lock_irq(&object->work_lock);
@@ -247,7 +247,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
/* attempt to get hold of the backing page */
- bmapping = object->backer->d_inode->i_mapping;
+ bmapping = d_backing_inode(object->backer)->i_mapping;
newpage = NULL;
for (;;) {
@@ -408,7 +408,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
if (!object->backer)
goto enobufs;
- inode = object->backer->d_inode;
+ inode = d_backing_inode(object->backer);
ASSERT(S_ISREG(inode->i_mode));
ASSERT(inode->i_mapping->a_ops->bmap);
ASSERT(inode->i_mapping->a_ops->readpages);
@@ -468,7 +468,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
struct list_head *list)
{
struct cachefiles_one_read *monitor = NULL;
- struct address_space *bmapping = object->backer->d_inode->i_mapping;
+ struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
int ret = 0;
@@ -705,7 +705,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
space = 0;
- inode = object->backer->d_inode;
+ inode = d_backing_inode(object->backer);
ASSERT(S_ISREG(inode->i_mode));
ASSERT(inode->i_mapping->a_ops->bmap);
ASSERT(inode->i_mapping->a_ops->readpages);
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index 396c18ea2764..31bbc0528b11 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -55,14 +55,14 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
{
int ret;
- ret = security_inode_mkdir(root->d_inode, root, 0);
+ ret = security_inode_mkdir(d_backing_inode(root), root, 0);
if (ret < 0) {
pr_err("Security denies permission to make dirs: error %d",
ret);
return ret;
}
- ret = security_inode_create(root->d_inode, root, 0);
+ ret = security_inode_create(d_backing_inode(root), root, 0);
if (ret < 0)
pr_err("Security denies permission to create files: error %d",
ret);
@@ -95,7 +95,7 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
/* use the cache root dir's security context as the basis with
* which create files */
- ret = set_create_files_as(new, root->d_inode);
+ ret = set_create_files_as(new, d_backing_inode(root));
if (ret < 0) {
abort_creds(new);
cachefiles_begin_secure(cache, _saved_cred);
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index a8a68745e11d..d31c1a72d8a5 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -33,7 +33,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
int ret;
ASSERT(dentry);
- ASSERT(dentry->d_inode);
+ ASSERT(d_backing_inode(dentry));
if (!object->fscache.cookie)
strcpy(type, "C3");
@@ -52,7 +52,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
if (ret != -EEXIST) {
pr_err("Can't set xattr on %pd [%lu] (err %d)\n",
- dentry, dentry->d_inode->i_ino,
+ dentry, d_backing_inode(dentry)->i_ino,
-ret);
goto error;
}
@@ -64,7 +64,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
goto bad_type_length;
pr_err("Can't read xattr on %pd [%lu] (err %d)\n",
- dentry, dentry->d_inode->i_ino,
+ dentry, d_backing_inode(dentry)->i_ino,
-ret);
goto error;
}
@@ -84,14 +84,14 @@ error:
bad_type_length:
pr_err("Cache object %lu type xattr length incorrect\n",
- dentry->d_inode->i_ino);
+ d_backing_inode(dentry)->i_ino);
ret = -EIO;
goto error;
bad_type:
xtype[2] = 0;
pr_err("Cache object %pd [%lu] type %s not %s\n",
- dentry, dentry->d_inode->i_ino,
+ dentry, d_backing_inode(dentry)->i_ino,
xtype, type);
ret = -EIO;
goto error;
@@ -165,7 +165,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
int ret;
ASSERT(dentry);
- ASSERT(dentry->d_inode);
+ ASSERT(d_backing_inode(dentry));
ASSERT(object->fscache.cookie->def->check_aux);
auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
@@ -204,7 +204,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
_enter("%p,#%d", object, auxdata->len);
ASSERT(dentry);
- ASSERT(dentry->d_inode);
+ ASSERT(d_backing_inode(dentry));
auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp);
if (!auxbuf) {
@@ -225,7 +225,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
cachefiles_io_error_obj(object,
"Can't read xattr on %lu (err %d)",
- dentry->d_inode->i_ino, -ret);
+ d_backing_inode(dentry)->i_ino, -ret);
goto error;
}
@@ -276,7 +276,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
cachefiles_io_error_obj(object,
"Can't update xattr on %lu"
" (error %d)",
- dentry->d_inode->i_ino, -ret);
+ d_backing_inode(dentry)->i_ino, -ret);
goto error;
}
}
@@ -291,7 +291,7 @@ error:
bad_type_length:
pr_err("Cache object %lu xattr length incorrect\n",
- dentry->d_inode->i_ino);
+ d_backing_inode(dentry)->i_ino);
ret = -EIO;
goto error;
@@ -316,7 +316,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
cachefiles_io_error(cache,
"Can't remove xattr from %lu"
" (error %d)",
- dentry->d_inode->i_ino, -ret);
+ d_backing_inode(dentry)->i_ino, -ret);
}
_leave(" = %d", ret);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index fd5599d32362..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
inode, page, (int)pos, (int)len);
r = ceph_update_writeable_page(file, pos, len, page);
+ if (r < 0)
+ page_cache_release(page);
+ else
+ *pagep = page;
} while (r == -EAGAIN);
return r;
@@ -1198,8 +1202,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
* intercept O_DIRECT reads and writes early, this function should
* never get called.
*/
-static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter,
loff_t pos)
{
WARN_ON(1);
@@ -1535,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
- err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
- "inline_version", &inline_version,
- sizeof(inline_version),
- CEPH_OSD_CMPXATTR_OP_GT,
- CEPH_OSD_CMPXATTR_MODE_U64);
- if (err)
- goto out_put;
-
- err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
- "inline_version", &inline_version,
- sizeof(inline_version), 0, 0);
- if (err)
- goto out_put;
+ {
+ __le64 xattr_buf = cpu_to_le64(inline_version);
+ err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+ "inline_version", &xattr_buf,
+ sizeof(xattr_buf),
+ CEPH_OSD_CMPXATTR_OP_GT,
+ CEPH_OSD_CMPXATTR_MODE_U64);
+ if (err)
+ goto out_put;
+ }
+
+ {
+ char xattr_buf[32];
+ int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
+ "%llu", inline_version);
+ err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+ "inline_version",
+ xattr_buf, xattr_len, 0, 0);
+ if (err)
+ goto out_put;
+ }
ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8172775428a0..be5ea6af8366 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
return ret;
}
+static void drop_inode_snap_realm(struct ceph_inode_info *ci)
+{
+ struct ceph_snap_realm *realm = ci->i_snap_realm;
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_del_init(&ci->i_snap_realm_item);
+ ci->i_snap_realm_counter++;
+ ci->i_snap_realm = NULL;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
+ realm);
+}
+
/*
* Remove a cap. Take steps to deal with a racing iterate_session_caps.
*
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
if (removed)
ceph_put_cap(mdsc, cap);
- if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
- struct ceph_snap_realm *realm = ci->i_snap_realm;
- spin_lock(&realm->inodes_with_caps_lock);
- list_del_init(&ci->i_snap_realm_item);
- ci->i_snap_realm_counter++;
- ci->i_snap_realm = NULL;
- spin_unlock(&realm->inodes_with_caps_lock);
- ceph_put_snap_realm(mdsc, realm);
- }
+ /* when reconnect denied, we remove session caps forcibly,
+ * i_wr_ref can be non-zero. If there are ongoing write,
+ * keep i_snap_realm.
+ */
+ if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
+
if (!__ceph_is_any_real_caps(ci))
__cap_delay_cancel(mdsc, ci);
}
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
int was = ci->i_dirty_caps;
int dirty = 0;
+ if (!ci->i_auth_cap) {
+ pr_warn("__mark_dirty_caps %p %llx mask %s, "
+ "but no auth cap (session was closed?)\n",
+ inode, ceph_ino(inode), ceph_cap_string(mask));
+ return 0;
+ }
+
dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
ceph_cap_string(mask), ceph_cap_string(was),
ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ci->i_snap_realm->cached_context);
dout(" inode %p now dirty snapc %p auth cap %p\n",
&ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
- WARN_ON(!ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
if (!mdsc->stopping && inode->i_nlink > 0) {
if (want) {
retain |= CEPH_CAP_ANY; /* be greedy */
+ } else if (S_ISDIR(inode->i_mode) &&
+ (issued & CEPH_CAP_FILE_SHARED) &&
+ __ceph_dir_is_complete(ci)) {
+ /*
+ * If a directory is complete, we want to keep
+ * the exclusive cap. So that MDS does not end up
+ * revoking the shared cap on every create/unlink
+ * operation.
+ */
+ want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+ retain |= want;
} else {
+
retain |= CEPH_CAP_ANY_SHARED;
/*
* keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
wake = 1;
}
}
+ /* see comment in __ceph_remove_cap() */
+ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
}
spin_unlock(&ci->i_ceph_lock);
@@ -3391,7 +3422,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
int mds, int drop, int unless)
{
- struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *dir = d_inode(dentry->d_parent);
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 1b2355109b9f..31f831471ed2 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -84,7 +84,7 @@ static int mdsc_show(struct seq_file *s, void *p)
path = NULL;
spin_lock(&req->r_dentry->d_lock);
seq_printf(s, " #%llx/%pd (%s)",
- ceph_ino(req->r_dentry->d_parent->d_inode),
+ ceph_ino(d_inode(req->r_dentry->d_parent)),
req->r_dentry,
path ? path : "");
spin_unlock(&req->r_dentry->d_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83e9976f7189..4248307fea90 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -49,9 +49,9 @@ int ceph_init_dentry(struct dentry *dentry)
goto out_unlock;
}
- if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP)
d_set_d_op(dentry, &ceph_dentry_ops);
- else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
+ else if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
else
d_set_d_op(dentry, &ceph_snap_dentry_ops);
@@ -77,7 +77,7 @@ struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
spin_lock(&dentry->d_lock);
if (!IS_ROOT(dentry)) {
- inode = dentry->d_parent->d_inode;
+ inode = d_inode(dentry->d_parent);
ihold(inode);
}
spin_unlock(&dentry->d_lock);
@@ -122,7 +122,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
{
struct ceph_file_info *fi = file->private_data;
struct dentry *parent = file->f_path.dentry;
- struct inode *dir = parent->d_inode;
+ struct inode *dir = d_inode(parent);
struct list_head *p;
struct dentry *dentry, *last;
struct ceph_dentry_info *di;
@@ -161,15 +161,15 @@ more:
}
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (di->lease_shared_gen == shared_gen &&
- !d_unhashed(dentry) && dentry->d_inode &&
- ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
- ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
+ !d_unhashed(dentry) && d_really_is_positive(dentry) &&
+ ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR &&
+ ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH &&
fpos_cmp(ctx->pos, di->offset) <= 0)
break;
dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
dentry, di->offset,
ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
- !dentry->d_inode ? " null" : "");
+ !d_inode(dentry) ? " null" : "");
spin_unlock(&dentry->d_lock);
p = p->prev;
dentry = list_entry(p, struct dentry, d_child);
@@ -189,11 +189,11 @@ more:
}
dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
- dentry, dentry, dentry->d_inode);
+ dentry, dentry, d_inode(dentry));
if (!dir_emit(ctx, dentry->d_name.name,
dentry->d_name.len,
- ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
- dentry->d_inode->i_mode >> 12)) {
+ ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino),
+ d_inode(dentry)->i_mode >> 12)) {
if (last) {
/* remember our position */
fi->dentry = last;
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* can we use the dcache? */
spin_lock(&ci->i_ceph_lock);
if ((ctx->pos == 2 || fi->dentry) &&
+ ceph_test_mount_opt(fsc, DCACHE) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
ceph_mdsc_put_request(req);
return err;
}
- req->r_inode = inode;
- ihold(inode);
- req->r_dentry = dget(file->f_path.dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
req->r_direct_is_hash = true;
- req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+ if (fi->last_name) {
+ req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+ if (!req->r_path2) {
+ ceph_mdsc_put_request(req);
+ return -ENOMEM;
+ }
+ }
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
+
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_dentry = dget(file->f_path.dentry);
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
ceph_mdsc_put_request(req);
@@ -535,7 +543,7 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
- struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
+ struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
/* .snap dir? */
if (err == -ENOENT &&
@@ -571,8 +579,8 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
err = 0;
if (!req->r_reply_info.head->is_dentry) {
dout("ENOENT and no trace, dentry %p inode %p\n",
- dentry, dentry->d_inode);
- if (dentry->d_inode) {
+ dentry, d_inode(dentry));
+ if (d_really_is_positive(dentry)) {
d_drop(dentry);
err = -ENOENT;
} else {
@@ -619,7 +627,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(err);
/* can we conclude ENOENT locally? */
- if (dentry->d_inode == NULL) {
+ if (d_really_is_negative(dentry)) {
struct ceph_inode_info *ci = ceph_inode(dir);
struct ceph_dentry_info *di = ceph_dentry(dentry);
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
+ ceph_test_mount_opt(fsc, DCACHE) &&
__ceph_dir_is_complete(ci) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
spin_unlock(&ci->i_ceph_lock);
@@ -725,7 +734,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
ceph_mdsc_put_request(req);
out:
if (!err)
- ceph_init_inode_acls(dentry->d_inode, &acls);
+ ceph_init_inode_acls(d_inode(dentry), &acls);
else
d_drop(dentry);
ceph_release_acls_info(&acls);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(req);
goto out;
}
- req->r_dentry = dget(dentry);
- req->r_num_caps = 2;
req->r_path2 = kstrdup(dest, GFP_NOFS);
+ if (!req->r_path2) {
+ err = -ENOMEM;
+ ceph_mdsc_put_request(req);
+ goto out;
+ }
req->r_locked_dir = dir;
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -821,7 +835,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ceph_mdsc_put_request(req);
out:
if (!err)
- ceph_init_inode_acls(dentry->d_inode, &acls);
+ ceph_init_inode_acls(d_inode(dentry), &acls);
else
d_drop(dentry);
ceph_release_acls_info(&acls);
@@ -858,8 +872,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
if (err) {
d_drop(dentry);
} else if (!req->r_reply_info.head->is_dentry) {
- ihold(old_dentry->d_inode);
- d_instantiate(dentry, old_dentry->d_inode);
+ ihold(d_inode(old_dentry));
+ d_instantiate(dentry, d_inode(old_dentry));
}
ceph_mdsc_put_request(req);
return err;
@@ -892,7 +906,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_mds_request *req;
int err = -EROFS;
int op;
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
+ int op = CEPH_MDS_OP_RENAME;
int err;
if (ceph_snap(old_dir) != ceph_snap(new_dir))
return -EXDEV;
- if (ceph_snap(old_dir) != CEPH_NOSNAP ||
- ceph_snap(new_dir) != CEPH_NOSNAP)
- return -EROFS;
+ if (ceph_snap(old_dir) != CEPH_NOSNAP) {
+ if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
+ op = CEPH_MDS_OP_RENAMESNAP;
+ else
+ return -EROFS;
+ }
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
- req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
ihold(old_dir);
@@ -957,8 +975,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_RDCACHE on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
- if (new_dentry->d_inode)
- req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
+ if (d_really_is_positive(new_dentry))
+ req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
err = ceph_mdsc_do_request(mdsc, old_dir, req);
if (!err && !req->r_reply_info.head->is_dentry) {
/*
@@ -1024,7 +1042,7 @@ static int dentry_lease_is_valid(struct dentry *dentry)
if (di->lease_renew_after &&
time_after(jiffies, di->lease_renew_after)) {
/* we should renew */
- dir = dentry->d_parent->d_inode;
+ dir = d_inode(dentry->d_parent);
session = ceph_get_mds_session(s);
seq = di->lease_seq;
di->lease_renew_after = 0;
@@ -1074,22 +1092,22 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
- dentry, dentry->d_inode, ceph_dentry(dentry)->offset);
+ dentry, d_inode(dentry), ceph_dentry(dentry)->offset);
dir = ceph_get_dentry_parent_inode(dentry);
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
- dentry, dentry->d_inode);
+ dentry, d_inode(dentry));
valid = 1;
- } else if (dentry->d_inode &&
- ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+ } else if (d_really_is_positive(dentry) &&
+ ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) {
valid = 1;
} else if (dentry_lease_is_valid(dentry) ||
dir_lease_is_valid(dir, dentry)) {
- if (dentry->d_inode)
- valid = ceph_is_any_caps(dentry->d_inode);
+ if (d_really_is_positive(dentry))
+ valid = ceph_is_any_caps(d_inode(dentry));
else
valid = 1;
}
@@ -1151,7 +1169,7 @@ static void ceph_d_prune(struct dentry *dentry)
* we hold d_lock, so d_parent is stable, and d_fsdata is never
* cleared until d_release
*/
- ceph_dir_clear_complete(dentry->d_parent->d_inode);
+ ceph_dir_clear_complete(d_inode(dentry->d_parent));
}
/*
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
dout("dir_fsync %p wait on tid %llu (until %llu)\n",
inode, req->r_tid, last_tid);
if (req->r_timeout) {
- ret = wait_for_completion_timeout(
- &req->r_safe_completion, req->r_timeout);
- if (ret > 0)
+ unsigned long time_left = wait_for_completion_timeout(
+ &req->r_safe_completion,
+ req->r_timeout);
+ if (time_left > 0)
ret = 0;
- else if (ret == 0)
+ else
ret = -EIO; /* timed out */
} else {
wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
.getattr = ceph_getattr,
.mkdir = ceph_mkdir,
.rmdir = ceph_unlink,
+ .rename = ceph_rename,
};
const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 8d7d782f4382..fe02ae7f056a 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -136,8 +136,8 @@ static struct dentry *__get_parent(struct super_block *sb,
return ERR_CAST(req);
if (child) {
- req->r_inode = child->d_inode;
- ihold(child->d_inode);
+ req->r_inode = d_inode(child);
+ ihold(d_inode(child));
} else {
req->r_ino1 = (struct ceph_vino) {
.ino = ino,
@@ -164,7 +164,7 @@ static struct dentry *__get_parent(struct super_block *sb,
return ERR_PTR(err);
}
dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
- child ? ceph_ino(child->d_inode) : ino,
+ child ? ceph_ino(d_inode(child)) : ino,
dentry, ceph_vinop(inode));
return dentry;
}
@@ -172,11 +172,11 @@ static struct dentry *__get_parent(struct super_block *sb,
static struct dentry *ceph_get_parent(struct dentry *child)
{
/* don't re-export snaps */
- if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+ if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
return ERR_PTR(-EINVAL);
dout("get_parent %p ino %llx.%llx\n",
- child, ceph_vinop(child->d_inode));
+ child, ceph_vinop(d_inode(child)));
return __get_parent(child->d_sb, child, 0);
}
@@ -209,32 +209,32 @@ static int ceph_get_name(struct dentry *parent, char *name,
struct ceph_mds_request *req;
int err;
- mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+ mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
USE_ANY_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&d_inode(parent)->i_mutex);
- req->r_inode = child->d_inode;
- ihold(child->d_inode);
- req->r_ino2 = ceph_vino(parent->d_inode);
- req->r_locked_dir = parent->d_inode;
+ req->r_inode = d_inode(child);
+ ihold(d_inode(child));
+ req->r_ino2 = ceph_vino(d_inode(parent));
+ req->r_locked_dir = d_inode(parent);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&d_inode(parent)->i_mutex);
if (!err) {
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
memcpy(name, rinfo->dname, rinfo->dname_len);
name[rinfo->dname_len] = 0;
dout("get_name %p ino %llx.%llx name %s\n",
- child, ceph_vinop(child->d_inode), name);
+ child, ceph_vinop(d_inode(child)), name);
} else {
dout("get_name %p ino %llx.%llx err %d\n",
- child, ceph_vinop(child->d_inode), err);
+ child, ceph_vinop(d_inode(child)), err);
}
ceph_mdsc_put_request(req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075a823d..3b6b522b4b31 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
#include "super.h"
@@ -292,14 +291,14 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
}
if (err)
goto out_req;
- if (dn || dentry->d_inode == NULL || d_is_symlink(dentry)) {
+ if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) {
/* make vfs retry on splice, ENOENT, or symlink */
dout("atomic_open finish_no_open on dn %p\n", dn);
err = finish_no_open(file, dn);
} else {
dout("atomic_open finish_open on dn %p\n", dn);
if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
- ceph_init_inode_acls(dentry->d_inode, &acls);
+ ceph_init_inode_acls(d_inode(dentry), &acls);
*opened |= FILE_CREATED;
}
err = finish_open(file, dentry, ceph_open, opened);
@@ -458,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
if (ret < 0)
return ret;
- if (file->f_flags & O_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT) {
while (iov_iter_count(i)) {
size_t start;
ssize_t n;
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *filp = iocb->ki_filp;
struct ceph_file_info *fi = filp->private_data;
- size_t len = iocb->ki_nbytes;
+ size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *pinned_page = NULL;
@@ -829,7 +828,7 @@ again:
return ret;
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
- (iocb->ki_filp->f_flags & O_DIRECT) ||
+ (iocb->ki_flags & IOCB_DIRECT) ||
(fi->flags & CEPH_F_SYNC)) {
dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
@@ -942,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
- ssize_t count = iov_iter_count(from), written = 0;
+ ssize_t count, written = 0;
int err, want, got;
- loff_t pos = iocb->ki_pos;
+ loff_t pos;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -954,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
- goto out;
-
- if (count == 0)
+ err = generic_write_checks(iocb, from);
+ if (err <= 0)
goto out;
- iov_iter_truncate(from, count);
+ pos = iocb->ki_pos;
+ count = iov_iter_count(from);
err = file_remove_suid(file);
if (err)
goto out;
@@ -998,12 +995,12 @@ retry_snap:
inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
- (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+ (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
struct iov_iter data;
mutex_unlock(&inode->i_mutex);
/* we might need to revert back to that point */
data = *from;
- if (file->f_flags & O_DIRECT)
+ if (iocb->ki_flags & IOCB_DIRECT)
written = ceph_sync_direct_write(iocb, &data, pos);
else
written = ceph_sync_write(iocb, &data, pos);
@@ -1332,8 +1329,6 @@ const struct file_operations ceph_file_fops = {
.open = ceph_open,
.release = ceph_release,
.llseek = ceph_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = ceph_read_iter,
.write_iter = ceph_write_iter,
.mmap = ceph_mmap,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 119c43c80638..e876e1944519 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -940,7 +940,7 @@ static void update_dentry_lease(struct dentry *dentry,
dentry, duration, ttl);
/* make lease_rdcache_gen match directory */
- dir = dentry->d_parent->d_inode;
+ dir = d_inode(dentry->d_parent);
di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
if (duration == 0)
@@ -980,7 +980,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
{
struct dentry *realdn;
- BUG_ON(dn->d_inode);
+ BUG_ON(d_inode(dn));
/* dn must be unhashed */
if (!d_unhashed(dn))
@@ -998,13 +998,13 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
"inode %p ino %llx.%llx\n",
dn, d_count(dn),
realdn, d_count(realdn),
- realdn->d_inode, ceph_vinop(realdn->d_inode));
+ d_inode(realdn), ceph_vinop(d_inode(realdn)));
dput(dn);
dn = realdn;
} else {
BUG_ON(!ceph_dentry(dn));
dout("dn %p attached to %p ino %llx.%llx\n",
- dn, dn->d_inode, ceph_vinop(dn->d_inode));
+ dn, d_inode(dn), ceph_vinop(d_inode(dn)));
}
if ((!prehash || *prehash) && d_unhashed(dn))
d_rehash(dn);
@@ -1125,11 +1125,11 @@ retry_lookup:
dput(parent);
goto done;
}
- } else if (dn->d_inode &&
- (ceph_ino(dn->d_inode) != vino.ino ||
- ceph_snap(dn->d_inode) != vino.snap)) {
+ } else if (d_really_is_positive(dn) &&
+ (ceph_ino(d_inode(dn)) != vino.ino ||
+ ceph_snap(d_inode(dn)) != vino.snap)) {
dout(" dn %p points to wrong inode %p\n",
- dn, dn->d_inode);
+ dn, d_inode(dn));
d_delete(dn);
dput(dn);
goto retry_lookup;
@@ -1183,7 +1183,7 @@ retry_lookup:
BUG_ON(!dn);
BUG_ON(!dir);
- BUG_ON(dn->d_parent->d_inode != dir);
+ BUG_ON(d_inode(dn->d_parent) != dir);
BUG_ON(ceph_ino(dir) !=
le64_to_cpu(rinfo->diri.in->ino));
BUG_ON(ceph_snap(dir) !=
@@ -1235,7 +1235,7 @@ retry_lookup:
/* null dentry? */
if (!rinfo->head->is_target) {
dout("fill_trace null dentry\n");
- if (dn->d_inode) {
+ if (d_really_is_positive(dn)) {
ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
@@ -1252,7 +1252,7 @@ retry_lookup:
}
/* attach proper inode */
- if (!dn->d_inode) {
+ if (d_really_is_negative(dn)) {
ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, &have_lease);
@@ -1261,9 +1261,9 @@ retry_lookup:
goto done;
}
req->r_dentry = dn; /* may have spliced */
- } else if (dn->d_inode && dn->d_inode != in) {
+ } else if (d_really_is_positive(dn) && d_inode(dn) != in) {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
- dn, dn->d_inode, ceph_vinop(dn->d_inode),
+ dn, d_inode(dn), ceph_vinop(d_inode(dn)),
ceph_vinop(in));
have_lease = false;
}
@@ -1363,7 +1363,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
return readdir_prepopulate_inodes_only(req, session);
if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
- snapdir = ceph_get_snapdir(parent->d_inode);
+ snapdir = ceph_get_snapdir(d_inode(parent));
parent = d_find_alias(snapdir);
dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
rinfo->dir_nr, parent);
@@ -1371,7 +1371,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
dout("readdir_prepopulate %d items under dn %p\n",
rinfo->dir_nr, parent);
if (rinfo->dir_dir)
- ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
+ ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
}
/* FIXME: release caps/leases if error occurs */
@@ -1405,11 +1405,11 @@ retry_lookup:
err = ret;
goto out;
}
- } else if (dn->d_inode &&
- (ceph_ino(dn->d_inode) != vino.ino ||
- ceph_snap(dn->d_inode) != vino.snap)) {
+ } else if (d_really_is_positive(dn) &&
+ (ceph_ino(d_inode(dn)) != vino.ino ||
+ ceph_snap(d_inode(dn)) != vino.snap)) {
dout(" dn %p points to wrong inode %p\n",
- dn, dn->d_inode);
+ dn, d_inode(dn));
d_delete(dn);
dput(dn);
goto retry_lookup;
@@ -1423,8 +1423,8 @@ retry_lookup:
}
/* inode */
- if (dn->d_inode) {
- in = dn->d_inode;
+ if (d_really_is_positive(dn)) {
+ in = d_inode(dn);
} else {
in = ceph_get_inode(parent->d_sb, vino);
if (IS_ERR(in)) {
@@ -1440,13 +1440,13 @@ retry_lookup:
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- if (!dn->d_inode)
+ if (d_really_is_negative(dn))
iput(in);
d_drop(dn);
goto next_item;
}
- if (!dn->d_inode) {
+ if (d_really_is_negative(dn)) {
struct dentry *realdn = splice_dentry(dn, in, NULL);
if (IS_ERR(realdn)) {
err = PTR_ERR(realdn);
@@ -1693,7 +1693,7 @@ retry:
*/
static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
+ struct ceph_inode_info *ci = ceph_inode(d_inode(dentry));
nd_set_link(nd, ci->i_symlink);
return NULL;
}
@@ -1714,7 +1714,7 @@ static const struct inode_operations ceph_symlink_iops = {
*/
int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
const unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
@@ -1990,7 +1990,7 @@ int ceph_permission(struct inode *inode, int mask)
int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
int err;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 71c073f38e54..84f37f34f9aa 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -679,7 +679,7 @@ static struct dentry *get_nonsnap_parent(struct dentry *dentry)
* except to resplice to another snapdir, and either the old or new
* result is a valid result.
*/
- while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
dentry = dentry->d_parent;
return dentry;
}
@@ -716,20 +716,20 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
struct dentry *parent = req->r_dentry->d_parent;
- struct inode *dir = parent->d_inode;
+ struct inode *dir = d_inode(parent);
if (dir->i_sb != mdsc->fsc->sb) {
/* not this fs! */
- inode = req->r_dentry->d_inode;
+ inode = d_inode(req->r_dentry);
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
struct dentry *dn = get_nonsnap_parent(parent);
- inode = dn->d_inode;
+ inode = d_inode(dn);
dout("__choose_mds using nonsnap parent %p\n", inode);
} else {
/* dentry target */
- inode = req->r_dentry->d_inode;
+ inode = d_inode(req->r_dentry);
if (!inode || mode == USE_AUTH_MDS) {
/* dir + name */
inode = dir;
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
spin_unlock(&session->s_cap_lock);
}
+static void cleanup_session_requests(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_request *req;
+ struct rb_node *p;
+
+ dout("cleanup_session_requests mds%d\n", session->s_mds);
+ mutex_lock(&mdsc->mutex);
+ while (!list_empty(&session->s_unsafe)) {
+ req = list_first_entry(&session->s_unsafe,
+ struct ceph_mds_request, r_unsafe_item);
+ list_del_init(&req->r_unsafe_item);
+ pr_info(" dropping unsafe request %llu\n", req->r_tid);
+ __unregister_request(mdsc, req);
+ }
+ /* zero r_attempts, so kick_requests() will re-send requests */
+ p = rb_first(&mdsc->request_tree);
+ while (p) {
+ req = rb_entry(p, struct ceph_mds_request, r_node);
+ p = rb_next(p);
+ if (req->r_session &&
+ req->r_session->s_mds == session->s_mds)
+ req->r_attempts = 0;
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
/*
* Helper to safely iterate over all caps associated with a session, with
* special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
__ceph_remove_cap(cap, false);
- if (!__ceph_is_any_real_caps(ci)) {
+ if (!ci->i_auth_cap) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
mdsc->num_cap_flushing--;
drop = 1;
}
- if (drop && ci->i_wrbuffer_ref) {
- pr_info(" dropping dirty data for %p %lld\n",
- inode, ceph_ino(inode));
- ci->i_wrbuffer_ref = 0;
- ci->i_wrbuffer_ref_head = 0;
- drop++;
- }
spin_unlock(&mdsc->cap_dirty_lock);
}
spin_unlock(&ci->i_ceph_lock);
@@ -1712,7 +1732,7 @@ retry:
seq = read_seqbegin(&rename_lock);
rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp);) {
- struct inode *inode = temp->d_inode;
+ struct inode *inode = d_inode(temp);
if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
len++; /* slash only */
else if (stop_on_nosnap && inode &&
@@ -1736,7 +1756,7 @@ retry:
struct inode *inode;
spin_lock(&temp->d_lock);
- inode = temp->d_inode;
+ inode = d_inode(temp);
if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
dout("build_path path+%d: %p SNAPDIR\n",
pos, temp);
@@ -1770,7 +1790,7 @@ retry:
goto retry;
}
- *base = ceph_ino(temp->d_inode);
+ *base = ceph_ino(d_inode(temp));
*plen = len;
dout("build_path on %p %d built %llx '%.*s'\n",
dentry, d_count(dentry), *base, len, path);
@@ -1783,8 +1803,8 @@ static int build_dentry_path(struct dentry *dentry,
{
char *path;
- if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) {
- *pino = ceph_ino(dentry->d_parent->d_inode);
+ if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
+ *pino = ceph_ino(d_inode(dentry->d_parent));
*ppath = dentry->d_name.name;
*ppathlen = dentry->d_name.len;
return 0;
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
*/
static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req,
- int mds)
+ int mds, bool drop_cap_releases)
{
struct ceph_msg *msg;
struct ceph_mds_request_head *head;
@@ -1925,7 +1945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
releases = 0;
if (req->r_inode_drop)
releases += ceph_encode_inode_release(&p,
- req->r_inode ? req->r_inode : req->r_dentry->d_inode,
+ req->r_inode ? req->r_inode : d_inode(req->r_dentry),
mds, req->r_inode_drop, req->r_inode_unless, 0);
if (req->r_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_dentry,
@@ -1935,8 +1955,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
if (req->r_old_inode_drop)
releases += ceph_encode_inode_release(&p,
- req->r_old_dentry->d_inode,
+ d_inode(req->r_old_dentry),
mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+
+ if (drop_cap_releases) {
+ releases = 0;
+ p = msg->front.iov_base + req->r_request_release_offset;
+ }
+
head->num_releases = cpu_to_le16(releases);
/* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
*/
static int __prepare_send_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req,
- int mds)
+ int mds, bool drop_cap_releases)
{
struct ceph_mds_request_head *rhead;
struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
- msg = create_request_message(mdsc, req, mds);
+ msg = create_request_message(mdsc, req, mds, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
- err = __prepare_send_request(mdsc, req, mds);
+ err = __prepare_send_request(mdsc, req, mds, false);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
case CEPH_SESSION_CLOSE:
if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
pr_info("mds%d reconnect denied\n", session->s_mds);
+ cleanup_session_requests(mdsc, session);
remove_session_caps(session);
wake = 2; /* for good measure */
wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
- err = __prepare_send_request(mdsc, req, session->s_mds);
+ err = __prepare_send_request(mdsc, req, session->s_mds, true);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
continue; /* only old requests */
if (req->r_session &&
req->r_session->s_mds == session->s_mds) {
- err = __prepare_send_request(mdsc, req, session->s_mds);
+ err = __prepare_send_request(mdsc, req,
+ session->s_mds, true);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
spin_unlock(&session->s_cap_lock);
/* trim unused caps to reduce MDS's cache rejoin time */
- shrink_dcache_parent(mdsc->fsc->sb->s_root);
+ if (mdsc->fsc->sb->s_root)
+ shrink_dcache_parent(mdsc->fsc->sb->s_root);
ceph_con_close(&session->s_con);
ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
di->lease_renew_from &&
di->lease_renew_after == 0) {
unsigned long duration =
- le32_to_cpu(h->duration_ms) * HZ / 1000;
+ msecs_to_jiffies(le32_to_cpu(h->duration_ms));
di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 51cc23e48111..89e6bc321df3 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_LSSNAP: return "lssnap";
case CEPH_MDS_OP_MKSNAP: return "mksnap";
case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+ case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a63997b8bcff..4e9905374078 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -44,7 +44,7 @@ static void ceph_put_super(struct super_block *s)
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
{
- struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
struct ceph_monmap *monmap = fsc->client->monc.monmap;
struct ceph_statfs st;
u64 fsid;
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
fsopt->rsize = CEPH_RSIZE_DEFAULT;
fsopt->rasize = CEPH_RASIZE_DEFAULT;
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ if (!fsopt->snapdir_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
struct ceph_mount_options *fsopt = fsc->mount_options;
- struct ceph_options *opt = fsc->client->options;
-
- if (opt->flags & CEPH_OPT_FSID)
- seq_printf(m, ",fsid=%pU", &opt->fsid);
- if (opt->flags & CEPH_OPT_NOSHARE)
- seq_puts(m, ",noshare");
- if (opt->flags & CEPH_OPT_NOCRC)
- seq_puts(m, ",nocrc");
- if (opt->flags & CEPH_OPT_NOMSGAUTH)
- seq_puts(m, ",nocephx_require_signatures");
- if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
- seq_puts(m, ",notcp_nodelay");
-
- if (opt->name)
- seq_printf(m, ",name=%s", opt->name);
- if (opt->key)
- seq_puts(m, ",secret=<hidden>");
-
- if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
- seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
- if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
- seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
- if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
- seq_printf(m, ",osdkeepalivetimeout=%d",
- opt->osd_keepalive_timeout);
+ size_t pos;
+ int ret;
+
+ /* a comma between MNT/MS and client options */
+ seq_putc(m, ',');
+ pos = m->count;
+
+ ret = ceph_print_client_options(m, fsc->client);
+ if (ret)
+ return ret;
+
+ /* retract our comma if no client options */
+ if (m->count == pos)
+ m->count--;
if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",norbytes");
if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
seq_puts(m, ",noasyncreaddir");
- if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
- seq_puts(m, ",dcache");
- else
+ if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
seq_puts(m, ",nodcache");
if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
seq_puts(m, ",fsc");
- else
- seq_puts(m, ",nofsc");
#ifdef CONFIG_CEPH_FS_POSIX_ACL
if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+
return 0;
}
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
if (IS_ERR(req))
return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS);
+ if (!req->r_path1) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started;
@@ -976,7 +972,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
if (IS_ERR(res))
goto out_splat;
dout("root %p inode %p ino %llx.%llx\n", res,
- res->d_inode, ceph_vinop(res->d_inode));
+ d_inode(res), ceph_vinop(d_inode(res)));
return res;
out_splat:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 04c8124ed30e..fa20e1318939 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
-#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
+#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \
+ CEPH_MOUNT_OPT_DCACHE)
#define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
/* file.c */
extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5a492caf34cb..cd7ffad4041d 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -776,12 +776,12 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_getxattr(dentry, name, value, size);
- return __ceph_getxattr(dentry->d_inode, name, value, size);
+ return __ceph_getxattr(d_inode(dentry), name, value, size);
}
ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
u32 vir_namelen = 0;
@@ -847,7 +847,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
const char *value, size_t size, int flags)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = PTR_ERR(req);
goto out;
}
- req->r_inode = inode;
- ihold(inode);
- req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
- req->r_num_caps = 1;
+
req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2) {
+ ceph_mdsc_put_request(req);
+ err = -ENOMEM;
+ goto out;
+ }
req->r_pagelist = pagelist;
pagelist = NULL;
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_num_caps = 1;
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+
dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
err = ceph_mdsc_do_request(mdsc, NULL, req);
ceph_mdsc_put_request(req);
@@ -901,7 +908,7 @@ out:
int __ceph_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_vxattr *vxattr;
struct ceph_inode_info *ci = ceph_inode(inode);
int issued;
@@ -995,7 +1002,7 @@ out:
int ceph_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
return -EROFS;
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
@@ -1011,7 +1018,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_mds_request *req;
int err;
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2)
+ return -ENOMEM;
+
req->r_inode = inode;
ihold(inode);
- req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
- req->r_path2 = kstrdup(name, GFP_NOFS);
-
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
err = ceph_mdsc_do_request(mdsc, NULL, req);
ceph_mdsc_put_request(req);
return err;
@@ -1032,7 +1041,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
int __ceph_removexattr(struct dentry *dentry, const char *name)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct ceph_vxattr *vxattr;
struct ceph_inode_info *ci = ceph_inode(inode);
int issued;
@@ -1098,7 +1107,7 @@ out:
int ceph_removexattr(struct dentry *dentry, const char *name)
{
- if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+ if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
return -EROFS;
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index b8602f199815..430e0348c99e 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -301,7 +301,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
if (full_path == NULL)
goto cdda_exit;
- cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
+ cifs_sb = CIFS_SB(d_inode(mntpt)->i_sb);
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
mnt = ERR_CAST(tlink);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ac7445e6ec7..aa0dc2573374 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,6 +1,9 @@
/*
* fs/cifs/cifsencrypt.c
*
+ * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP
+ * for more detailed information
+ *
* Copyright (C) International Business Machines Corp., 2005,2013
* Author(s): Steve French (sfrench@us.ibm.com)
*
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
__func__);
return rc;
}
- } else if (ses->serverName) {
+ } else {
+ /* We use ses->serverName if no domain name available */
len = strlen(ses->serverName);
server = kmalloc(2 + (len * 2), GFP_KERNEL);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d72fe37f5420..f5089bde3635 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -607,7 +607,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
p = s = full_path;
do {
- struct inode *dir = dentry->d_inode;
+ struct inode *dir = d_inode(dentry);
struct dentry *child;
if (!dir) {
@@ -906,8 +906,6 @@ const struct inode_operations cifs_symlink_inode_ops = {
};
const struct file_operations cifs_file_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
.open = cifs_open,
@@ -926,8 +924,6 @@ const struct file_operations cifs_file_ops = {
};
const struct file_operations cifs_file_strict_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_strict_readv,
.write_iter = cifs_strict_writev,
.open = cifs_open,
@@ -947,8 +943,6 @@ const struct file_operations cifs_file_strict_ops = {
const struct file_operations cifs_file_direct_ops = {
/* BB reevaluate whether they can be done with directio, no cache */
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_user_readv,
.write_iter = cifs_user_writev,
.open = cifs_open,
@@ -967,8 +961,6 @@ const struct file_operations cifs_file_direct_ops = {
};
const struct file_operations cifs_file_nobrl_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
.open = cifs_open,
@@ -986,8 +978,6 @@ const struct file_operations cifs_file_nobrl_ops = {
};
const struct file_operations cifs_file_strict_nobrl_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_strict_readv,
.write_iter = cifs_strict_writev,
.open = cifs_open,
@@ -1006,8 +996,6 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
const struct file_operations cifs_file_direct_nobrl_ops = {
/* BB reevaluate whether they can be done with directio, no cache */
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_user_readv,
.write_iter = cifs_user_writev,
.open = cifs_open,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index fa13d5e79f64..84650a51c7c4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1898,7 +1898,7 @@ static void
cifs_writev_requeue(struct cifs_writedata *wdata)
{
int i, rc = 0;
- struct inode *inode = wdata->cfile->dentry->d_inode;
+ struct inode *inode = d_inode(wdata->cfile->dentry);
struct TCP_Server_Info *server;
unsigned int rest_len;
@@ -1981,7 +1981,7 @@ cifs_writev_complete(struct work_struct *work)
{
struct cifs_writedata *wdata = container_of(work,
struct cifs_writedata, work);
- struct inode *inode = wdata->cfile->dentry->d_inode;
+ struct inode *inode = d_inode(wdata->cfile->dentry);
int i = 0;
if (wdata->result == 0) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3aa999ab785..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
length = atomic_dec_return(&tcpSesAllocCount);
if (length > 0)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
}
static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
length = atomic_inc_return(&tcpSesAllocCount);
if (length > 1)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable();
while (server->tcpStatus != CifsExiting) {
@@ -1599,6 +1597,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
pr_warn("CIFS: username too long\n");
goto cifs_parse_mount_err;
}
+
+ kfree(vol->username);
vol->username = kstrdup(string, GFP_KERNEL);
if (!vol->username)
goto cifs_parse_mount_err;
@@ -1700,6 +1700,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
+ kfree(vol->domainname);
vol->domainname = kstrdup(string, GFP_KERNEL);
if (!vol->domainname) {
pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
}
if (strncasecmp(string, "default", 7) != 0) {
+ kfree(vol->iocharset);
vol->iocharset = kstrdup(string,
GFP_KERNEL);
if (!vol->iocharset) {
@@ -2913,8 +2915,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
* calling name ends in null (byte 16) from old smb
* convention.
*/
- if (server->workstation_RFC1001_name &&
- server->workstation_RFC1001_name[0] != 0)
+ if (server->workstation_RFC1001_name[0] != 0)
rfc1002mangle(ses_init_buf->trailer.
session_req.calling_name,
server->workstation_RFC1001_name,
@@ -3692,6 +3693,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
#endif /* CIFS_WEAK_PW_HASH */
rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
bcc_ptr, nls_codepage);
+ if (rc) {
+ cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
+ __func__, rc);
+ cifs_buf_release(smb_buffer);
+ return rc;
+ }
bcc_ptr += CIFS_AUTH_RESP_SIZE;
if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index b72bc29cba23..338d56936f6a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -745,13 +745,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
goto lookup_out;
}
- if (direntry->d_inode != NULL) {
+ if (d_really_is_positive(direntry)) {
cifs_dbg(FYI, "non-NULL inode in lookup\n");
} else {
cifs_dbg(FYI, "NULL inode in lookup\n");
}
cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
- full_path, direntry->d_inode);
+ full_path, d_inode(direntry));
if (pTcon->unix_ext) {
rc = cifs_get_inode_info_unix(&newInode, full_path,
@@ -792,7 +792,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- if (direntry->d_inode) {
+ if (d_really_is_positive(direntry)) {
if (cifs_revalidate_dentry(direntry))
return 0;
else {
@@ -803,7 +803,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
* attributes will have been updated by
* cifs_revalidate_dentry().
*/
- if (IS_AUTOMOUNT(direntry->d_inode) &&
+ if (IS_AUTOMOUNT(d_inode(direntry)) &&
!(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
spin_lock(&direntry->d_lock);
direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a94b3e673182..cafbf10521d5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -273,7 +273,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock)
{
struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct cifsFileInfo *cfile;
struct cifs_fid_locks *fdlocks;
@@ -357,7 +357,7 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
- struct inode *inode = cifs_file->dentry->d_inode;
+ struct inode *inode = d_inode(cifs_file->dentry);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
@@ -386,7 +386,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
if (list_empty(&cifsi->openFileList)) {
cifs_dbg(FYI, "closing last open instance for inode %p\n",
- cifs_file->dentry->d_inode);
+ d_inode(cifs_file->dentry));
/*
* In strict cache mode we need invalidate mapping on the last
* close because it may cause a error when we open this file
@@ -572,7 +572,7 @@ static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
int rc = 0;
@@ -620,7 +620,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
return rc;
}
- inode = cfile->dentry->d_inode;
+ inode = d_inode(cfile->dentry);
cifs_sb = CIFS_SB(inode->i_sb);
tcon = tlink_tcon(cfile->tlink);
server = tcon->ses->server;
@@ -874,7 +874,7 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
{
bool rc = false;
struct cifs_fid_locks *cur;
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
list_for_each_entry(cur, &cinode->llist, llist) {
rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
@@ -899,7 +899,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
{
int rc = 0;
struct cifsLockInfo *conf_lock;
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
bool exist;
@@ -927,7 +927,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
down_write(&cinode->lock_sem);
list_add_tail(&lock->llist, &cfile->llist->locks);
up_write(&cinode->lock_sem);
@@ -944,7 +944,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
bool wait)
{
struct cifsLockInfo *conf_lock;
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
bool exist;
int rc = 0;
@@ -1125,7 +1125,7 @@ struct lock_to_push {
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
- struct inode *inode = cfile->dentry->d_inode;
+ struct inode *inode = d_inode(cfile->dentry);
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock;
struct file_lock_context *flctx = inode->i_flctx;
@@ -1214,7 +1214,7 @@ static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
int rc = 0;
@@ -1382,7 +1382,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
unsigned int max_num, num, max_buf;
LOCKING_ANDX_RANGE *buf, *cur;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
- struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
struct cifsLockInfo *li, *tmp;
__u64 length = 1 + flock->fl_end - flock->fl_start;
struct list_head tmp_llist;
@@ -1488,7 +1488,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
- struct inode *inode = cfile->dentry->d_inode;
+ struct inode *inode = d_inode(cfile->dentry);
if (posix_lck) {
int posix_lock_type;
@@ -1643,7 +1643,7 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
struct TCP_Server_Info *server;
unsigned int xid;
struct dentry *dentry = open_file->dentry;
- struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
+ struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
struct cifs_io_parms io_parms;
cifs_sb = CIFS_SB(dentry->d_sb);
@@ -1676,7 +1676,7 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
break;
}
- len = min(server->ops->wp_retry_size(dentry->d_inode),
+ len = min(server->ops->wp_retry_size(d_inode(dentry)),
(unsigned int)write_size - total_written);
/* iov[0] is reserved for smb header */
iov[1].iov_base = (char *)write_data + total_written;
@@ -1696,9 +1696,9 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
return rc;
}
} else {
- spin_lock(&dentry->d_inode->i_lock);
+ spin_lock(&d_inode(dentry)->i_lock);
cifs_update_eof(cifsi, *offset, bytes_written);
- spin_unlock(&dentry->d_inode->i_lock);
+ spin_unlock(&d_inode(dentry)->i_lock);
*offset += bytes_written;
}
}
@@ -1706,12 +1706,12 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
cifs_stats_bytes_written(tcon, total_written);
if (total_written > 0) {
- spin_lock(&dentry->d_inode->i_lock);
- if (*offset > dentry->d_inode->i_size)
- i_size_write(dentry->d_inode, *offset);
- spin_unlock(&dentry->d_inode->i_lock);
+ spin_lock(&d_inode(dentry)->i_lock);
+ if (*offset > d_inode(dentry)->i_size)
+ i_size_write(d_inode(dentry), *offset);
+ spin_unlock(&d_inode(dentry)->i_lock);
}
- mark_inode_dirty_sync(dentry->d_inode);
+ mark_inode_dirty_sync(d_inode(dentry));
free_xid(xid);
return total_written;
}
@@ -1823,6 +1823,7 @@ refind_writable:
cifsFileInfo_put(inv_file);
spin_lock(&cifs_file_list_lock);
++refind;
+ inv_file = NULL;
goto refind_writable;
}
}
@@ -2405,7 +2406,7 @@ cifs_uncached_writev_complete(struct work_struct *work)
{
struct cifs_writedata *wdata = container_of(work,
struct cifs_writedata, work);
- struct inode *inode = wdata->cfile->dentry->d_inode;
+ struct inode *inode = d_inode(wdata->cfile->dentry);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
spin_lock(&inode->i_lock);
@@ -2559,10 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
return rc;
}
-static ssize_t
-cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
- size_t len;
+ struct file *file = iocb->ki_filp;
ssize_t total_written = 0;
struct cifsFileInfo *open_file;
struct cifs_tcon *tcon;
@@ -2572,15 +2572,15 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
struct iov_iter saved_from;
int rc;
- len = iov_iter_count(from);
- rc = generic_write_checks(file, poffset, &len, 0);
- if (rc)
- return rc;
-
- if (!len)
- return 0;
+ /*
+ * BB - optimize the way when signing is disabled. We can drop this
+ * extra memory-to-memory copying and use iovec buffers for constructing
+ * write request.
+ */
- iov_iter_truncate(from, len);
+ rc = generic_write_checks(iocb, from);
+ if (rc <= 0)
+ return rc;
INIT_LIST_HEAD(&wdata_list);
cifs_sb = CIFS_FILE_SB(file);
@@ -2592,8 +2592,8 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
memcpy(&saved_from, from, sizeof(struct iov_iter));
- rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
- &wdata_list);
+ rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
+ open_file, cifs_sb, &wdata_list);
/*
* If at least one write was successfully sent, then discard any rc
@@ -2632,7 +2632,7 @@ restart_loop:
memcpy(&tmp_from, &saved_from,
sizeof(struct iov_iter));
iov_iter_advance(&tmp_from,
- wdata->offset - *poffset);
+ wdata->offset - iocb->ki_pos);
rc = cifs_write_from_iter(wdata->offset,
wdata->bytes, &tmp_from,
@@ -2649,34 +2649,13 @@ restart_loop:
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- if (total_written > 0)
- *poffset += total_written;
+ if (unlikely(!total_written))
+ return rc;
+ iocb->ki_pos += total_written;
+ set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
cifs_stats_bytes_written(tcon, total_written);
- return total_written ? total_written : (ssize_t)rc;
-}
-
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
-{
- ssize_t written;
- struct inode *inode;
- loff_t pos = iocb->ki_pos;
-
- inode = file_inode(iocb->ki_filp);
-
- /*
- * BB - optimize the way when signing is disabled. We can drop this
- * extra memory-to-memory copying and use iovec buffers for constructing
- * write request.
- */
-
- written = cifs_iovec_write(iocb->ki_filp, from, &pos);
- if (written > 0) {
- set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
- iocb->ki_pos = pos;
- }
-
- return written;
+ return total_written;
}
static ssize_t
@@ -2687,8 +2666,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
- ssize_t rc = -EACCES;
- loff_t lock_pos = iocb->ki_pos;
+ ssize_t rc;
/*
* We need to hold the sem to be sure nobody modifies lock list
@@ -2696,23 +2674,24 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
*/
down_read(&cinode->lock_sem);
mutex_lock(&inode->i_mutex);
- if (file->f_flags & O_APPEND)
- lock_pos = i_size_read(inode);
- if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
+
+ rc = generic_write_checks(iocb, from);
+ if (rc <= 0)
+ goto out;
+
+ if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
server->vals->exclusive_lock_type, NULL,
- CIFS_WRITE_OP)) {
+ CIFS_WRITE_OP))
rc = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
-
- if (rc > 0) {
- ssize_t err;
+ else
+ rc = -EACCES;
+out:
+ mutex_unlock(&inode->i_mutex);
- err = generic_write_sync(file, iocb->ki_pos - rc, rc);
- if (err < 0)
- rc = err;
- }
- } else {
- mutex_unlock(&inode->i_mutex);
+ if (rc > 0) {
+ ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+ if (err < 0)
+ rc = err;
}
up_read(&cinode->lock_sem);
return rc;
@@ -3815,7 +3794,7 @@ void cifs_oplock_break(struct work_struct *work)
{
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
- struct inode *inode = cfile->dentry->d_inode;
+ struct inode *inode = d_inode(cfile->dentry);
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -3876,8 +3855,7 @@ void cifs_oplock_break(struct work_struct *work)
* Direct IO is not yet supported in the cached mode.
*/
static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t pos)
+cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
/*
* FIXME
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2d4f37235ed0..55b58112d122 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
cifs_buf_release(srchinf->ntwrk_buf_start);
}
kfree(srchinf);
+ if (rc)
+ goto cgii_exit;
} else
goto cgii_exit;
@@ -1065,7 +1067,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
int rc;
struct cifs_fid fid;
struct cifs_open_parms oparms;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink;
@@ -1194,7 +1196,7 @@ cifs_drop_nlink(struct inode *inode)
}
/*
- * If dentry->d_inode is null (usually meaning the cached dentry
+ * If d_inode(dentry) is null (usually meaning the cached dentry
* is a negative dentry) then we would attempt a standard SMB delete, but
* if that fails we can not attempt the fall back mechanisms on EACCESS
* but will return the EACCESS to the caller. Note that the VFS does not call
@@ -1205,7 +1207,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
int rc = 0;
unsigned int xid;
char *full_path = NULL;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct cifsInodeInfo *cifs_inode;
struct super_block *sb = dir->i_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1549,13 +1551,13 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
cifs_put_tlink(tlink);
if (!rc) {
- spin_lock(&direntry->d_inode->i_lock);
- i_size_write(direntry->d_inode, 0);
- clear_nlink(direntry->d_inode);
- spin_unlock(&direntry->d_inode->i_lock);
+ spin_lock(&d_inode(direntry)->i_lock);
+ i_size_write(d_inode(direntry), 0);
+ clear_nlink(d_inode(direntry));
+ spin_unlock(&d_inode(direntry)->i_lock);
}
- cifsInode = CIFS_I(direntry->d_inode);
+ cifsInode = CIFS_I(d_inode(direntry));
/* force revalidate to go get info when needed */
cifsInode->time = 0;
@@ -1566,7 +1568,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
*/
cifsInode->time = 0;
- direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
+ d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime =
current_fs_time(inode->i_sb);
rmdir_exit:
@@ -1725,7 +1727,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
unlink_target:
/* Try unlinking the target dentry if it's not negative */
- if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
+ if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
if (d_is_dir(target_dentry))
tmprc = cifs_rmdir(target_dir, target_dentry);
else
@@ -1865,7 +1867,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
{
unsigned int xid;
int rc = 0;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
struct super_block *sb = dentry->d_sb;
char *full_path = NULL;
@@ -1917,7 +1919,7 @@ int cifs_revalidate_file(struct file *filp)
int cifs_revalidate_dentry(struct dentry *dentry)
{
int rc;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
rc = cifs_revalidate_dentry_attr(dentry);
if (rc)
@@ -1931,7 +1933,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
{
struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = d_inode(dentry);
int rc;
/*
@@ -2108,7 +2110,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
int rc;
unsigned int xid;
char *full_path = NULL;
- struct inode *inode = direntry->d_inode;
+ struct inode *inode = d_inode(direntry);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink;
@@ -2249,7 +2251,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
unsigned int xid;
kuid_t uid = INVALID_UID;
kgid_t gid = INVALID_GID;
- struct inode *inode = direntry->d_inode;
+ struct inode *inode = d_inode(direntry);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
char *full_path = NULL;
@@ -2407,7 +2409,7 @@ cifs_setattr_exit:
int
cifs_setattr(struct dentry *direntry, struct iattr *attrs)
{
- struct inode *inode = direntry->d_inode;
+ struct inode *inode = d_inode(direntry);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 2ec6037f61c7..252e672d5604 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -586,12 +586,12 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
* if source file is cached (oplocked) revalidate will not go to server
* until the file is closed or oplock broken so update nlinks locally
*/
- if (old_file->d_inode) {
- cifsInode = CIFS_I(old_file->d_inode);
+ if (d_really_is_positive(old_file)) {
+ cifsInode = CIFS_I(d_inode(old_file));
if (rc == 0) {
- spin_lock(&old_file->d_inode->i_lock);
- inc_nlink(old_file->d_inode);
- spin_unlock(&old_file->d_inode->i_lock);
+ spin_lock(&d_inode(old_file)->i_lock);
+ inc_nlink(d_inode(old_file));
+ spin_unlock(&d_inode(old_file)->i_lock);
/*
* parent dir timestamps will update from srv within a
@@ -629,7 +629,7 @@ cifs_hl_exit:
void *
cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
{
- struct inode *inode = direntry->d_inode;
+ struct inode *inode = d_inode(direntry);
int rc = -ENOMEM;
unsigned int xid;
char *full_path = NULL;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 337946355b29..8442b8b8e0be 100644