From 2dac355c4670f66a976cbf765cc2af057e3c3a65 Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 20 May 2013 17:35:28 +0000 Subject: [PATCH] New kernel --- acpi-call/PKGBUILD | 56 - acpi-call/acpi_call.install | 23 - aufs3-util/PKGBUILD | 8 +- linux-lts/PKGBUILD | 8 +- linux-lts/linux-lts.install | 2 +- linux/PKGBUILD | 27 +- linux/aufs3-base.patch | 18 +- linux/aufs3-kbuild.patch | 14 +- linux/aufs3-loopback.patch | 48 +- linux/aufs3-proc_map.patch | 4 +- linux/aufs3-standalone.patch | 40 +- linux/aufs3.patch | 28239 +++++++++-------- linux/config.x86_64 | 2 +- linux/linux.install | 4 +- ndiswrapper/PKGBUILD | 14 +- ndiswrapper/ndiswrapper-1.58-add_taint.patch | 24 + 16 files changed, 14515 insertions(+), 14016 deletions(-) delete mode 100644 acpi-call/PKGBUILD delete mode 100755 acpi-call/acpi_call.install create mode 100644 ndiswrapper/ndiswrapper-1.58-add_taint.patch diff --git a/acpi-call/PKGBUILD b/acpi-call/PKGBUILD deleted file mode 100644 index 797a2c36f..000000000 --- a/acpi-call/PKGBUILD +++ /dev/null @@ -1,56 +0,0 @@ -# -# Core Packages for Chakra, part of chakra-project.org - -_kver="3.9.0-1-CHAKRA" - -pkgname=acpi_call -_pkgname=acpi_call -pkgver=1.0.0 -pkgrel=6 -pkgdesc="A kernel module that enables you to call parameterless ACPI methods by writing the method name to /proc/acpi/call, e.g. to turn off discrete graphics card in a dual graphics environment (like NVIDIA Optimus)." -arch=('x86_64') -url=("http://github.com/mkottman/acpi_call") -license=('GPL') -depends=('linux>=3.9' 'linux<3.10' 'linux-headers') -makedepends=('git') -provides=('acpi-call') -replaces=('acpi-call') -conflicts=('acpi-call') -install=acpi_call.install -source=("https://github.com/mkottman/acpi_call/archive/v${pkgver}.tar.gz") -md5sums=('3c71239792ddd725d13f7c8d168b4ecf') - -build() { - cd ${srcdir}/$_pkgname-${pkgver} - - make -} -package() { - cd ${srcdir}/$_pkgname-${pkgver} - install -d ${pkgdir}/usr/share/$_pkgname - install -d ${pkgdir}/usr/bin - install -Dm755 ${srcdir}/$_pkgname-${pkgver}/examples/asus1215n.sh \ - ${pkgdir}/usr/share/$_pkgname - install -Dm755 ${srcdir}/$_pkgname-${pkgver}/examples/m11xr2.sh \ - ${pkgdir}/usr/share/$_pkgname - install -Dm755 ${srcdir}/$_pkgname-${pkgver}/examples/dellL702X.sh \ - ${pkgdir}/usr/share/$_pkgname - install -Dm755 ${srcdir}/$_pkgname-${pkgver}/examples/turn_off_gpu.sh \ - ${pkgdir}/usr/share/$_pkgname - ln -s /usr/share/$_pkgname/turn_off_gpu.sh \ - ${pkgdir}/usr/bin/turn_off_gpu.sh - install -Dm755 ${srcdir}/$_pkgname-${pkgver}/support/query_dsdt.pl \ - ${pkgdir}/usr/share/$_pkgname-${pkgver} - cp -R support/windump_hack \ - ${pkgdir}/usr/share/$_pkgname/ - install -Dm644 README.md \ - ${pkgdir}/usr/share/$_pkgname/README.md - - msg2 "Building module for $_kver..." - - # KDIR is necessary even when cleaning - #make KDIR=/usr/src/linux-${_kver} clean - make KDIR=/usr/src/linux-${_kver} - - install -D -m644 acpi_call.ko $pkgdir/lib/modules/${_kver}/kernel/drivers/acpi/acpi_call.ko -} diff --git a/acpi-call/acpi_call.install b/acpi-call/acpi_call.install deleted file mode 100755 index 596ff3d18..000000000 --- a/acpi-call/acpi_call.install +++ /dev/null @@ -1,23 +0,0 @@ -post_install() { - depmod -a - echo "" - echo "[+] modprobe acpi_call" - echo ">>> run /usr/bin/turn_off_gpu.sh to test/view different acpi_call functions to see if it disables your secondary/hybrid video card" - echo ">>> There are a few scripts to use in /usr/share/acpi_call/ - some for specific laptops => asus1215n.sh, dellL702X.sh, m11xr2.sh" -} - -post_upgrade() { - depmod -a - echo "" - echo "[+] modprobe acpi_call" - echo ">>> run /usr/bin/turn_off_gpu.sh to test/view different acpi_call functions to see if it disables your secondary/hybrid video card" - echo ">>> There are a few scripts to use in /usr/share/acpi_call/ - some for specific laptops => asus1215n.sh, dellL702X.sh, m11xr2.sh" -} - -post_remove() { - depmod -a -} - -op=$1 -shift -$op $* diff --git a/aufs3-util/PKGBUILD b/aufs3-util/PKGBUILD index 14fc6b7a9..7277e443b 100644 --- a/aufs3-util/PKGBUILD +++ b/aufs3-util/PKGBUILD @@ -1,13 +1,11 @@ # # Chakra Packages for Chakra, part of chakra-project.org -# -# maintainer abveritas@chakra-project.org pkgname=aufs3-util pkgver=20120607 pkgrel=2 pkgdesc="Another Unionfs Implementation that supports NFS branches" -arch=('i686' 'x86_64') +arch=('x86_64') url="http://aufs.sourceforge.net/" license=('GPL2') depends=('glibc') @@ -19,8 +17,8 @@ md5sums=('a1012045b45ec13ccc1be231d5e76464') build() { cd $srcdir/aufs3-util-$pkgver - export CPPFLAGS="-I /usr/src/linux-3.4.2-1-CHAKRA/include/" - make + export CPPFLAGS="-I /usr/src/linux-3.9.0-1-CHAKRA/include/" + make } package() { diff --git a/linux-lts/PKGBUILD b/linux-lts/PKGBUILD index a03a55420..096c420b5 100644 --- a/linux-lts/PKGBUILD +++ b/linux-lts/PKGBUILD @@ -1,14 +1,12 @@ # # Core Packages for Chakra, part of chakra-project.org -# -# maintainer abveritas@chakra-project.org pkgbase="linux-lts" pkgname=('linux-lts' 'linux-lts-headers' 'linux-lts-docs') # Build stock -CHAKRA-LTS kernel # pkgname=linux-custom # Build kernel with a different name _kernelname=${pkgname#linux} _basekernel=3.0 -pkgver=3.0.62 +pkgver=3.0.78 pkgrel=1 makedepends=('xmlto' 'docbook-xsl') arch=('x86_64') @@ -35,7 +33,7 @@ source=("ftp://ftp.kernel.org/pub/linux/kernel/v3.0/linux-$_basekernel.tar.bz2" 'module-symbol-waiting-3.0.patch' 'module-init-wait-3.0.patch') md5sums=('398e95866794def22b12dfbc15ce89c0' - 'b0c86830c65280b02223fd8fff8aec08' + 'e82abb9211fdf67d11dff1e9498bb26c' '2d787099d3a44531bcb3ab39aee5539e' '4925d736212481eb1f7b318ef9e8b95f' '821e1112493f187dc70df9412e61f324' @@ -311,4 +309,4 @@ package_linux-lts-docs() { # remove a file already in linux package rm -f "${pkgdir}/usr/src/linux-${_kernver}/Documentation/DocBook/Makefile" -} \ No newline at end of file +} diff --git a/linux-lts/linux-lts.install b/linux-lts/linux-lts.install index 1f37bb605..bcdbfbeea 100644 --- a/linux-lts/linux-lts.install +++ b/linux-lts/linux-lts.install @@ -2,7 +2,7 @@ # arg 2: the old package version KERNEL_NAME=-lts -KERNEL_VERSION=3.0.62-1-lts +KERNEL_VERSION=3.0.78-1-lts post_install () { # updating module dependencies diff --git a/linux/PKGBUILD b/linux/PKGBUILD index 97e9bb540..8c5f40d79 100644 --- a/linux/PKGBUILD +++ b/linux/PKGBUILD @@ -7,7 +7,7 @@ pkgbase=linux pkgname=('linux' 'linux-headers' 'linux-docs') _kernelname=${pkgname#linux} _basekernel=3.9 -pkgver=3.9.0 +pkgver=3.9.2 pkgrel=1 arch=('x86_64') url="http://www.kernel.org/" @@ -15,7 +15,7 @@ license=('GPL2') makedepends=('xmlto' 'docbook-xsl') options=('!strip') source=("http://www.kernel.org/pub/linux/kernel/v3.x/linux-${_basekernel}.tar.xz" - #"http://www.kernel.org/pub/linux/kernel/v3.x/patch-${pkgver}.xz" + "http://www.kernel.org/pub/linux/kernel/v3.x/patch-${pkgver}.xz" # the main kernel config files 'config.x86_64' #aufs patches @@ -26,21 +26,19 @@ source=("http://www.kernel.org/pub/linux/kernel/v3.x/linux-${_basekernel}.tar.xz 'aufs3-kbuild.patch' 'aufs3.patch' 'config.aufs' - # Fixes - alsa-firmware-loading-3.8.8.patch # standard config files for mkinitcpio ramdisk 'linux.preset' 'change-default-console-loglevel.patch') md5sums=('4348c9b6b2eb3144d601e87c19d5d909' - 'b052c16a6f0b1861233166b9d52033d2' - '52690fe8baa6d6abd858f04c4531d437' - '94a873436f6c62580ac54c1b9f0e021a' - 'a2678afaf5205f24c58affd4b1570503' - '7693e0258036ab1c6e6a42d7ee573666' - 'f9d33aeebb5c637eeefc222643632035' - '5e73a0f39d73095bdff6a9168ecc05cd' + 'adeb2556568f79e827e7a0ce4c483605' + '698318024a36e40b4d44eac67c3a7ecd' + '89fbb605ef398297b8dc43409c849c92' + 'd5f57f666f02ca25ba298210e2332ce8' + 'cf2ad9065886e56698524eab323ac656' + '460247f1b4da5dccdaacfa6d161131f0' + '498abc1e34df8a7d19e43980c74f2bc2' + '3ce07d164880e16ae622d1dadb6e5b7a' '52f4a2c7f6277774117c834d949d6b81' - 'e2ac681ffa439e969b4c3b4616852454' 'eb14dcfd80c00852ef81ded6e826826a' '65cbe8e4c8efaf96dd162102e46ce81d') @@ -48,7 +46,7 @@ build() { cd "${srcdir}/linux-${_basekernel}" # add upstream patch - # patch -p1 -i "${srcdir}/patch-${pkgver}" + patch -p1 -i "${srcdir}/patch-${pkgver}" # add latest fixes from stable queue, if needed # http://git.kernel.org/?p=linux/kernel/git/stable/stable-queue.git @@ -58,9 +56,6 @@ build() { # (relevant patch sent upstream: https://lkml.org/lkml/2011/7/26/227) patch -p1 -i "${srcdir}/change-default-console-loglevel.patch" - # Fix for alsa - patch -Np1 -i "${srcdir}/alsa-firmware-loading-3.8.8.patch" - #aufs patches for Live: patch -p1 -i "${srcdir}/aufs3.patch" patch -p1 -i "${srcdir}/aufs3-base.patch" diff --git a/linux/aufs3-base.patch b/linux/aufs3-base.patch index 854c2feec..91d549c09 100644 --- a/linux/aufs3-base.patch +++ b/linux/aufs3-base.patch @@ -1,7 +1,7 @@ -aufs3.x-rcN base patch +aufs3.9 base patch diff --git a/fs/file_table.c b/fs/file_table.c -index de9e965..e73287a 100644 +index cd4d87a..ca5948f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,7 +36,7 @@ struct files_stat_struct files_stat = { @@ -14,10 +14,10 @@ index de9e965..e73287a 100644 /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; diff --git a/fs/inode.c b/fs/inode.c -index 14084b7..ece87ed 100644 +index a898b3d..e83cd1e 100644 --- a/fs/inode.c +++ b/fs/inode.c -@@ -1503,7 +1503,7 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, +@@ -1498,7 +1498,7 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ @@ -27,10 +27,10 @@ index 14084b7..ece87ed 100644 if (inode->i_op->update_time) return inode->i_op->update_time(inode, time, flags); diff --git a/fs/splice.c b/fs/splice.c -index 6909d89..020c7bc 100644 +index 29e394e..7117387 100644 --- a/fs/splice.c +++ b/fs/splice.c -@@ -1092,8 +1092,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); +@@ -1095,8 +1095,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); /* * Attempt to initiate a splice from pipe to file. */ @@ -41,7 +41,7 @@ index 6909d89..020c7bc 100644 { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -@@ -1120,9 +1120,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +@@ -1123,9 +1123,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, /* * Attempt to initiate a splice from a file to a pipe. */ @@ -55,10 +55,10 @@ index 6909d89..020c7bc 100644 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); diff --git a/include/linux/fs.h b/include/linux/fs.h -index 7617ee0..1a39c33 100644 +index 2c28271..5c49108 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -2551,6 +2551,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); +@@ -2558,6 +2558,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); diff --git a/linux/aufs3-kbuild.patch b/linux/aufs3-kbuild.patch index cb4bb2cb2..095163594 100644 --- a/linux/aufs3-kbuild.patch +++ b/linux/aufs3-kbuild.patch @@ -1,4 +1,4 @@ -aufs3.x-rcN kbuild patch +aufs3.9 kbuild patch diff --git a/fs/Kconfig b/fs/Kconfig index 780725a..d460c05 100644 @@ -21,3 +21,15 @@ index 9d53192..e70f08f 100644 obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ +obj-$(CONFIG_AUFS_FS) += aufs/ +diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild +index 5c8a1d2..fea7572 100644 +--- a/include/uapi/linux/Kbuild ++++ b/include/uapi/linux/Kbuild +@@ -56,6 +56,7 @@ header-y += atmppp.h + header-y += atmsap.h + header-y += atmsvc.h + header-y += audit.h ++header-y += aufs_type.h + header-y += auto_fs.h + header-y += auto_fs4.h + header-y += auxvec.h diff --git a/linux/aufs3-loopback.patch b/linux/aufs3-loopback.patch index 3c1372f9c..1aed72303 100644 --- a/linux/aufs3-loopback.patch +++ b/linux/aufs3-loopback.patch @@ -1,10 +1,10 @@ -aufs3.x-rcN loopback patch +aufs3.9 loopback patch diff --git a/drivers/block/loop.c b/drivers/block/loop.c -index ae12512..ed8295b 100644 +index dfe7583..c175e22 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c -@@ -506,7 +506,7 @@ out: +@@ -511,7 +511,7 @@ out: } struct switch_request { @@ -13,7 +13,7 @@ index ae12512..ed8295b 100644 struct completion wait; }; -@@ -568,7 +568,8 @@ static int loop_thread(void *data) +@@ -573,7 +573,8 @@ static int loop_thread(void *data) * First it needs to flush existing IO, it does this by sending a magic * BIO down the pipe. The completion of this BIO does the actual switch. */ @@ -23,7 +23,7 @@ index ae12512..ed8295b 100644 { struct switch_request w; struct bio *bio = bio_alloc(GFP_KERNEL, 0); -@@ -576,6 +577,7 @@ static int loop_switch(struct loop_device *lo, struct file *file) +@@ -581,6 +582,7 @@ static int loop_switch(struct loop_device *lo, struct file *file) return -ENOMEM; init_completion(&w.wait); w.file = file; @@ -31,7 +31,7 @@ index ae12512..ed8295b 100644 bio->bi_private = &w; bio->bi_bdev = NULL; loop_make_request(lo->lo_queue, bio); -@@ -592,7 +594,7 @@ static int loop_flush(struct loop_device *lo) +@@ -597,7 +599,7 @@ static int loop_flush(struct loop_device *lo) if (!lo->lo_thread) return 0; @@ -40,7 +40,7 @@ index ae12512..ed8295b 100644 } /* -@@ -611,6 +613,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) +@@ -616,6 +618,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) mapping = file->f_mapping; mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; @@ -48,7 +48,7 @@ index ae12512..ed8295b 100644 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? mapping->host->i_bdev->bd_block_size : PAGE_SIZE; lo->old_gfp_mask = mapping_gfp_mask(mapping); -@@ -619,6 +622,13 @@ out: +@@ -624,6 +627,13 @@ out: complete(&p->wait); } @@ -62,7 +62,7 @@ index ae12512..ed8295b 100644 /* * loop_change_fd switched the backing store of a loopback device to -@@ -632,6 +642,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, +@@ -637,6 +647,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { struct file *file, *old_file; @@ -70,7 +70,7 @@ index ae12512..ed8295b 100644 struct inode *inode; int error; -@@ -648,9 +659,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, +@@ -653,9 +664,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, file = fget(arg); if (!file) goto out; @@ -87,7 +87,7 @@ index ae12512..ed8295b 100644 error = -EINVAL; -@@ -662,17 +680,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, +@@ -667,17 +685,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, goto out_putf; /* and ... switch */ @@ -110,7 +110,7 @@ index ae12512..ed8295b 100644 out: return error; } -@@ -815,7 +837,7 @@ static void loop_config_discard(struct loop_device *lo) +@@ -820,7 +842,7 @@ static void loop_config_discard(struct loop_device *lo) static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { @@ -119,7 +119,7 @@ index ae12512..ed8295b 100644 struct inode *inode; struct address_space *mapping; unsigned lo_blocksize; -@@ -830,6 +852,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, +@@ -835,6 +857,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, file = fget(arg); if (!file) goto out; @@ -132,7 +132,7 @@ index ae12512..ed8295b 100644 error = -EBUSY; if (lo->lo_state != Lo_unbound) -@@ -878,6 +906,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, +@@ -883,6 +911,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -140,7 +140,7 @@ index ae12512..ed8295b 100644 lo->transfer = transfer_none; lo->ioctl = NULL; lo->lo_sizelimit = 0; -@@ -924,6 +953,7 @@ out_clr: +@@ -934,6 +963,7 @@ out_clr: lo->lo_thread = NULL; lo->lo_device = NULL; lo->lo_backing_file = NULL; @@ -148,7 +148,7 @@ index ae12512..ed8295b 100644 lo->lo_flags = 0; set_capacity(lo->lo_disk, 0); invalidate_bdev(bdev); -@@ -933,6 +963,8 @@ out_clr: +@@ -943,6 +973,8 @@ out_clr: lo->lo_state = Lo_unbound; out_putf: fput(file); @@ -157,7 +157,7 @@ index ae12512..ed8295b 100644 out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); -@@ -979,6 +1011,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, +@@ -989,6 +1021,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, static int loop_clr_fd(struct loop_device *lo) { struct file *filp = lo->lo_backing_file; @@ -165,7 +165,7 @@ index ae12512..ed8295b 100644 gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; -@@ -1012,6 +1045,7 @@ static int loop_clr_fd(struct loop_device *lo) +@@ -1022,6 +1055,7 @@ static int loop_clr_fd(struct loop_device *lo) spin_lock_irq(&lo->lo_lock); lo->lo_backing_file = NULL; @@ -173,7 +173,7 @@ index ae12512..ed8295b 100644 spin_unlock_irq(&lo->lo_lock); loop_release_xfer(lo); -@@ -1052,6 +1086,8 @@ static int loop_clr_fd(struct loop_device *lo) +@@ -1064,6 +1098,8 @@ static int loop_clr_fd(struct loop_device *lo) * bd_mutex which is usually taken before lo_ctl_mutex. */ fput(filp); @@ -183,7 +183,7 @@ index ae12512..ed8295b 100644 } diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c -index 5c35454..88f507b 100644 +index 56924c5..b832bf8 100644 --- a/fs/aufs/f_op.c +++ b/fs/aufs/f_op.c @@ -352,7 +352,7 @@ static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, @@ -196,10 +196,10 @@ index 5c35454..88f507b 100644 if (file->f_mapping != h_file->f_mapping) { file->f_mapping = h_file->f_mapping; diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c -index 4fa75a4..a923899 100644 +index ccae19c..15dc5c2 100644 --- a/fs/aufs/loop.c +++ b/fs/aufs/loop.c -@@ -131,3 +131,19 @@ void au_loopback_fin(void) +@@ -133,3 +133,19 @@ void au_loopback_fin(void) { kfree(au_warn_loopback_array); } @@ -242,7 +242,7 @@ index 88d019c..8707c3a 100644 #endif /* __KERNEL__ */ diff --git a/fs/aufs/super.c b/fs/aufs/super.c -index ab4c909..8e654b8 100644 +index 4f57ba0..786ffea 100644 --- a/fs/aufs/super.c +++ b/fs/aufs/super.c @@ -811,7 +811,10 @@ static const struct super_operations aufs_sop = { @@ -258,7 +258,7 @@ index ab4c909..8e654b8 100644 /* ---------------------------------------------------------------------- */ diff --git a/include/linux/fs.h b/include/linux/fs.h -index 1a39c33..146e26f 100644 +index 5c49108..f8e9fd4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1614,6 +1614,10 @@ struct super_operations { diff --git a/linux/aufs3-proc_map.patch b/linux/aufs3-proc_map.patch index 77f34c48f..5b6e2e8eb 100644 --- a/linux/aufs3-proc_map.patch +++ b/linux/aufs3-proc_map.patch @@ -1,4 +1,4 @@ -aufs3.x-rcN proc_map patch +aufs3.9 proc_map patch diff --git a/fs/buffer.c b/fs/buffer.c index b4dcb34..7b6b475 100644 @@ -163,7 +163,7 @@ index 0db0de1..147446c 100644 new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); diff --git a/mm/nommu.c b/mm/nommu.c -index 2f3ea74..06529ce 100644 +index e001768..e9f09eb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -650,6 +650,8 @@ static void __put_nommu_region(struct vm_region *region) diff --git a/linux/aufs3-standalone.patch b/linux/aufs3-standalone.patch index 6007f3f6c..3dfc2f070 100644 --- a/linux/aufs3-standalone.patch +++ b/linux/aufs3-standalone.patch @@ -1,7 +1,7 @@ -aufs3.x-rcN standalone patch +aufs3.9 standalone patch diff --git a/fs/file_table.c b/fs/file_table.c -index e73287a..b33aebe 100644 +index ca5948f..b553610 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -37,6 +37,7 @@ struct files_stat_struct files_stat = { @@ -12,7 +12,7 @@ index e73287a..b33aebe 100644 /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; -@@ -403,6 +404,8 @@ void file_sb_list_del(struct file *file) +@@ -404,6 +405,8 @@ void file_sb_list_del(struct file *file) } } @@ -22,7 +22,7 @@ index e73287a..b33aebe 100644 /* diff --git a/fs/inode.c b/fs/inode.c -index ece87ed..38f7bc8 100644 +index e83cd1e..fa2245a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -56,6 +56,7 @@ static struct hlist_head *inode_hashtable __read_mostly; @@ -33,7 +33,7 @@ index ece87ed..38f7bc8 100644 /* * Empty aops. Can be used for the cases where the user does not -@@ -1519,6 +1520,7 @@ int update_time(struct inode *inode, struct timespec *time, int flags) +@@ -1514,6 +1515,7 @@ int update_time(struct inode *inode, struct timespec *time, int flags) mark_inode_dirty_sync(inode); return 0; } @@ -42,7 +42,7 @@ index ece87ed..38f7bc8 100644 /** * touch_atime - update the access time diff --git a/fs/namespace.c b/fs/namespace.c -index 55605c5..aed7607 100644 +index 341d3f5..18f5bb0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -52,6 +52,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); @@ -53,7 +53,15 @@ index 55605c5..aed7607 100644 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { -@@ -1424,6 +1425,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, +@@ -425,6 +426,7 @@ void __mnt_drop_write(struct vfsmount *mnt) + mnt_dec_writers(real_mount(mnt)); + preempt_enable(); + } ++EXPORT_SYMBOL_GPL(__mnt_drop_write); + + /** + * mnt_drop_write - give up write access to a mount +@@ -1417,6 +1419,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, } return 0; } @@ -134,10 +142,10 @@ index fc6b49b..a6bb87d 100644 static int fsnotify_mark_destroy(void *ignored) { diff --git a/fs/open.c b/fs/open.c -index 9b33c0c..e3365035 100644 +index 6835446..df2262a 100644 --- a/fs/open.c +++ b/fs/open.c -@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, +@@ -61,6 +61,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, mutex_unlock(&dentry->d_inode->i_mutex); return ret; } @@ -146,10 +154,10 @@ index 9b33c0c..e3365035 100644 long vfs_truncate(struct path *path, loff_t length) { diff --git a/fs/splice.c b/fs/splice.c -index 020c7bc..a622ade 100644 +index 7117387..af8fd2d 100644 --- a/fs/splice.c +++ b/fs/splice.c -@@ -1116,6 +1116,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +@@ -1119,6 +1119,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out, return splice_write(pipe, out, ppos, len, flags); } @@ -157,7 +165,7 @@ index 020c7bc..a622ade 100644 /* * Attempt to initiate a splice from a file to a pipe. -@@ -1142,6 +1143,7 @@ long do_splice_to(struct file *in, loff_t *ppos, +@@ -1145,6 +1146,7 @@ long do_splice_to(struct file *in, loff_t *ppos, return splice_read(in, ppos, pipe, len, flags); } @@ -166,7 +174,7 @@ index 020c7bc..a622ade 100644 /** * splice_direct_to_actor - splices data directly between two non-pipes diff --git a/security/commoncap.c b/security/commoncap.c -index 7ee08c7..176edf1 100644 +index c44b6fe..d78b003 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -988,9 +988,11 @@ int cap_mmap_addr(unsigned long addr) @@ -182,7 +190,7 @@ index 7ee08c7..176edf1 100644 } +EXPORT_SYMBOL_GPL(cap_mmap_file); diff --git a/security/device_cgroup.c b/security/device_cgroup.c -index d794abc..a20f167 100644 +index 1c69e38..7392d19 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -7,6 +7,7 @@ @@ -193,7 +201,7 @@ index d794abc..a20f167 100644 #include #include #include -@@ -631,6 +632,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask) +@@ -634,6 +635,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask) return __devcgroup_check_permission(type, imajor(inode), iminor(inode), access); } @@ -202,7 +210,7 @@ index d794abc..a20f167 100644 int devcgroup_inode_mknod(int mode, dev_t dev) { diff --git a/security/security.c b/security/security.c -index 7b88c6a..5d00a30 100644 +index 03f248b..892e803 100644 --- a/security/security.c +++ b/security/security.c @@ -396,6 +396,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry) diff --git a/linux/aufs3.patch b/linux/aufs3.patch index 9ffc1dc8f..dc095e882 100644 --- a/linux/aufs3.patch +++ b/linux/aufs3.patch @@ -1,6 +1,6 @@ --- /dev/null -+++ linux-3.x-rcN/Documentation/ABI/testing/debugfs-aufs 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,37 @@ ++++ linux-3.9/Documentation/ABI/testing/debugfs-aufs 2013-05-13 17:13:17.203794085 +0200 +@@ -0,0 +1,50 @@ +What: /debug/aufs/si_/ +Date: March 2009 +Contact: J. R. Okajima @@ -9,6 +9,19 @@ + per aufs mount, where is a unique id generated + internally. + ++What: /debug/aufs/si_/plink ++Date: Apr 2013 ++Contact: J. R. Okajima ++Description: ++ It has three lines and shows the information about the ++ pseudo-link. The first line is a single number ++ representing a number of buckets. The second line is a ++ number of pseudo-links per buckets (separated by a ++ blank). The last line is a single number representing a ++ total number of psedo-links. ++ When the aufs mount option 'noplink' is specified, it ++ will show "1\n0\n0\n". ++ +What: /debug/aufs/si_/xib +Date: March 2009 +Contact: J. R. Okajima @@ -39,7 +52,7 @@ + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. --- /dev/null -+++ linux-3.x-rcN/Documentation/ABI/testing/sysfs-aufs 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/ABI/testing/sysfs-aufs 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,24 @@ +What: /sys/fs/aufs/si_/ +Date: March 2009 @@ -66,173 +79,356 @@ + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/01intro.txt 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,162 @@ ++++ linux-3.9/Documentation/filesystems/aufs/README 2013-05-13 17:13:17.203794085 +0200 +@@ -0,0 +1,345 @@ + -+# Copyright (C) 2005-2013 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, write to the Free Software -+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++Aufs3 -- advanced multi layered unification filesystem version 3.x ++http://aufs.sf.net ++Junjiro R. Okajima + -+Introduction ++ ++0. Introduction +---------------------------------------- ++In the early days, aufs was entirely re-designed and re-implemented ++Unionfs Version 1.x series. After many original ideas, approaches, ++improvements and implementations, it becomes totally different from ++Unionfs while keeping the basic features. ++Recently, Unionfs Version 2.x series begin taking some of the same ++approaches to aufs1's. ++Unionfs is being developed by Professor Erez Zadok at Stony Brook ++University and his team. + -+aufs [ei ju: ef es] | [a u f s] -+1. abbrev. for "advanced multi-layered unification filesystem". -+2. abbrev. for "another unionfs". -+3. abbrev. for "auf das" in German which means "on the" in English. -+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). -+ But "Filesystem aufs Filesystem" is hard to understand. ++Aufs3 supports linux-3.0 and later. ++If you want older kernel version support, try aufs2-2.6.git or ++aufs2-standalone.git repository, aufs1 from CVS on SourceForge. + -+AUFS is a filesystem with features: -+- multi layered stackable unification filesystem, the member directory -+ is called as a branch. -+- branch permission and attribute, 'readonly', 'real-readonly', -+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their -+ combination. -+- internal "file copy-on-write". -+- logical deletion, whiteout. -+- dynamic branch manipulation, adding, deleting and changing permission. -+- allow bypassing aufs, user's direct branch access. -+- external inode number translation table and bitmap which maintains the -+ persistent aufs inode number. -+- seekable directory, including NFS readdir. -+- file mapping, mmap and sharing pages. -+- pseudo-link, hardlink over branches. -+- loopback mounted filesystem as a branch. -+- several policies to select one among multiple writable branches. -+- revert a single systemcall when an error occurs in aufs. -+- and more... ++Note: it becomes clear that "Aufs was rejected. Let's give it up." ++According to Christoph Hellwig, linux rejects all union-type filesystems ++but UnionMount. ++ + + -+Multi Layered Stackable Unification Filesystem -+---------------------------------------------------------------------- -+Most people already knows what it is. -+It is a filesystem which unifies several directories and provides a -+merged single directory. When users access a file, the access will be -+passed/re-directed/converted (sorry, I am not sure which English word is -+correct) to the real file on the member filesystem. The member -+filesystem is called 'lower filesystem' or 'branch' and has a mode -+'readonly' and 'readwrite.' And the deletion for a file on the lower -+readonly branch is handled by creating 'whiteout' on the upper writable -+branch. ++1. Features ++---------------------------------------- ++- unite several directories into a single virtual filesystem. The member ++ directory is called as a branch. ++- you can specify the permission flags to the branch, which are 'readonly', ++ 'readwrite' and 'whiteout-able.' ++- by upper writable branch, internal copyup and whiteout, files/dirs on ++ readonly branch are modifiable logically. ++- dynamic branch manipulation, add, del. ++- etc... + -+On LKML, there have been discussions about UnionMount (Jan Blunck, -+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took -+different approaches to implement the merged-view. -+The former tries putting it into VFS, and the latter implements as a -+separate filesystem. -+(If I misunderstand about these implementations, please let me know and -+I shall correct it. Because it is a long time ago when I read their -+source files last time). ++Also there are many enhancements in aufs1, such as: ++- readdir(3) in userspace. ++- keep inode number by external inode number table ++- keep the timestamps of file/dir in internal copyup operation ++- seekable directory, supporting NFS readdir. ++- whiteout is hardlinked in order to reduce the consumption of inodes ++ on branch ++- do not copyup, nor create a whiteout when it is unnecessary ++- revert a single systemcall when an error occurs in aufs ++- remount interface instead of ioctl ++- maintain /etc/mtab by an external command, /sbin/mount.aufs. ++- loopback mounted filesystem as a branch ++- kernel thread for removing the dir who has a plenty of whiteouts ++- support copyup sparse file (a file which has a 'hole' in it) ++- default permission flags for branches ++- selectable permission flags for ro branch, whether whiteout can ++ exist or not ++- export via NFS. ++- support /fs/aufs and /aufs. ++- support multiple writable branches, some policies to select one ++ among multiple writable branches. ++- a new semantics for link(2) and rename(2) to support multiple ++ writable branches. ++- no glibc changes are required. ++- pseudo hardlink (hardlink over branches) ++- allow a direct access manually to a file on branch, e.g. bypassing aufs. ++ including NFS or remote filesystem branch. ++- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX. ++- and more... + -+UnionMount's approach will be able to small, but may be hard to share -+branches between several UnionMount since the whiteout in it is -+implemented in the inode on branch filesystem and always -+shared. According to Bharata's post, readdir does not seems to be -+finished yet. -+There are several missing features known in this implementations such as -+- for users, the inode number may change silently. eg. copy-up. -+- link(2) may break by copy-up. -+- read(2) may get an obsoleted filedata (fstat(2) too). -+- fcntl(F_SETLK) may be broken by copy-up. -+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after -+ open(O_RDWR). ++Currently these features are dropped temporary from aufs3. ++See design/08plan.txt in detail. ++- test only the highest one for the directory permission (dirperm1) ++- copyup on open (coo=) ++- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs ++ (robr) ++- statistics of aufs thread (/sys/fs/aufs/stat) ++- delegation mode (dlgt) ++ a delegation of the internal branch access to support task I/O ++ accounting, which also supports Linux Security Modules (LSM) mainly ++ for Suse AppArmor. ++- intent.open/create (file open in a single lookup) + -+Unionfs has a longer history. When I started implementing a stacking filesystem -+(Aug 2005), it already existed. It has virtual super_block, inode, -+dentry and file objects and they have an array pointing lower same kind -+objects. After contributing many patches for Unionfs, I re-started my -+project AUFS (Jun 2006). ++Features or just an idea in the future (see also design/*.txt), ++- reorder the branch index without del/re-add. ++- permanent xino files for NFSD ++- an option for refreshing the opened files after add/del branches ++- 'move' policy for copy-up between two writable branches, after ++ checking free space. ++- light version, without branch manipulation. (unnecessary?) ++- copyup in userspace ++- inotify in userspace ++- readv/writev ++- xattr, acl + -+In AUFS, the structure of filesystem resembles to Unionfs, but I -+implemented my own ideas, approaches and enhancements and it became -+totally different one. + -+Comparing DM snapshot and fs based implementation -+- the number of bytes to be copied between devices is much smaller. -+- the type of filesystem must be one and only. -+- the fs must be writable, no readonly fs, even for the lower original -+ device. so the compression fs will not be usable. but if we use -+ loopback mount, we may address this issue. -+ for instance, -+ mount /cdrom/squashfs.img /sq -+ losetup /sq/ext2.img -+ losetup /somewhere/cow -+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..." -+- it will be difficult (or needs more operations) to extract the -+ difference between the original device and COW. -+- DM snapshot-merge may help a lot when users try merging. in the -+ fs-layer union, users will use rsync(1). ++2. Download ++---------------------------------------- ++There were three GIT trees for aufs3, aufs3-linux.git, ++aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in ++"aufs-util.git." ++While the aufs-util is always necessary, you need either of aufs3-linux ++or aufs3-standalone. + ++The aufs3-linux tree includes the whole linux mainline GIT tree, ++git://git.kernel.org/.../torvalds/linux.git. ++And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot ++build aufs3 as an external kernel module. + -+Several characters/aspects of aufs -+---------------------------------------------------------------------- ++On the other hand, the aufs3-standalone tree has only aufs source files ++and necessary patches, and you can select CONFIG_AUFS_FS=m. + -+Aufs has several characters or aspects. -+1. a filesystem, callee of VFS helper -+2. sub-VFS, caller of VFS helper for branches -+3. a virtual filesystem which maintains persistent inode number -+4. reader/writer of files on branches such like an application ++You will find GIT branches whose name is in form of "aufs3.x" where "x" ++represents the linux kernel version, "linux-3.x". For instance, ++"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use ++"aufs3.x-rcN" branch. + -+1. Callee of VFS Helper -+As an ordinary linux filesystem, aufs is a callee of VFS. For instance, -+unlink(2) from an application reaches sys_unlink() kernel function and -+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it -+calls filesystem specific unlink operation. Actually aufs implements the -+unlink operation but it behaves like a redirector. ++o aufs3-linux tree ++$ git clone --reference /your/linux/git/tree \ ++ git://git.code.sf.net/p/aufs/aufs3-linux aufs-aufs3-linux \ ++ aufs3-linux.git ++- if you don't have linux GIT tree, then remove "--reference ..." ++$ cd aufs3-linux.git ++$ git checkout origin/aufs3.0 + -+2. Caller of VFS Helper for Branches -+aufs_unlink() passes the unlink request to the branch filesystem as if -+it were called from VFS. So the called unlink operation of the branch -+filesystem acts as usual. As a caller of VFS helper, aufs should handle -+every necessary pre/post operation for the branch filesystem. -+- acquire the lock for the parent dir on a branch -+- lookup in a branch -+- revalidate dentry on a branch -+- mnt_want_write() for a branch -+- vfs_unlink() for a branch -+- mnt_drop_write() for a branch -+- release the lock on a branch ++o aufs3-standalone tree ++$ git clone git://git.code.sf.net/p/aufs/aufs3-standalone \ ++ aufs3-standalone.git ++$ cd aufs3-standalone.git ++$ git checkout origin/aufs3.0 + -+3. Persistent Inode Number -+One of the most important issue for a filesystem is to maintain inode -+numbers. This is particularly important to support exporting a -+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a -+backend block device for its own. But some storage is necessary to -+maintain inode number. It may be a large space and may not suit to keep -+in memory. Aufs rents some space from its first writable branch -+filesystem (by default) and creates file(s) on it. These files are -+created by aufs internally and removed soon (currently) keeping opened. -+Note: Because these files are removed, they are totally gone after -+ unmounting aufs. It means the inode numbers are not persistent -+ across unmount or reboot. I have a plan to make them really -+ persistent which will be important for aufs on NFS server. ++o aufs-util tree ++$ git clone git://git.code.sf.net/p/aufs/aufs-util \ ++ aufs-util.git ++$ cd aufs-util.git ++$ git checkout origin/aufs3.0 + -+4. Read/Write Files Internally (copy-on-write) -+Because a branch can be readonly, when you write a file on it, aufs will -+"copy-up" it to the upper writable branch internally. And then write the -+originally requested thing to the file. Generally kernel doesn't -+open/read/write file actively. In aufs, even a single write may cause a -+internal "file copy". This behaviour is very similar to cp(1) command. ++Note: The 3.x-rcN branch is to be used with `rc' kernel versions ONLY. ++The minor version number, 'x' in '3.x', of aufs may not always ++follow the minor version number of the kernel. ++Because changes in the kernel that cause the use of a new ++minor version number do not always require changes to aufs-util. + -+Some people may think it is better to pass such work to user space -+helper, instead of doing in kernel space. Actually I am still thinking -+about it. But currently I have implemented it in kernel space. ++Since aufs-util has its own minor version number, you may not be ++able to find a GIT branch in aufs-util for your kernel's ++exact minor version number. ++In this case, you should git-checkout the branch for the ++nearest lower number. ++ ++For (an unreleased) example: ++If you are using "linux-3.10" and the "aufs3.10" branch ++does not exist in aufs-util repository, then "aufs3.9", "aufs3.8" ++or something numerically smaller is the branch for your kernel. ++ ++Also you can view all branches by ++ $ git branch -a ++ ++ ++3. Configuration and Compilation ++---------------------------------------- ++Make sure you have git-checkout'ed the correct branch. ++ ++For aufs3-linux tree, ++- enable CONFIG_AUFS_FS. ++- set other aufs configurations if necessary. ++ ++For aufs3-standalone tree, ++There are several ways to build. ++ ++1. ++- apply ./aufs3-kbuild.patch to your kernel source files. ++- apply ./aufs3-base.patch too. ++- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and ++ others including lsof(1)) show the file path on aufs instead of the ++ path on the branch fs. ++- apply ./aufs3-standalone.patch too, if you have a plan to set ++ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch. ++- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your ++ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild. ++- enable CONFIG_AUFS_FS, you can select either ++ =m or =y. ++- and build your kernel as usual. ++- install the built kernel. ++ Note: Since linux-3.9, every filesystem module requires an alias ++ "fs-". You should make sure that "fs-aufs" is listed in your ++ modules.aliases file if you set CONFIG_AUFS_FS=m. ++- install the header files too by "make headers_install" to the ++ directory where you specify. By default, it is $PWD/usr. ++ "make help" shows a brief note for headers_install. ++- and reboot your system. ++ ++2. ++- module only (CONFIG_AUFS_FS=m). ++- apply ./aufs3-base.patch to your kernel source files. ++- apply ./aufs3-proc_map.patch too to your kernel source files, ++ if you want to make /proc/PID/maps (and others including lsof(1)) show ++ the file path on aufs instead of the path on the branch fs. ++- apply ./aufs3-standalone.patch too. ++- build your kernel, don't forget "make headers_install", and reboot. ++- edit ./config.mk and set other aufs configurations if necessary. ++ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes ++ every aufs configurations. ++- build the module by simple "make". ++ Note: Since linux-3.9, every filesystem module requires an alias ++ "fs-". You should make sure that "fs-aufs" is listed in your ++ modules.aliases file. ++- you can specify ${KDIR} make variable which points to your kernel ++ source tree. ++- install the files ++ + run "make install" to install the aufs module, or copy the built ++ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply). ++ + run "make install_headers" (instead of headers_install) to install ++ the modified aufs header file (you can specify DESTDIR which is ++ available in aufs standalone version's Makefile only), or copy ++ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever ++ you like manually. By default, the target directory is $PWD/usr. ++- no need to apply aufs3-kbuild.patch, nor copying source files to your ++ kernel source tree. ++ ++Note: The header file aufs_type.h is necessary to build aufs-util ++ as well as "make headers_install" in the kernel source tree. ++ headers_install is subject to be forgotten, but it is essentially ++ necessary, not only for building aufs-util. ++ You may not meet problems without headers_install in some older ++ version though. ++ ++And then, ++- read README in aufs-util, build and install it ++- note that your distribution may contain an obsoleted version of ++ aufs_type.h in /usr/include/linux or something. When you build aufs ++ utilities, make sure that your compiler refers the correct aufs header ++ file which is built by "make headers_install." ++- if you want to use readdir(3) in userspace or pathconf(3) wrapper, ++ then run "make install_ulib" too. And refer to the aufs manual in ++ detail. ++ ++ ++4. Usage ++---------------------------------------- ++At first, make sure aufs-util are installed, and please read the aufs ++manual, aufs.5 in aufs-util.git tree. ++$ man -l aufs.5 ++ ++And then, ++$ mkdir /tmp/rw /tmp/aufs ++# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs ++ ++Here is another example. The result is equivalent. ++# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs ++ Or ++# mount -t aufs -o br:/tmp/rw none /tmp/aufs ++# mount -o remount,append:${HOME} /tmp/aufs ++ ++Then, you can see whole tree of your home dir through /tmp/aufs. If ++you modify a file under /tmp/aufs, the one on your home directory is ++not affected, instead the same named file will be newly created under ++/tmp/rw. And all of your modification to a file will be applied to ++the one under /tmp/rw. This is called the file based Copy on Write ++(COW) method. ++Aufs mount options are described in aufs.5. ++If you run chroot or something and make your aufs as a root directory, ++then you need to customize the shutdown script. See the aufs manual in ++detail. ++ ++Additionally, there are some sample usages of aufs which are a ++diskless system with network booting, and LiveCD over NFS. ++See sample dir in CVS tree on SourceForge. ++ ++ ++5. Contact ++---------------------------------------- ++When you have any problems or strange behaviour in aufs, please let me ++know with: ++- /proc/mounts (instead of the output of mount(8)) ++- /sys/module/aufs/* ++- /sys/fs/aufs/* (if you have them) ++- /debug/aufs/* (if you have them) ++- linux kernel version ++ if your kernel is not plain, for example modified by distributor, ++ the url where i can download its source is necessary too. ++- aufs version which was printed at loading the module or booting the ++ system, instead of the date you downloaded. ++- configuration (define/undefine CONFIG_AUFS_xxx) ++- kernel configuration or /proc/config.gz (if you have it) ++- behaviour which you think to be incorrect ++- actual operation, reproducible one is better ++- mailto: aufs-users at lists.sourceforge.net ++ ++Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches, ++and Feature Requests) on SourceForge. Please join and write to ++aufs-users ML. ++ ++ ++6. Acknowledgements ++---------------------------------------- ++Thanks to everyone who have tried and are using aufs, whoever ++have reported a bug or any feedback. ++ ++Especially donators: ++Tomas Matejicek(slax.org) made a donation (much more than once). ++ Since Apr 2010, Tomas M (the author of Slax and Linux Live ++ scripts) is making "doubling" donations. ++ Unfortunately I cannot list all of the donators, but I really ++ appreciate. ++ It ends Aug 2010, but the ordinary donation URL is still available. ++ ++Dai Itasaka made a donation (2007/8). ++Chuck Smith made a donation (2008/4, 10 and 12). ++Henk Schoneveld made a donation (2008/9). ++Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10). ++Francois Dupoux made a donation (2008/11). ++Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public ++ aufs2 GIT tree (2009/2). ++William Grant made a donation (2009/3). ++Patrick Lane made a donation (2009/4). ++The Mail Archive (mail-archive.com) made donations (2009/5). ++Nippy Networks (Ed Wildgoose) made a donation (2009/7). ++New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11). ++Pavel Pronskiy made a donation (2011/2). ++Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy ++ Networks (Ed Wildgoose) made a donation for hardware (2011/3). ++Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and ++11). ++Sam Liddicott made a donation (2011/9). ++Era Scarecrow made a donation (2013/4). ++Bor Ratajc made a donation (2013/4). ++Alessandro Gorreta made a donation (2013/4). ++POIRETTE Marc made a donation (2013/4). ++Alessandro Gorreta made a donation (2013/4). ++lauri kasvandik made a donation (2013/5). ++ ++Thank you very much. ++Donations are always, including future donations, very important and ++helpful for me to keep on developing aufs. ++ ++ ++7. ++---------------------------------------- ++If you are an experienced user, no explanation is needed. Aufs is ++just a linux filesystem. ++ ++ ++Enjoy! ++ ++# Local variables: ; ++# mode: text; ++# End: ; --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/02struct.txt 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,226 @@ ++++ linux-3.9/Documentation/filesystems/aufs/design/01intro.txt 2013-05-13 17:13:17.203794085 +0200 +@@ -0,0 +1,162 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima +# @@ -250,38 +446,203 @@ +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + -+Basic Aufs Internal Structure -+ -+Superblock/Inode/Dentry/File Objects -+---------------------------------------------------------------------- -+As like an ordinary filesystem, aufs has its own -+superblock/inode/dentry/file objects. All these objects have a -+dynamically allocated array and store the same kind of pointers to the -+lower filesystem, branch. -+For example, when you build a union with one readwrite branch and one -+readonly, mounted /au, /rw and /ro respectively. -+- /au = /rw + /ro -+- /ro/fileA exists but /rw/fileA -+ -+Aufs lookup operation finds /ro/fileA and gets dentry for that. These -+pointers are stored in a aufs dentry. The array in aufs dentry will be, -+- [0] = NULL -+- [1] = /ro/fileA ++Introduction ++---------------------------------------- + -+This style of an array is essentially same to the aufs -+superblock/inode/dentry/file objects. ++aufs [ei ju: ef es] | [a u f s] ++1. abbrev. for "advanced multi-layered unification filesystem". ++2. abbrev. for "another unionfs". ++3. abbrev. for "auf das" in German which means "on the" in English. ++ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). ++ But "Filesystem aufs Filesystem" is hard to understand. + -+Because aufs supports manipulating branches, ie. add/delete/change -+dynamically, these objects has its own generation. When branches are -+changed, the generation in aufs superblock is incremented. And a -+generation in other object are compared when it is accessed. -+When a generation in other objects are obsoleted, aufs refreshes the -+internal array. ++AUFS is a filesystem with features: ++- multi layered stackable unification filesystem, the member directory ++ is called as a branch. ++- branch permission and attribute, 'readonly', 'real-readonly', ++ 'readwrite', 'whiteout-able', 'link-able whiteout' and their ++ combination. ++- internal "file copy-on-write". ++- logical deletion, whiteout. ++- dynamic branch manipulation, adding, deleting and changing permission. ++- allow bypassing aufs, user's direct branch access. ++- external inode number translation table and bitmap which maintains the ++ persistent aufs inode number. ++- seekable directory, including NFS readdir. ++- file mapping, mmap and sharing pages. ++- pseudo-link, hardlink over branches. ++- loopback mounted filesystem as a branch. ++- several policies to select one among multiple writable branches. ++- revert a single systemcall when an error occurs in aufs. ++- and more... + + -+Superblock ++Multi Layered Stackable Unification Filesystem +---------------------------------------------------------------------- -+Additionally aufs superblock has some data for policies to select one ++Most people already knows what it is. ++It is a filesystem which unifies several directories and provides a ++merged single directory. When users access a file, the access will be ++passed/re-directed/converted (sorry, I am not sure which English word is ++correct) to the real file on the member filesystem. The member ++filesystem is called 'lower filesystem' or 'branch' and has a mode ++'readonly' and 'readwrite.' And the deletion for a file on the lower ++readonly branch is handled by creating 'whiteout' on the upper writable ++branch. ++ ++On LKML, there have been discussions about UnionMount (Jan Blunck, ++Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took ++different approaches to implement the merged-view. ++The former tries putting it into VFS, and the latter implements as a ++separate filesystem. ++(If I misunderstand about these implementations, please let me know and ++I shall correct it. Because it is a long time ago when I read their ++source files last time). ++ ++UnionMount's approach will be able to small, but may be hard to share ++branches between several UnionMount since the whiteout in it is ++implemented in the inode on branch filesystem and always ++shared. According to Bharata's post, readdir does not seems to be ++finished yet. ++There are several missing features known in this implementations such as ++- for users, the inode number may change silently. eg. copy-up. ++- link(2) may break by copy-up. ++- read(2) may get an obsoleted filedata (fstat(2) too). ++- fcntl(F_SETLK) may be broken by copy-up. ++- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after ++ open(O_RDWR). ++ ++Unionfs has a longer history. When I started implementing a stacking filesystem ++(Aug 2005), it already existed. It has virtual super_block, inode, ++dentry and file objects and they have an array pointing lower same kind ++objects. After contributing many patches for Unionfs, I re-started my ++project AUFS (Jun 2006). ++ ++In AUFS, the structure of filesystem resembles to Unionfs, but I ++implemented my own ideas, approaches and enhancements and it became ++totally different one. ++ ++Comparing DM snapshot and fs based implementation ++- the number of bytes to be copied between devices is much smaller. ++- the type of filesystem must be one and only. ++- the fs must be writable, no readonly fs, even for the lower original ++ device. so the compression fs will not be usable. but if we use ++ loopback mount, we may address this issue. ++ for instance, ++ mount /cdrom/squashfs.img /sq ++ losetup /sq/ext2.img ++ losetup /somewhere/cow ++ dmsetup "snapshot /dev/loop0 /dev/loop1 ..." ++- it will be difficult (or needs more operations) to extract the ++ difference between the original device and COW. ++- DM snapshot-merge may help a lot when users try merging. in the ++ fs-layer union, users will use rsync(1). ++ ++ ++Several characters/aspects of aufs ++---------------------------------------------------------------------- ++ ++Aufs has several characters or aspects. ++1. a filesystem, callee of VFS helper ++2. sub-VFS, caller of VFS helper for branches ++3. a virtual filesystem which maintains persistent inode number ++4. reader/writer of files on branches such like an application ++ ++1. Callee of VFS Helper ++As an ordinary linux filesystem, aufs is a callee of VFS. For instance, ++unlink(2) from an application reaches sys_unlink() kernel function and ++then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it ++calls filesystem specific unlink operation. Actually aufs implements the ++unlink operation but it behaves like a redirector. ++ ++2. Caller of VFS Helper for Branches ++aufs_unlink() passes the unlink request to the branch filesystem as if ++it were called from VFS. So the called unlink operation of the branch ++filesystem acts as usual. As a caller of VFS helper, aufs should handle ++every necessary pre/post operation for the branch filesystem. ++- acquire the lock for the parent dir on a branch ++- lookup in a branch ++- revalidate dentry on a branch ++- mnt_want_write() for a branch ++- vfs_unlink() for a branch ++- mnt_drop_write() for a branch ++- release the lock on a branch ++ ++3. Persistent Inode Number ++One of the most important issue for a filesystem is to maintain inode ++numbers. This is particularly important to support exporting a ++filesystem via NFS. Aufs is a virtual filesystem which doesn't have a ++backend block device for its own. But some storage is necessary to ++maintain inode number. It may be a large space and may not suit to keep ++in memory. Aufs rents some space from its first writable branch ++filesystem (by default) and creates file(s) on it. These files are ++created by aufs internally and removed soon (currently) keeping opened. ++Note: Because these files are removed, they are totally gone after ++ unmounting aufs. It means the inode numbers are not persistent ++ across unmount or reboot. I have a plan to make them really ++ persistent which will be important for aufs on NFS server. ++ ++4. Read/Write Files Internally (copy-on-write) ++Because a branch can be readonly, when you write a file on it, aufs will ++"copy-up" it to the upper writable branch internally. And then write the ++originally requested thing to the file. Generally kernel doesn't ++open/read/write file actively. In aufs, even a single write may cause a ++internal "file copy". This behaviour is very similar to cp(1) command. ++ ++Some people may think it is better to pass such work to user space ++helper, instead of doing in kernel space. Actually I am still thinking ++about it. But currently I have implemented it in kernel space. +--- /dev/null ++++ linux-3.9/Documentation/filesystems/aufs/design/02struct.txt 2013-05-13 17:13:17.203794085 +0200 +@@ -0,0 +1,226 @@ ++ ++# Copyright (C) 2005-2013 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++Basic Aufs Internal Structure ++ ++Superblock/Inode/Dentry/File Objects ++---------------------------------------------------------------------- ++As like an ordinary filesystem, aufs has its own ++superblock/inode/dentry/file objects. All these objects have a ++dynamically allocated array and store the same kind of pointers to the ++lower filesystem, branch. ++For example, when you build a union with one readwrite branch and one ++readonly, mounted /au, /rw and /ro respectively. ++- /au = /rw + /ro ++- /ro/fileA exists but /rw/fileA ++ ++Aufs lookup operation finds /ro/fileA and gets dentry for that. These ++pointers are stored in a aufs dentry. The array in aufs dentry will be, ++- [0] = NULL ++- [1] = /ro/fileA ++ ++This style of an array is essentially same to the aufs ++superblock/inode/dentry/file objects. ++ ++Because aufs supports manipulating branches, ie. add/delete/change ++dynamically, these objects has its own generation. When branches are ++changed, the generation in aufs superblock is incremented. And a ++generation in other object are compared when it is accessed. ++When a generation in other objects are obsoleted, aufs refreshes the ++internal array. ++ ++ ++Superblock ++---------------------------------------------------------------------- ++Additionally aufs superblock has some data for policies to select one +among multiple writable branches, XIB files, pseudo-links and kobject. +See below in detail. +About the policies which supports copy-down a directory, see policy.txt @@ -460,7 +821,7 @@ +When the trigger systemcall does not update the timestamps of the parent +dir, aufs reverts it after copy-up. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/03lookup.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/03lookup.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,106 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -569,7 +930,7 @@ + aufs performance when system surely hide the aufs branches from user, + by over-mounting something (or another method). --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/04branch.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/04branch.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,76 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -648,7 +1009,7 @@ + - a regular file on the branch is opened for write and there is no + same named entry on the upper branch. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/05wbr_policy.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/05wbr_policy.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,65 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -716,7 +1077,7 @@ + one. If the selected branch is readonly, then aufs follows the + copyup policy. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/06mmap.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/06mmap.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,47 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -766,7 +1127,7 @@ +approach, aufs met a hard problem and I could not solve it without +switching the approach. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/07export.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/07export.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,59 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -828,7 +1189,7 @@ +- readdir(): call lockdep_on/off() because filldir in NFSD calls + lookup_one_len(), vfs_getattr(), encode_fh() and others. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/08shwh.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/08shwh.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,53 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -884,7 +1245,7 @@ +This new squashfs archive can be stored on the boot device and the +initramfs will use it to replace the old one at the next boot. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/10dynop.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/10dynop.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,47 @@ + +# Copyright (C) 2010-2013 Junjiro R. Okajima @@ -934,7 +1295,7 @@ +Currently this approach is applied to file_operations and +vm_operations_struct for regular files only. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/design/99plan.txt 2013-02-19 11:26:12.672456917 -0500 ++++ linux-3.9/Documentation/filesystems/aufs/design/99plan.txt 2013-05-13 17:13:17.203794085 +0200 @@ -0,0 +1,96 @@ + +# Copyright (C) 2005-2013 Junjiro R. Okajima @@ -1033,3669 +1394,1537 @@ +/new. +Otherwise from /new. --- /dev/null -+++ linux-3.x-rcN/Documentation/filesystems/aufs/README 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,333 @@ -+ -+Aufs3 -- advanced multi layered unification filesystem version 3.x -+http://aufs.sf.net -+Junjiro R. Okajima -+ ++++ linux-3.9/fs/aufs/Kconfig 2013-05-13 17:13:17.207129054 +0200 +@@ -0,0 +1,202 @@ ++config AUFS_FS ++ tristate "Aufs (Advanced multi layered unification filesystem) support" ++ help ++ Aufs is a stackable unification filesystem such as Unionfs, ++ which unifies several directories and provides a merged single ++ directory. ++ In the early days, aufs was entirely re-designed and ++ re-implemented Unionfs Version 1.x series. Introducing many ++ original ideas, approaches and improvements, it becomes totally ++ different from Unionfs while keeping the basic features. + -+0. Introduction -+---------------------------------------- -+In the early days, aufs was entirely re-designed and re-implemented -+Unionfs Version 1.x series. After many original ideas, approaches, -+improvements and implementations, it becomes totally different from -+Unionfs while keeping the basic features. -+Recently, Unionfs Version 2.x series begin taking some of the same -+approaches to aufs1's. -+Unionfs is being developed by Professor Erez Zadok at Stony Brook -+University and his team. ++if AUFS_FS ++choice ++ prompt "Maximum number of branches" ++ default AUFS_BRANCH_MAX_127 ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_127 ++ bool "127" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_511 ++ bool "511" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_1023 ++ bool "1023" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_32767 ++ bool "32767" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++endchoice + -+Aufs3 supports linux-3.0 and later. -+If you want older kernel version support, try aufs2-2.6.git or -+aufs2-standalone.git repository, aufs1 from CVS on SourceForge. ++config AUFS_SBILIST ++ bool ++ depends on AUFS_MAGIC_SYSRQ || PROC_FS ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq or /proc, enabled automatically. + -+Note: it becomes clear that "Aufs was rejected. Let's give it up." -+According to Christoph Hellwig, linux rejects all union-type filesystems -+but UnionMount. -+ ++config AUFS_HNOTIFY ++ bool "Detect direct branch access (bypassing aufs)" ++ help ++ If you want to modify files on branches directly, eg. bypassing aufs, ++ and want aufs to detect the changes of them fully, then enable this ++ option and use 'udba=notify' mount option. ++ Currently there is only one available configuration, "fsnotify". ++ It will have a negative impact to the performance. ++ See detail in aufs.5. + ++choice ++ prompt "method" if AUFS_HNOTIFY ++ default AUFS_HFSNOTIFY ++config AUFS_HFSNOTIFY ++ bool "fsnotify" ++ select FSNOTIFY ++endchoice + -+1. Features -+---------------------------------------- -+- unite several directories into a single virtual filesystem. The member -+ directory is called as a branch. -+- you can specify the permission flags to the branch, which are 'readonly', -+ 'readwrite' and 'whiteout-able.' -+- by upper writable branch, internal copyup and whiteout, files/dirs on -+ readonly branch are modifiable logically. -+- dynamic branch manipulation, add, del. -+- etc... -+ -+Also there are many enhancements in aufs1, such as: -+- readdir(3) in userspace. -+- keep inode number by external inode number table -+- keep the timestamps of file/dir in internal copyup operation -+- seekable directory, supporting NFS readdir. -+- whiteout is hardlinked in order to reduce the consumption of inodes -+ on branch -+- do not copyup, nor create a whiteout when it is unnecessary -+- revert a single systemcall when an error occurs in aufs -+- remount interface instead of ioctl -+- maintain /etc/mtab by an external command, /sbin/mount.aufs. -+- loopback mounted filesystem as a branch -+- kernel thread for removing the dir who has a plenty of whiteouts -+- support copyup sparse file (a file which has a 'hole' in it) -+- default permission flags for branches -+- selectable permission flags for ro branch, whether whiteout can -+ exist or not -+- export via NFS. -+- support /fs/aufs and /aufs. -+- support multiple writable branches, some policies to select one -+ among multiple writable branches. -+- a new semantics for link(2) and rename(2) to support multiple -+ writable branches. -+- no glibc changes are required. -+- pseudo hardlink (hardlink over branches) -+- allow a direct access manually to a file on branch, e.g. bypassing aufs. -+ including NFS or remote filesystem branch. -+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX. -+- and more... -+ -+Currently these features are dropped temporary from aufs3. -+See design/08plan.txt in detail. -+- test only the highest one for the directory permission (dirperm1) -+- copyup on open (coo=) -+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs -+ (robr) -+- statistics of aufs thread (/sys/fs/aufs/stat) -+- delegation mode (dlgt) -+ a delegation of the internal branch access to support task I/O -+ accounting, which also supports Linux Security Modules (LSM) mainly -+ for Suse AppArmor. -+- intent.open/create (file open in a single lookup) -+ -+Features or just an idea in the future (see also design/*.txt), -+- reorder the branch index without del/re-add. -+- permanent xino files for NFSD -+- an option for refreshing the opened files after add/del branches -+- 'move' policy for copy-up between two writable branches, after -+ checking free space. -+- light version, without branch manipulation. (unnecessary?) -+- copyup in userspace -+- inotify in userspace -+- readv/writev -+- xattr, acl -+ -+ -+2. Download -+---------------------------------------- -+There were three GIT trees for aufs3, aufs3-linux.git, -+aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in -+"aufs-util.git." -+While the aufs-util is always necessary, you need either of aufs3-linux -+or aufs3-standalone. -+ -+The aufs3-linux tree includes the whole linux mainline GIT tree, -+git://git.kernel.org/.../torvalds/linux.git. -+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot -+build aufs3 as an external kernel module. -+ -+On the other hand, the aufs3-standalone tree has only aufs source files -+and necessary patches, and you can select CONFIG_AUFS_FS=m. -+ -+You will find GIT branches whose name is in form of "aufs3.x" where "x" -+represents the linux kernel version, "linux-3.x". For instance, -+"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use -+"aufs3.x-rcN" branch. -+ -+o aufs3-linux tree -+$ git clone --reference /your/linux/git/tree \ -+ git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-linux.git \ -+ aufs3-linux.git -+- if you don't have linux GIT tree, then remove "--reference ..." -+$ cd aufs3-linux.git -+$ git checkout origin/aufs3.0 -+ -+o aufs3-standalone tree -+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-standalone.git \ -+ aufs3-standalone.git -+$ cd aufs3-standalone.git -+$ git checkout origin/aufs3.0 -+ -+o aufs-util tree -+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs-util.git \ -+ aufs-util.git -+$ cd aufs-util.git -+$ git checkout origin/aufs3.0 -+ -+Note: The 3.x-rcN branch is to be used with `rc' kernel versions ONLY. -+The minor version number, 'x' in '3.x', of aufs may not always -+follow the minor version number of the kernel. -+Because changes in the kernel that cause the use of a new -+minor version number do not always require changes to aufs-util. -+ -+Since aufs-util has its own minor version number, you may not be -+able to find a GIT branch in aufs-util for your kernel's -+exact minor version number. -+In this case, you should git-checkout the branch for the -+nearest lower number. -+ -+For (an unreleased) example: -+If you are using "linux-3.10" and the "aufs3.10" branch -+does not exist in aufs-util repository, then "aufs3.9", "aufs3.8" -+or something numerically smaller is the branch for your kernel. -+ -+Also you can view all branches by -+ $ git branch -a -+ -+ -+3. Configuration and Compilation -+---------------------------------------- -+Make sure you have git-checkout'ed the correct branch. -+ -+For aufs3-linux tree, -+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS. -+- set other aufs configurations if necessary. -+ -+For aufs3-standalone tree, -+There are several ways to build. -+ -+1. -+- apply ./aufs3-kbuild.patch to your kernel source files. -+- apply ./aufs3-base.patch too. -+- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and -+ others including lsof(1)) show the file path on aufs instead of the -+ path on the branch fs. -+- apply ./aufs3-standalone.patch too, if you have a plan to set -+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch. -+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your -+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild. -+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either -+ =m or =y. -+- and build your kernel as usual. -+- install the built kernel. -+- install the header files too by "make headers_install" to the -+ directory where you specify. By default, it is $PWD/usr. -+ "make help" shows a brief note for headers_install. -+- and reboot your system. -+ -+2. -+- module only (CONFIG_AUFS_FS=m). -+- apply ./aufs3-base.patch to your kernel source files. -+- apply ./aufs3-proc_map.patch too to your kernel source files, -+ if you want to make /proc/PID/maps (and others including lsof(1)) show -+ the file path on aufs instead of the path on the branch fs. -+- apply ./aufs3-standalone.patch too. -+- build your kernel, don't forget "make headers_install", and reboot. -+- edit ./config.mk and set other aufs configurations if necessary. -+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes -+ every aufs configurations. -+- build the module by simple "make". -+- you can specify ${KDIR} make variable which points to your kernel -+ source tree. -+- install the files -+ + run "make install" to install the aufs module, or copy the built -+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply). -+ + run "make install_headers" (instead of headers_install) to install -+ the modified aufs header file (you can specify DESTDIR which is -+ available in aufs standalone version's Makefile only), or copy -+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever -+ you like manually. By default, the target directory is $PWD/usr. -+- no need to apply aufs3-kbuild.patch, nor copying source files to your -+ kernel source tree. -+ -+Note: The header file aufs_type.h is necessary to build aufs-util -+ as well as "make headers_install" in the kernel source tree. -+ headers_install is subject to be forgotten, but it is essentially -+ necessary, not only for building aufs-util. -+ You may not meet problems without headers_install in some older -+ version though. -+ -+And then, -+- read README in aufs-util, build and install it -+- note that your distribution may contain an obsoleted version of -+ aufs_type.h in /usr/include/linux or something. When you build aufs -+ utilities, make sure that your compiler refers the correct aufs header -+ file which is built by "make headers_install." -+- if you want to use readdir(3) in userspace or pathconf(3) wrapper, -+ then run "make install_ulib" too. And refer to the aufs manual in -+ detail. -+ -+ -+4. Usage -+---------------------------------------- -+At first, make sure aufs-util are installed, and please read the aufs -+manual, aufs.5 in aufs-util.git tree. -+$ man -l aufs.5 -+ -+And then, -+$ mkdir /tmp/rw /tmp/aufs -+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs -+ -+Here is another example. The result is equivalent. -+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs -+ Or -+# mount -t aufs -o br:/tmp/rw none /tmp/aufs -+# mount -o remount,append:${HOME} /tmp/aufs -+ -+Then, you can see whole tree of your home dir through /tmp/aufs. If -+you modify a file under /tmp/aufs, the one on your home directory is -+not affected, instead the same named file will be newly created under -+/tmp/rw. And all of your modification to a file will be applied to -+the one under /tmp/rw. This is called the file based Copy on Write -+(COW) method. -+Aufs mount options are described in aufs.5. -+If you run chroot or something and make your aufs as a root directory, -+then you need to customize the shutdown script. See the aufs manual in -+detail. -+ -+Additionally, there are some sample usages of aufs which are a -+diskless system with network booting, and LiveCD over NFS. -+See sample dir in CVS tree on SourceForge. -+ -+ -+5. Contact -+---------------------------------------- -+When you have any problems or strange behaviour in aufs, please let me -+know with: -+- /proc/mounts (instead of the output of mount(8)) -+- /sys/module/aufs/* -+- /sys/fs/aufs/* (if you have them) -+- /debug/aufs/* (if you have them) -+- linux kernel version -+ if your kernel is not plain, for example modified by distributor, -+ the url where i can download its source is necessary too. -+- aufs version which was printed at loading the module or booting the -+ system, instead of the date you downloaded. -+- configuration (define/undefine CONFIG_AUFS_xxx) -+- kernel configuration or /proc/config.gz (if you have it) -+- behaviour which you think to be incorrect -+- actual operation, reproducible one is better -+- mailto: aufs-users at lists.sourceforge.net -+ -+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches, -+and Feature Requests) on SourceForge. Please join and write to -+aufs-users ML. -+ -+ -+6. Acknowledgements -+---------------------------------------- -+Thanks to everyone who have tried and are using aufs, whoever -+have reported a bug or any feedback. -+ -+Especially donators: -+Tomas Matejicek(slax.org) made a donation (much more than once). -+ Since Apr 2010, Tomas M (the author of Slax and Linux Live -+ scripts) is making "doubling" donations. -+ Unfortunately I cannot list all of the donators, but I really -+ appreciate. -+ It ends Aug 2010, but the ordinary donation URL is still available. -+ -+Dai Itasaka made a donation (2007/8). -+Chuck Smith made a donation (2008/4, 10 and 12). -+Henk Schoneveld made a donation (2008/9). -+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10). -+Francois Dupoux made a donation (2008/11). -+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public -+ aufs2 GIT tree (2009/2). -+William Grant made a donation (2009/3). -+Patrick Lane made a donation (2009/4). -+The Mail Archive (mail-archive.com) made donations (2009/5). -+Nippy Networks (Ed Wildgoose) made a donation (2009/7). -+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11). -+Pavel Pronskiy made a donation (2011/2). -+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy -+ Networks (Ed Wildgoose) made a donation for hardware (2011/3). -+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and -+11). -+Sam Liddicott made a donation (2011/9). -+ -+Thank you very much. -+Donations are always, including future donations, very important and -+helpful for me to keep on developing aufs. -+ -+ -+7. -+---------------------------------------- -+If you are an experienced user, no explanation is needed. Aufs is -+just a linux filesystem. -+ -+ -+Enjoy! -+ -+# Local variables: ; -+# mode: text; -+# End: ; ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/aufs.h 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * all header files -+ */ -+ -+#ifndef __AUFS_H__ -+#define __AUFS_H__ -+ -+#ifdef __KERNEL__ -+ -+#define AuStub(type, name, body, ...) \ -+ static inline type name(__VA_ARGS__) { body; } -+ -+#define AuStubVoid(name, ...) \ -+ AuStub(void, name, , __VA_ARGS__) -+#define AuStubInt0(name, ...) \ -+ AuStub(int, name, return 0, __VA_ARGS__) -+ -+#include "debug.h" -+ -+#include "branch.h" -+#include "cpup.h" -+#include "dcsub.h" -+#include "dbgaufs.h" -+#include "dentry.h" -+#include "dir.h" -+#include "dynop.h" -+#include "file.h" -+#include "fstype.h" -+#include "inode.h" -+#include "loop.h" -+#include "module.h" -+#include "opts.h" -+#include "rwsem.h" -+#include "spl.h" -+#include "super.h" -+#include "sysaufs.h" -+#include "vfsub.h" -+#include "whout.h" -+#include "wkq.h" -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/branch.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,1172 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * branch management -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+/* -+ * free a single branch -+ */ -+static void au_br_do_free(struct au_branch *br) -+{ -+ int i; -+ struct au_wbr *wbr; -+ struct au_dykey **key; -+ -+ au_hnotify_fin_br(br); -+ -+ if (br->br_xino.xi_file) -+ fput(br->br_xino.xi_file); -+ mutex_destroy(&br->br_xino.xi_nondir_mtx); -+ -+ AuDebugOn(atomic_read(&br->br_count)); -+ -+ wbr = br->br_wbr; -+ if (wbr) { -+ for (i = 0; i < AuBrWh_Last; i++) -+ dput(wbr->wbr_wh[i]); -+ AuDebugOn(atomic_read(&wbr->wbr_wh_running)); -+ AuRwDestroy(&wbr->wbr_wh_rwsem); -+ } -+ -+ key = br->br_dykey; -+ for (i = 0; i < AuBrDynOp; i++, key++) -+ if (*key) -+ au_dy_put(*key); -+ else -+ break; -+ -+ /* recursive lock, s_umount of branch's */ -+ lockdep_off(); -+ mntput(br->br_mnt); -+ lockdep_on(); -+ kfree(wbr); -+ kfree(br); -+} -+ -+/* -+ * frees all branches -+ */ -+void au_br_free(struct au_sbinfo *sbinfo) -+{ -+ aufs_bindex_t bmax; -+ struct au_branch **br; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ bmax = sbinfo->si_bend + 1; -+ br = sbinfo->si_branch; -+ while (bmax--) -+ au_br_do_free(*br++); -+} -+ -+/* -+ * find the index of a branch which is specified by @br_id. -+ */ -+int au_br_index(struct super_block *sb, aufs_bindex_t br_id) -+{ -+ aufs_bindex_t bindex, bend; -+ -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) -+ if (au_sbr_id(sb, bindex) == br_id) -+ return bindex; -+ return -1; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * add a branch -+ */ -+ -+static int test_overlap(struct super_block *sb, struct dentry *h_adding, -+ struct dentry *h_root) -+{ -+ if (unlikely(h_adding == h_root -+ || au_test_loopback_overlap(sb, h_adding))) -+ return 1; -+ if (h_adding->d_sb != h_root->d_sb) -+ return 0; -+ return au_test_subdir(h_adding, h_root) -+ || au_test_subdir(h_root, h_adding); -+} -+ -+/* -+ * returns a newly allocated branch. @new_nbranch is a number of branches -+ * after adding a branch. -+ */ -+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch, -+ int perm) -+{ -+ struct au_branch *add_branch; -+ struct dentry *root; -+ int err; -+ -+ err = -ENOMEM; -+ root = sb->s_root; -+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS); -+ if (unlikely(!add_branch)) -+ goto out; -+ -+ err = au_hnotify_init_br(add_branch, perm); -+ if (unlikely(err)) -+ goto out_br; -+ -+ add_branch->br_wbr = NULL; -+ if (au_br_writable(perm)) { -+ /* may be freed separately at changing the branch permission */ -+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr), -+ GFP_NOFS); -+ if (unlikely(!add_branch->br_wbr)) -+ goto out_hnotify; -+ } -+ -+ err = au_sbr_realloc(au_sbi(sb), new_nbranch); -+ if (!err) -+ err = au_di_realloc(au_di(root), new_nbranch); -+ if (!err) -+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch); -+ if (!err) -+ return add_branch; /* success */ -+ -+ kfree(add_branch->br_wbr); -+ -+out_hnotify: -+ au_hnotify_fin_br(add_branch); -+out_br: -+ kfree(add_branch); -+out: -+ return ERR_PTR(err); -+} -+ -+/* -+ * test if the branch permission is legal or not. -+ */ -+static int test_br(struct inode *inode, int brperm, char *path) -+{ -+ int err; -+ -+ err = (au_br_writable(brperm) && IS_RDONLY(inode)); -+ if (!err) -+ goto out; -+ -+ err = -EINVAL; -+ pr_err("write permission for readonly mount or inode, %s\n", path); -+ -+out: -+ return err; -+} -+ -+/* -+ * returns: -+ * 0: success, the caller will add it -+ * plus: success, it is already unified, the caller should ignore it -+ * minus: error -+ */ -+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount) -+{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct dentry *root; -+ struct inode *inode, *h_inode; -+ -+ root = sb->s_root; -+ bend = au_sbend(sb); -+ if (unlikely(bend >= 0 -+ && au_find_dbindex(root, add->path.dentry) >= 0)) { -+ err = 1; -+ if (!remount) { -+ err = -EINVAL; -+ pr_err("%s duplicated\n", add->pathname); -+ } -+ goto out; -+ } -+ -+ err = -ENOSPC; /* -E2BIG; */ -+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex -+ || AUFS_BRANCH_MAX - 1 <= bend)) { -+ pr_err("number of branches exceeded %s\n", add->pathname); -+ goto out; -+ } -+ -+ err = -EDOM; -+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { -+ pr_err("bad index %d\n", add->bindex); -+ goto out; -+ } -+ -+ inode = add->path.dentry->d_inode; -+ err = -ENOENT; -+ if (unlikely(!inode->i_nlink)) { -+ pr_err("no existence %s\n", add->pathname); -+ goto out; -+ } -+ -+ err = -EINVAL; -+ if (unlikely(inode->i_sb == sb)) { -+ pr_err("%s must be outside\n", add->pathname); -+ goto out; -+ } -+ -+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) { -+ pr_err("unsupported filesystem, %s (%s)\n", -+ add->pathname, au_sbtype(inode->i_sb)); -+ goto out; -+ } -+ -+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname); -+ if (unlikely(err)) -+ goto out; -+ -+ if (bend < 0) -+ return 0; /* success */ -+ -+ err = -EINVAL; -+ for (bindex = 0; bindex <= bend; bindex++) -+ if (unlikely(test_overlap(sb, add->path.dentry, -+ au_h_dptr(root, bindex)))) { -+ pr_err("%s is overlapped\n", add->pathname); -+ goto out; -+ } -+ -+ err = 0; -+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) { -+ h_inode = au_h_dptr(root, 0)->d_inode; -+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO) -+ || !uid_eq(h_inode->i_uid, inode->i_uid) -+ || !gid_eq(h_inode->i_gid, inode->i_gid)) -+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n", -+ add->pathname, -+ i_uid_read(inode), i_gid_read(inode), -+ (inode->i_mode & S_IALLUGO), -+ i_uid_read(h_inode), i_gid_read(h_inode), -+ (h_inode->i_mode & S_IALLUGO)); -+ } -+ -+out: -+ return err; -+} -+ -+/* -+ * initialize or clean the whiteouts for an adding branch -+ */ -+static int au_br_init_wh(struct super_block *sb, struct au_branch *br, -+ int new_perm, struct dentry *h_root) -+{ -+ int err, old_perm; -+ aufs_bindex_t bindex; -+ struct mutex *h_mtx; -+ struct au_wbr *wbr; -+ struct au_hinode *hdir; -+ -+ wbr = br->br_wbr; -+ old_perm = br->br_perm; -+ br->br_perm = new_perm; -+ hdir = NULL; -+ h_mtx = NULL; -+ bindex = au_br_index(sb, br->br_id); -+ if (0 <= bindex) { -+ hdir = au_hi(sb->s_root->d_inode, bindex); -+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); -+ } else { -+ h_mtx = &h_root->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT); -+ } -+ if (!wbr) -+ err = au_wh_init(h_root, br, sb); -+ else { -+ wbr_wh_write_lock(wbr); -+ err = au_wh_init(h_root, br, sb); -+ wbr_wh_write_unlock(wbr); -+ } -+ if (hdir) -+ au_hn_imtx_unlock(hdir); -+ else -+ mutex_unlock(h_mtx); -+ br->br_perm = old_perm; -+ -+ if (!err && wbr && !au_br_writable(new_perm)) { -+ kfree(wbr); -+ br->br_wbr = NULL; -+ } -+ -+ return err; -+} -+ -+static int au_wbr_init(struct au_branch *br, struct super_block *sb, -+ int perm, struct path *path) -+{ -+ int err; -+ struct kstatfs kst; -+ struct au_wbr *wbr; -+ struct dentry *h_dentry; -+ -+ wbr = br->br_wbr; -+ au_rw_init(&wbr->wbr_wh_rwsem); -+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh)); -+ atomic_set(&wbr->wbr_wh_running, 0); -+ wbr->wbr_bytes = 0; -+ -+ /* -+ * a limit for rmdir/rename a dir -+ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h -+ */ -+ err = vfs_statfs(path, &kst); -+ if (unlikely(err)) -+ goto out; -+ err = -EINVAL; -+ h_dentry = path->dentry; -+ if (kst.f_namelen >= NAME_MAX) -+ err = au_br_init_wh(sb, br, perm, h_dentry); -+ else -+ pr_err("%.*s(%s), unsupported namelen %ld\n", -+ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb), -+ kst.f_namelen); -+ -+out: -+ return err; -+} -+ -+/* intialize a new branch */ -+static int au_br_init(struct au_branch *br, struct super_block *sb, -+ struct au_opt_add *add) -+{ -+ int err; -+ -+ err = 0; -+ memset(&br->br_xino, 0, sizeof(br->br_xino)); -+ mutex_init(&br->br_xino.xi_nondir_mtx); -+ br->br_perm = add->perm; -+ br->br_mnt = add->path.mnt; /* set first, mntget() later */ -+ spin_lock_init(&br->br_dykey_lock); -+ memset(br->br_dykey, 0, sizeof(br->br_dykey)); -+ atomic_set(&br->br_count, 0); -+ br->br_xino_upper = AUFS_XINO_TRUNC_INIT; -+ atomic_set(&br->br_xino_running, 0); -+ br->br_id = au_new_br_id(sb); -+ AuDebugOn(br->br_id < 0); -+ -+ if (au_br_writable(add->perm)) { -+ err = au_wbr_init(br, sb, add->perm, &add->path); -+ if (unlikely(err)) -+ goto out_err; -+ } -+ -+ if (au_opt_test(au_mntflags(sb), XINO)) { -+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino, -+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1); -+ if (unlikely(err)) { -+ AuDebugOn(br->br_xino.xi_file); -+ goto out_err; -+ } -+ } -+ -+ sysaufs_br_init(br); -+ mntget(add->path.mnt); -+ goto out; /* success */ -+ -+out_err: -+ br->br_mnt = NULL; -+out: -+ return err; -+} -+ -+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex, -+ struct au_branch *br, aufs_bindex_t bend, -+ aufs_bindex_t amount) -+{ -+ struct au_branch **brp; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ brp = sbinfo->si_branch + bindex; -+ memmove(brp + 1, brp, sizeof(*brp) * amount); -+ *brp = br; -+ sbinfo->si_bend++; -+ if (unlikely(bend < 0)) -+ sbinfo->si_bend = 0; -+} -+ -+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex, -+ aufs_bindex_t bend, aufs_bindex_t amount) -+{ -+ struct au_hdentry *hdp; -+ -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ hdp = dinfo->di_hdentry + bindex; -+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount); -+ au_h_dentry_init(hdp); -+ dinfo->di_bend++; -+ if (unlikely(bend < 0)) -+ dinfo->di_bstart = 0; -+} -+ -+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex, -+ aufs_bindex_t bend, aufs_bindex_t amount) -+{ -+ struct au_hinode *hip; -+ -+ AuRwMustWriteLock(&iinfo->ii_rwsem); -+ -+ hip = iinfo->ii_hinode + bindex; -+ memmove(hip + 1, hip, sizeof(*hip) * amount); -+ hip->hi_inode = NULL; -+ au_hn_init(hip); -+ iinfo->ii_bend++; -+ if (unlikely(bend < 0)) -+ iinfo->ii_bstart = 0; -+} -+ -+static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry, -+ struct au_branch *br, aufs_bindex_t bindex) -+{ -+ struct dentry *root; -+ struct inode *root_inode; -+ aufs_bindex_t bend, amount; -+ -+ root = sb->s_root; -+ root_inode = root->d_inode; -+ bend = au_sbend(sb); -+ amount = bend + 1 - bindex; -+ au_sbilist_lock(); -+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount); -+ au_br_do_add_hdp(au_di(root), bindex, bend, amount); -+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount); -+ au_set_h_dptr(root, bindex, dget(h_dentry)); -+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode), -+ /*flags*/0); -+ au_sbilist_unlock(); -+} -+ -+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount) -+{ -+ int err; -+ aufs_bindex_t bend, add_bindex; -+ struct dentry *root, *h_dentry; -+ struct inode *root_inode; -+ struct au_branch *add_branch; -+ -+ root = sb->s_root; -+ root_inode = root->d_inode; -+ IMustLock(root_inode); -+ err = test_add(sb, add, remount); -+ if (unlikely(err < 0)) -+ goto out; -+ if (err) { -+ err = 0; -+ goto out; /* success */ -+ } -+ -+ bend = au_sbend(sb); -+ add_branch = au_br_alloc(sb, bend + 2, add->perm); -+ err = PTR_ERR(add_branch); -+ if (IS_ERR(add_branch)) -+ goto out; -+ -+ err = au_br_init(add_branch, sb, add); -+ if (unlikely(err)) { -+ au_br_do_free(add_branch); -+ goto out; -+ } -+ -+ add_bindex = add->bindex; -+ h_dentry = add->path.dentry; -+ if (!remount) -+ au_br_do_add(sb, h_dentry, add_branch, add_bindex); -+ else { -+ sysaufs_brs_del(sb, add_bindex); -+ au_br_do_add(sb, h_dentry, add_branch, add_bindex); -+ sysaufs_brs_add(sb, add_bindex); -+ } -+ -+ if (!add_bindex) { -+ au_cpup_attr_all(root_inode, /*force*/1); -+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes; -+ } else -+ au_add_nlink(root_inode, h_dentry->d_inode); -+ -+ /* -+ * this test/set prevents aufs from handling unnecesary notify events -+ * of xino files, in case of re-adding a writable branch which was -+ * once detached from aufs. -+ */ -+ if (au_xino_brid(sb) < 0 -+ && au_br_writable(add_branch->br_perm) -+ && !au_test_fs_bad_xino(h_dentry->d_sb) -+ && add_branch->br_xino.xi_file -+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry) -+ au_xino_brid_set(sb, add_branch->br_id); -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * delete a branch -+ */ -+ -+/* to show the line number, do not make it inlined function */ -+#define AuVerbose(do_info, fmt, ...) do { \ -+ if (do_info) \ -+ pr_info(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart, -+ aufs_bindex_t bend) -+{ -+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend; -+} -+ -+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart, -+ aufs_bindex_t bend) -+{ -+ return au_test_ibusy(dentry->d_inode, bstart, bend); -+} -+ -+/* -+ * test if the branch is deletable or not. -+ */ -+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex, -+ unsigned int sigen, const unsigned int verbose) -+{ -+ int err, i, j, ndentry; -+ aufs_bindex_t bstart, bend; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry *d; -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, root, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; -+ -+ for (i = 0; !err && i < dpages.ndpage; i++) { -+ dpage = dpages.dpages + i; -+ ndentry = dpage->ndentry; -+ for (j = 0; !err && j < ndentry; j++) { -+ d = dpage->dentries[j]; -+ AuDebugOn(!d->d_count); -+ if (!au_digen_test(d, sigen)) { -+ di_read_lock_child(d, AuLock_IR); -+ if (unlikely(au_dbrange_test(d))) { -+ di_read_unlock(d, AuLock_IR); -+ continue; -+ } -+ } else { -+ di_write_lock_child(d); -+ if (unlikely(au_dbrange_test(d))) { -+ di_write_unlock(d); -+ continue; -+ } -+ err = au_reval_dpath(d, sigen); -+ if (!err) -+ di_downgrade_lock(d, AuLock_IR); -+ else { -+ di_write_unlock(d); -+ break; -+ } -+ } -+ -+ /* AuDbgDentry(d); */ -+ bstart = au_dbstart(d); -+ bend = au_dbend(d); -+ if (bstart <= bindex -+ && bindex <= bend -+ && au_h_dptr(d, bindex) -+ && au_test_dbusy(d, bstart, bend)) { -+ err = -EBUSY; -+ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d)); -+ AuDbgDentry(d); -+ } -+ di_read_unlock(d, AuLock_IR); -+ } -+ } -+ -+out_dpages: -+ au_dpages_free(&dpages); -+out: -+ return err; -+} -+ -+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex, -+ unsigned int sigen, const unsigned int verbose) -+{ -+ int err; -+ unsigned long long max, ull; -+ struct inode *i, **array; -+ aufs_bindex_t bstart, bend; -+ -+ array = au_iarray_alloc(sb, &max); -+ err = PTR_ERR(array); -+ if (IS_ERR(array)) -+ goto out; -+ -+ err = 0; -+ AuDbg("b%d\n", bindex); -+ for (ull = 0; !err && ull < max; ull++) { -+ i = array[ull]; -+ if (i->i_ino == AUFS_ROOT_INO) -+ continue; -+ -+ /* AuDbgInode(i); */ -+ if (au_iigen(i, NULL) == sigen) -+ ii_read_lock_child(i); -+ else { -+ ii_write_lock_child(i); -+ err = au_refresh_hinode_self(i); -+ au_iigen_dec(i); -+ if (!err) -+ ii_downgrade_lock(i); -+ else { -+ ii_write_unlock(i); -+ break; -+ } -+ } -+ -+ bstart = au_ibstart(i); -+ bend = au_ibend(i); -+ if (bstart <= bindex -+ && bindex <= bend -+ && au_h_iptr(i, bindex) -+ && au_test_ibusy(i, bstart, bend)) { -+ err = -EBUSY; -+ AuVerbose(verbose, "busy i%lu\n", i->i_ino); -+ AuDbgInode(i); -+ } -+ ii_read_unlock(i); -+ } -+ au_iarray_free(array, max); -+ -+out: -+ return err; -+} -+ -+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex, -+ const unsigned int verbose) -+{ -+ int err; -+ unsigned int sigen; -+ -+ sigen = au_sigen(root->d_sb); -+ DiMustNoWaiters(root); -+ IiMustNoWaiters(root->d_inode); -+ di_write_unlock(root); -+ err = test_dentry_busy(root, bindex, sigen, verbose); -+ if (!err) -+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose); -+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ -+ -+ return err; -+} -+ -+static void au_br_do_del_brp(struct au_sbinfo *sbinfo, -+ const aufs_bindex_t bindex, -+ const aufs_bindex_t bend) -+{ -+ struct au_branch **brp, **p; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ brp = sbinfo->si_branch + bindex; -+ if (bindex < bend) -+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex)); -+ sbinfo->si_branch[0 + bend] = NULL; -+ sbinfo->si_bend--; -+ -+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST); -+ if (p) -+ sbinfo->si_branch = p; -+ /* harmless error */ -+} -+ -+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex, -+ const aufs_bindex_t bend) -+{ -+ struct au_hdentry *hdp, *p; -+ -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ hdp = dinfo->di_hdentry; -+ if (bindex < bend) -+ memmove(hdp + bindex, hdp + bindex + 1, -+ sizeof(*hdp) * (bend - bindex)); -+ hdp[0 + bend].hd_dentry = NULL; -+ dinfo->di_bend--; -+ -+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST); -+ if (p) -+ dinfo->di_hdentry = p; -+ /* harmless error */ -+} -+ -+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex, -+ const aufs_bindex_t bend) -+{ -+ struct au_hinode *hip, *p; -+ -+ AuRwMustWriteLock(&iinfo->ii_rwsem); -+ -+ hip = iinfo->ii_hinode + bindex; -+ if (bindex < bend) -+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex)); -+ iinfo->ii_hinode[0 + bend].hi_inode = NULL; -+ au_hn_init(iinfo->ii_hinode + bend); -+ iinfo->ii_bend--; -+ -+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST); -+ if (p) -+ iinfo->ii_hinode = p; -+ /* harmless error */ -+} -+ -+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex, -+ struct au_branch *br) -+{ -+ aufs_bindex_t bend; -+ struct au_sbinfo *sbinfo; -+ struct dentry *root, *h_root; -+ struct inode *inode, *h_inode; -+ struct au_hinode *hinode; -+ -+ SiMustWriteLock(sb); -+ -+ root = sb->s_root; -+ inode = root->d_inode; -+ sbinfo = au_sbi(sb); -+ bend = sbinfo->si_bend; -+ -+ h_root = au_h_dptr(root, bindex); -+ hinode = au_hi(inode, bindex); -+ h_inode = au_igrab(hinode->hi_inode); -+ au_hiput(hinode); -+ -+ au_sbilist_lock(); -+ au_br_do_del_brp(sbinfo, bindex, bend); -+ au_br_do_del_hdp(au_di(root), bindex, bend); -+ au_br_do_del_hip(au_ii(inode), bindex, bend); -+ au_sbilist_unlock(); -+ -+ dput(h_root); -+ iput(h_inode); -+ au_br_do_free(br); -+} -+ -+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount) -+{ -+ int err, rerr, i; -+ unsigned int mnt_flags; -+ aufs_bindex_t bindex, bend, br_id; -+ unsigned char do_wh, verbose; -+ struct au_branch *br; -+ struct au_wbr *wbr; -+ -+ err = 0; -+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry); -+ if (bindex < 0) { -+ if (remount) -+ goto out; /* success */ -+ err = -ENOENT; -+ pr_err("%s no such branch\n", del->pathname); -+ goto out; -+ } -+ AuDbg("bindex b%d\n", bindex); -+ -+ err = -EBUSY; -+ mnt_flags = au_mntflags(sb); -+ verbose = !!au_opt_test(mnt_flags, VERBOSE); -+ bend = au_sbend(sb); -+ if (unlikely(!bend)) { -+ AuVerbose(verbose, "no more branches left\n"); -+ goto out; -+ } -+ br = au_sbr(sb, bindex); -+ i = atomic_read(&br->br_count); -+ if (unlikely(i)) { -+ AuVerbose(verbose, "%d file(s) opened\n", i); -+ goto out; -+ } -+ -+ wbr = br->br_wbr; -+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph); -+ if (do_wh) { -+ /* instead of WbrWhMustWriteLock(wbr) */ -+ SiMustWriteLock(sb); -+ for (i = 0; i < AuBrWh_Last; i++) { -+ dput(wbr->wbr_wh[i]); -+ wbr->wbr_wh[i] = NULL; -+ } -+ } -+ -+ err = test_children_busy(sb->s_root, bindex, verbose); -+ if (unlikely(err)) { -+ if (do_wh) -+ goto out_wh; -+ goto out; -+ } -+ -+ err = 0; -+ br_id = br->br_id; -+ if (!remount) -+ au_br_do_del(sb, bindex, br); -+ else { -+ sysaufs_brs_del(sb, bindex); -+ au_br_do_del(sb, bindex, br); -+ sysaufs_brs_add(sb, bindex); -+ } -+ -+ if (!bindex) { -+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1); -+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes; -+ } else -+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode); -+ if (au_opt_test(mnt_flags, PLINK)) -+ au_plink_half_refresh(sb, br_id); -+ -+ if (au_xino_brid(sb) == br_id) -+ au_xino_brid_set(sb, -1); -+ goto out; /* success */ -+ -+out_wh: -+ /* revert */ -+ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry); -+ if (rerr) -+ pr_warn("failed re-creating base whiteout, %s. (%d)\n", -+ del->pathname, rerr); -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg) -+{ -+ int err; -+ aufs_bindex_t bstart, bend; -+ struct aufs_ibusy ibusy; -+ struct inode *inode, *h_inode; -+ -+ err = -EPERM; -+ if (unlikely(!capable(CAP_SYS_ADMIN))) -+ goto out; -+ -+ err = copy_from_user(&ibusy, arg, sizeof(ibusy)); -+ if (!err) -+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ goto out; -+ } -+ -+ err = -EINVAL; -+ si_read_lock(sb, AuLock_FLUSH); -+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb))) -+ goto out_unlock; -+ -+ err = 0; -+ ibusy.h_ino = 0; /* invalid */ -+ inode = ilookup(sb, ibusy.ino); -+ if (!inode -+ || inode->i_ino == AUFS_ROOT_INO -+ || is_bad_inode(inode)) -+ goto out_unlock; -+ -+ ii_read_lock_child(inode); -+ bstart = au_ibstart(inode); -+ bend = au_ibend(inode); -+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) { -+ h_inode = au_h_iptr(inode, ibusy.bindex); -+ if (h_inode && au_test_ibusy(inode, bstart, bend)) -+ ibusy.h_ino = h_inode->i_ino; -+ } -+ ii_read_unlock(inode); -+ iput(inode); -+ -+out_unlock: -+ si_read_unlock(sb); -+ if (!err) { -+ err = __put_user(ibusy.h_ino, &arg->h_ino); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ } -+ } -+out: -+ return err; -+} -+ -+long au_ibusy_ioctl(struct file *file, unsigned long arg) -+{ -+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg); -+} -+ -+#ifdef CONFIG_COMPAT -+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg) -+{ -+ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg)); -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * change a branch permission -+ */ -+ -+static void au_warn_ima(void) -+{ -+#ifdef CONFIG_IMA -+ /* since it doesn't support mark_files_ro() */ -+ AuWarn1("RW -> RO makes IMA to produce wrong message\n"); -+#endif -+} -+ -+static int do_need_sigen_inc(int a, int b) -+{ -+ return au_br_whable(a) && !au_br_whable(b); -+} -+ -+static int need_sigen_inc(int old, int new) -+{ -+ return do_need_sigen_inc(old, new) -+ || do_need_sigen_inc(new, old); -+} -+ -+static unsigned long long au_farray_cb(void *a, -+ unsigned long long max __maybe_unused, -+ void *arg) -+{ -+ unsigned long long n; -+ struct file **p, *f; -+ struct super_block *sb = arg; -+ -+ n = 0; -+ p = a; -+ lg_global_lock(&files_lglock); -+ do_file_list_for_each_entry(sb, f) { -+ if (au_fi(f) -+ && file_count(f) -+ && !special_file(f->f_dentry->d_inode->i_mode)) { -+ get_file(f); -+ *p++ = f; -+ n++; -+ AuDebugOn(n > max); -+ } -+ } while_file_list_for_each_entry; -+ lg_global_unlock(&files_lglock); -+ -+ return n; -+} -+ -+static struct file **au_farray_alloc(struct super_block *sb, -+ unsigned long long *max) -+{ -+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles); -+ return au_array_alloc(max, au_farray_cb, sb); -+} -+ -+static void au_farray_free(struct file **a, unsigned long long max) -+{ -+ unsigned long long ull; -+ -+ for (ull = 0; ull < max; ull++) -+ if (a[ull]) -+ fput(a[ull]); -+ au_array_free(a); -+} -+ -+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ int err, do_warn; -+ unsigned int mnt_flags; -+ unsigned long long ull, max; -+ aufs_bindex_t br_id; -+ unsigned char verbose; -+ struct file *file, *hf, **array; -+ struct inode *inode; -+ struct au_hfile *hfile; -+ -+ mnt_flags = au_mntflags(sb); -+ verbose = !!au_opt_test(mnt_flags, VERBOSE); -+ -+ array = au_farray_alloc(sb, &max); -+ err = PTR_ERR(array); -+ if (IS_ERR(array)) -+ goto out; -+ -+ do_warn = 0; -+ br_id = au_sbr_id(sb, bindex); -+ for (ull = 0; ull < max; ull++) { -+ file = array[ull]; -+ -+ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */ -+ fi_read_lock(file); -+ if (unlikely(au_test_mmapped(file))) { -+ err = -EBUSY; -+ AuVerbose(verbose, "mmapped %.*s\n", -+ AuDLNPair(file->f_dentry)); -+ AuDbgFile(file); -+ FiMustNoWaiters(file); -+ fi_read_unlock(file); -+ goto out_array; -+ } -+ -+ inode = file->f_dentry->d_inode; -+ hfile = &au_fi(file)->fi_htop; -+ hf = hfile->hf_file; -+ if (!S_ISREG(inode->i_mode) -+ || !(file->f_mode & FMODE_WRITE) -+ || hfile->hf_br->br_id != br_id -+ || !(hf->f_mode & FMODE_WRITE)) -+ array[ull] = NULL; -+ else { -+ do_warn = 1; -+ get_file(file); -+ } -+ -+ FiMustNoWaiters(file); -+ fi_read_unlock(file); -+ fput(file); -+ } -+ -+ err = 0; -+ if (do_warn) -+ au_warn_ima(); -+ -+ for (ull = 0; ull < max; ull++) { -+ file = array[ull]; -+ if (!file) -+ continue; -+ -+ /* todo: already flushed? */ -+ /* cf. fs/super.c:mark_files_ro() */ -+ /* fi_read_lock(file); */ -+ hfile = &au_fi(file)->fi_htop; -+ hf = hfile->hf_file; -+ /* fi_read_unlock(file); */ -+ spin_lock(&hf->f_lock); -+ hf->f_mode &= ~FMODE_WRITE; -+ spin_unlock(&hf->f_lock); -+ if (!file_check_writeable(hf)) { -+ file_release_write(hf); -+ vfsub_mnt_drop_write(hf->f_vfsmnt); -+ } -+ } -+ -+out_array: -+ au_farray_free(array, max); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, -+ int *do_refresh) -+{ -+ int err, rerr; -+ aufs_bindex_t bindex; -+ struct path path; -+ struct dentry *root; -+ struct au_branch *br; -+ -+ root = sb->s_root; -+ bindex = au_find_dbindex(root, mod->h_root); -+ if (bindex < 0) { -+ if (remount) -+ return 0; /* success */ -+ err = -ENOENT; -+ pr_err("%s no such branch\n", mod->path); -+ goto out; -+ } -+ AuDbg("bindex b%d\n", bindex); -+ -+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path); -+ if (unlikely(err)) -+ goto out; -+ -+ br = au_sbr(sb, bindex); -+ if (br->br_perm == mod->perm) -+ return 0; /* success */ -+ -+ if (au_br_writable(br->br_perm)) { -+ /* remove whiteout base */ -+ err = au_br_init_wh(sb, br, mod->perm, mod->h_root); -+ if (unlikely(err)) -+ goto out; -+ -+ if (!au_br_writable(mod->perm)) { -+ /* rw --> ro, file might be mmapped */ -+ DiMustNoWaiters(root); -+ IiMustNoWaiters(root->d_inode); -+ di_write_unlock(root); -+ err = au_br_mod_files_ro(sb, bindex); -+ /* aufs_write_lock() calls ..._child() */ -+ di_write_lock_child(root); -+ -+ if (unlikely(err)) { -+ rerr = -ENOMEM; -+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), -+ GFP_NOFS); -+ if (br->br_wbr) { -+ path.mnt = br->br_mnt; -+ path.dentry = mod->h_root; -+ rerr = au_wbr_init(br, sb, br->br_perm, -+ &path); -+ } -+ if (unlikely(rerr)) { -+ AuIOErr("nested error %d (%d)\n", -+ rerr, err); -+ br->br_perm = mod->perm; -+ } -+ } -+ } -+ } else if (au_br_writable(mod->perm)) { -+ /* ro --> rw */ -+ err = -ENOMEM; -+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS); -+ if (br->br_wbr) { -+ path.mnt = br->br_mnt; -+ path.dentry = mod->h_root; -+ err = au_wbr_init(br, sb, mod->perm, &path); -+ if (unlikely(err)) { -+ kfree(br->br_wbr); -+ br->br_wbr = NULL; -+ } -+ } -+ } -+ -+ if (!err) { -+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm); -+ br->br_perm = mod->perm; -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/branch.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,236 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * branch filesystems and xino for them -+ */ -+ -+#ifndef __AUFS_BRANCH_H__ -+#define __AUFS_BRANCH_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "dynop.h" -+#include "rwsem.h" -+#include "super.h" -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* a xino file */ -+struct au_xino_file { -+ struct file *xi_file; -+ struct mutex xi_nondir_mtx; -+ -+ /* todo: make xino files an array to support huge inode number */ -+ -+#ifdef CONFIG_DEBUG_FS -+ struct dentry *xi_dbgaufs; -+#endif -+}; -+ -+/* members for writable branch only */ -+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; -+struct au_wbr { -+ struct au_rwsem wbr_wh_rwsem; -+ struct dentry *wbr_wh[AuBrWh_Last]; -+ atomic_t wbr_wh_running; -+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */ -+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */ -+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */ -+ -+ /* mfs mode */ -+ unsigned long long wbr_bytes; -+}; -+ -+/* ext2 has 3 types of operations at least, ext3 has 4 */ -+#define AuBrDynOp (AuDyLast * 4) -+ -+#ifdef CONFIG_AUFS_HFSNOTIFY -+/* support for asynchronous destruction */ -+struct au_br_hfsnotify { -+ struct fsnotify_group *hfsn_group; -+}; -+#endif -+ -+/* protected by superblock rwsem */ -+struct au_branch { -+ struct au_xino_file br_xino; -+ -+ aufs_bindex_t br_id; -+ -+ int br_perm; -+ struct vfsmount *br_mnt; -+ spinlock_t br_dykey_lock; -+ struct au_dykey *br_dykey[AuBrDynOp]; -+ atomic_t br_count; -+ -+ struct au_wbr *br_wbr; -+ -+ /* xino truncation */ -+ blkcnt_t br_xino_upper; /* watermark in blocks */ -+ atomic_t br_xino_running; -+ -+#ifdef CONFIG_AUFS_HFSNOTIFY -+ struct au_br_hfsnotify *br_hfsn; -+#endif -+ -+#ifdef CONFIG_SYSFS -+ /* an entry under sysfs per mount-point */ -+ char br_name[8]; -+ struct attribute br_attr; -+#endif -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* branch permissions and attributes */ -+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */ -+#define AuBrPerm_RO (1 << 1) /* readonly */ -+#define AuBrPerm_RR (1 << 2) /* natively readonly */ -+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR) -+ -+#define AuBrRAttr_WH (1 << 3) /* whiteout-able */ -+ -+#define AuBrWAttr_NoLinkWH (1 << 4) /* un-hardlinkable whiteouts */ -+ -+static inline int au_br_writable(int brperm) -+{ -+ return brperm & AuBrPerm_RW; -+} -+ -+static inline int au_br_whable(int brperm) -+{ -+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH); -+} -+ -+static inline int au_br_wh_linkable(int brperm) -+{ -+ return !(brperm & AuBrWAttr_NoLinkWH); -+} -+ -+static inline int au_br_rdonly(struct au_branch *br) -+{ -+ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY) -+ || !au_br_writable(br->br_perm)) -+ ? -EROFS : 0; -+} -+ -+static inline int au_br_hnotifyable(int brperm __maybe_unused) -+{ -+#ifdef CONFIG_AUFS_HNOTIFY -+ return !(brperm & AuBrPerm_RR); -+#else -+ return 0; -+#endif -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* branch.c */ -+struct au_sbinfo; -+void au_br_free(struct au_sbinfo *sinfo); -+int au_br_index(struct super_block *sb, aufs_bindex_t br_id); -+struct au_opt_add; -+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount); -+struct au_opt_del; -+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount); -+long au_ibusy_ioctl(struct file *file, unsigned long arg); -+#ifdef CONFIG_COMPAT -+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg); -+#endif -+struct au_opt_mod; -+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, -+ int *do_refresh); -+ -+/* xino.c */ -+static const loff_t au_loff_max = LLONG_MAX; -+ -+int au_xib_trunc(struct super_block *sb); -+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size, -+ loff_t *pos); -+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, -+ loff_t *pos); -+struct file *au_xino_create2(struct file *base_file, struct file *copy_src); -+struct file *au_xino_create(struct super_block *sb, char *fname, int silent); -+ino_t au_xino_new_ino(struct super_block *sb); -+void au_xino_delete_inode(struct inode *inode, const int unlinked); -+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ ino_t ino); -+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ ino_t *ino); -+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino, -+ struct file *base_file, int do_test); -+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex); -+ -+struct au_opt_xino; -+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount); -+void au_xino_clr(struct super_block *sb); -+struct file *au_xino_def(struct super_block *sb); -+int au_xino_path(struct seq_file *seq, struct file *file); -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* Superblock to branch */ -+static inline -+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_sbr(sb, bindex)->br_id; -+} -+ -+static inline -+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_sbr(sb, bindex)->br_mnt; -+} -+ -+static inline -+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_sbr_mnt(sb, bindex)->mnt_sb; -+} -+ -+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ atomic_dec(&au_sbr(sb, bindex)->br_count); -+} -+ -+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_sbr(sb, bindex)->br_perm; -+} -+ -+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_br_whable(au_sbr_perm(sb, bindex)); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * wbr_wh_read_lock, wbr_wh_write_lock -+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock -+ */ -+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem); -+ -+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem) -+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem) -+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem) -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_BRANCH_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/conf.mk 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,38 @@ -+ -+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS} -+ -+define AuConf -+ifdef ${1} -+AuConfStr += ${1}=${${1}} -+endif -+endef -+ -+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \ -+ SBILIST \ -+ HNOTIFY HFSNOTIFY \ -+ EXPORT INO_T_64 \ -+ RDU \ -+ PROC_MAP \ -+ SP_IATTR \ -+ SHWH \ -+ BR_RAMFS \ -+ BR_FUSE POLL \ -+ BR_HFSPLUS \ -+ BDEV_LOOP \ -+ DEBUG MAGIC_SYSRQ -+$(foreach i, ${AuConfAll}, \ -+ $(eval $(call AuConf,CONFIG_AUFS_${i}))) -+ -+AuConfName = ${obj}/conf.str -+${AuConfName}.tmp: FORCE -+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@ -+${AuConfName}: ${AuConfName}.tmp -+ @diff -q $< $@ > /dev/null 2>&1 || { \ -+ echo ' GEN ' $@; \ -+ cp -p $< $@; \ -+ } -+FORCE: -+clean-files += ${AuConfName} ${AuConfName}.tmp -+${obj}/sysfs.o: ${AuConfName} -+ -+-include ${srctree}/${src}/conf_priv.mk ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/cpup.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,1085 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * copy-up functions, see wbr_policy.c for copy-down -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+void au_cpup_attr_flags(struct inode *dst, struct inode *src) -+{ -+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE -+ | S_NOATIME | S_NOCMTIME; -+ -+ dst->i_flags |= src->i_flags & ~mask; -+ if (au_test_fs_notime(dst->i_sb)) -+ dst->i_flags |= S_NOATIME | S_NOCMTIME; -+} -+ -+void au_cpup_attr_timesizes(struct inode *inode) -+{ -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ fsstack_copy_attr_times(inode, h_inode); -+ fsstack_copy_inode_size(inode, h_inode); -+} -+ -+void au_cpup_attr_nlink(struct inode *inode, int force) -+{ -+ struct inode *h_inode; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend; -+ -+ sb = inode->i_sb; -+ bindex = au_ibstart(inode); -+ h_inode = au_h_iptr(inode, bindex); -+ if (!force -+ && !S_ISDIR(h_inode->i_mode) -+ && au_opt_test(au_mntflags(sb), PLINK) -+ && au_plink_test(inode)) -+ return; -+ -+ /* -+ * 0 can happen in revalidating. -+ * h_inode->i_mutex is not held, but it is harmless since once i_nlink -+ * reaches 0, it will never become positive. -+ */ -+ set_nlink(inode, h_inode->i_nlink); -+ -+ /* -+ * fewer nlink makes find(1) noisy, but larger nlink doesn't. -+ * it may includes whplink directory. -+ */ -+ if (S_ISDIR(h_inode->i_mode)) { -+ bend = au_ibend(inode); -+ for (bindex++; bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (h_inode) -+ au_add_nlink(inode, h_inode); -+ } -+ } -+} -+ -+void au_cpup_attr_changeable(struct inode *inode) -+{ -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ inode->i_mode = h_inode->i_mode; -+ inode->i_uid = h_inode->i_uid; -+ inode->i_gid = h_inode->i_gid; -+ au_cpup_attr_timesizes(inode); -+ au_cpup_attr_flags(inode, h_inode); -+} -+ -+void au_cpup_igen(struct inode *inode, struct inode *h_inode) -+{ -+ struct au_iinfo *iinfo = au_ii(inode); -+ -+ IiMustWriteLock(inode); -+ -+ iinfo->ii_higen = h_inode->i_generation; -+ iinfo->ii_hsb1 = h_inode->i_sb; -+} -+ -+void au_cpup_attr_all(struct inode *inode, int force) -+{ -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ au_cpup_attr_changeable(inode); -+ if (inode->i_nlink > 0) -+ au_cpup_attr_nlink(inode, force); -+ inode->i_rdev = h_inode->i_rdev; -+ inode->i_blkbits = h_inode->i_blkbits; -+ au_cpup_igen(inode, h_inode); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */ -+ -+/* keep the timestamps of the parent dir when cpup */ -+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, -+ struct path *h_path) -+{ -+ struct inode *h_inode; -+ -+ dt->dt_dentry = dentry; -+ dt->dt_h_path = *h_path; -+ h_inode = h_path->dentry->d_inode; -+ dt->dt_atime = h_inode->i_atime; -+ dt->dt_mtime = h_inode->i_mtime; -+ /* smp_mb(); */ -+} -+ -+void au_dtime_revert(struct au_dtime *dt) -+{ -+ struct iattr attr; -+ int err; -+ -+ attr.ia_atime = dt->dt_atime; -+ attr.ia_mtime = dt->dt_mtime; -+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET -+ | ATTR_ATIME | ATTR_ATIME_SET; -+ -+ err = vfsub_notify_change(&dt->dt_h_path, &attr); -+ if (unlikely(err)) -+ pr_warn("restoring timestamps failed(%d). ignored\n", err); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static noinline_for_stack -+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src) -+{ -+ int err, sbits; -+ struct iattr ia; -+ struct path h_path; -+ struct inode *h_isrc, *h_idst; -+ -+ h_path.dentry = au_h_dptr(dst, bindex); -+ h_idst = h_path.dentry->d_inode; -+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex); -+ h_isrc = h_src->d_inode; -+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID -+ | ATTR_ATIME | ATTR_MTIME -+ | ATTR_ATIME_SET | ATTR_MTIME_SET; -+ ia.ia_uid = h_isrc->i_uid; -+ ia.ia_gid = h_isrc->i_gid; -+ ia.ia_atime = h_isrc->i_atime; -+ ia.ia_mtime = h_isrc->i_mtime; -+ if (h_idst->i_mode != h_isrc->i_mode -+ && !S_ISLNK(h_idst->i_mode)) { -+ ia.ia_valid |= ATTR_MODE; -+ ia.ia_mode = h_isrc->i_mode; -+ } -+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); -+ au_cpup_attr_flags(h_idst, h_isrc); -+ err = vfsub_notify_change(&h_path, &ia); -+ -+ /* is this nfs only? */ -+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) { -+ ia.ia_valid = ATTR_FORCE | ATTR_MODE; -+ ia.ia_mode = h_isrc->i_mode; -+ err = vfsub_notify_change(&h_path, &ia); -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len, -+ char *buf, unsigned long blksize) -+{ -+ int err; -+ size_t sz, rbytes, wbytes; -+ unsigned char all_zero; -+ char *p, *zp; -+ struct mutex *h_mtx; -+ /* reduce stack usage */ -+ struct iattr *ia; -+ -+ zp = page_address(ZERO_PAGE(0)); -+ if (unlikely(!zp)) -+ return -ENOMEM; /* possible? */ -+ -+ err = 0; -+ all_zero = 0; -+ while (len) { -+ AuDbg("len %lld\n", len); -+ sz = blksize; -+ if (len < blksize) -+ sz = len; -+ -+ rbytes = 0; -+ /* todo: signal_pending? */ -+ while (!rbytes || err == -EAGAIN || err == -EINTR) { -+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos); -+ err = rbytes; -+ } -+ if (unlikely(err < 0)) -+ break; -+ -+ all_zero = 0; -+ if (len >= rbytes && rbytes == blksize) -+ all_zero = !memcmp(buf, zp, rbytes); -+ if (!all_zero) { -+ wbytes = rbytes; -+ p = buf; -+ while (wbytes) { -+ size_t b; -+ -+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos); -+ err = b; -+ /* todo: signal_pending? */ -+ if (unlikely(err == -EAGAIN || err == -EINTR)) -+ continue; -+ if (unlikely(err < 0)) -+ break; -+ wbytes -= b; -+ p += b; -+ } -+ } else { -+ loff_t res; -+ -+ AuLabel(hole); -+ res = vfsub_llseek(dst, rbytes, SEEK_CUR); -+ err = res; -+ if (unlikely(res < 0)) -+ break; -+ } -+ len -= rbytes; -+ err = 0; -+ } -+ -+ /* the last block may be a hole */ -+ if (!err && all_zero) { -+ AuLabel(last hole); -+ -+ err = 1; -+ if (au_test_nfs(dst->f_dentry->d_sb)) { -+ /* nfs requires this step to make last hole */ -+ /* is this only nfs? */ -+ do { -+ /* todo: signal_pending? */ -+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos); -+ } while (err == -EAGAIN || err == -EINTR); -+ if (err == 1) -+ dst->f_pos--; -+ } -+ -+ if (err == 1) { -+ ia = (void *)buf; -+ ia->ia_size = dst->f_pos; -+ ia->ia_valid = ATTR_SIZE | ATTR_FILE; -+ ia->ia_file = dst; -+ h_mtx = &dst->f_dentry->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); -+ err = vfsub_notify_change(&dst->f_path, ia); -+ mutex_unlock(h_mtx); -+ } -+ } -+ -+ return err; -+} -+ -+int au_copy_file(struct file *dst, struct file *src, loff_t len) -+{ -+ int err; -+ unsigned long blksize; -+ unsigned char do_kfree; -+ char *buf; -+ -+ err = -ENOMEM; -+ blksize = dst->f_dentry->d_sb->s_blocksize; -+ if (!blksize || PAGE_SIZE < blksize) -+ blksize = PAGE_SIZE; -+ AuDbg("blksize %lu\n", blksize); -+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *)); -+ if (do_kfree) -+ buf = kmalloc(blksize, GFP_NOFS); -+ else -+ buf = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!buf)) -+ goto out; ++config AUFS_EXPORT ++ bool "NFS-exportable aufs" ++ depends on EXPORTFS ++ help ++ If you want to export your mounted aufs via NFS, then enable this ++ option. There are several requirements for this configuration. ++ See detail in aufs.5. + -+ if (len > (1 << 22)) -+ AuDbg("copying a large file %lld\n", (long long)len); ++config AUFS_INO_T_64 ++ bool ++ depends on AUFS_EXPORT ++ depends on 64BIT && !(ALPHA || S390) ++ default y ++ help ++ Automatic configuration for internal use. ++ /* typedef unsigned long/int __kernel_ino_t */ ++ /* alpha and s390x are int */ + -+ src->f_pos = 0; -+ dst->f_pos = 0; -+ err = au_do_copy_file(dst, src, len, buf, blksize); -+ if (do_kfree) -+ kfree(buf); -+ else -+ free_page((unsigned long)buf); ++config AUFS_RDU ++ bool "Readdir in userspace" ++ help ++ Aufs has two methods to provide a merged view for a directory, ++ by a user-space library and by kernel-space natively. The latter ++ is always enabled but sometimes large and slow. ++ If you enable this option, install the library in aufs2-util ++ package, and set some environment variables for your readdir(3), ++ then the work will be handled in user-space which generally ++ shows better performance in most cases. ++ See detail in aufs.5. + -+out: -+ return err; -+} ++config AUFS_PROC_MAP ++ bool "support for /proc/maps and lsof(1)" ++ depends on PROC_FS ++ help ++ When you issue mmap(2) in aufs, it is actually a direct mmap(2) ++ call to the file on the branch fs since the file in aufs is ++ purely virtual. And the file path printed in /proc/maps (and ++ others) will be the path on the branch fs. In most cases, it ++ does no harm. But some utilities like lsof(1) may confuse since ++ the utility or user may expect the file path in aufs to be ++ printed. ++ To address this issue, aufs provides a patch which introduces a ++ new member called vm_prfile into struct vm_are_struct. The patch ++ is meaningless without enabling this configuration since nobody ++ sets the new vm_prfile member. ++ If you don't apply the patch, then enabling this configuration ++ will cause a compile error. ++ This approach is fragile since if someone else make some changes ++ around vm_file, then vm_prfile may not work anymore. As a ++ workaround such case, aufs provides this configuration. If you ++ disable it, then lsof(1) may produce incorrect result but the ++ problem will be gone even if the aufs patch is applied (I hope). + -+/* -+ * to support a sparse file which is opened with O_APPEND, -+ * we need to close the file. -+ */ -+static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst, -+ aufs_bindex_t bsrc, loff_t len) -+{ -+ int err, i; -+ enum { SRC, DST }; -+ struct { -+ aufs_bindex_t bindex; -+ unsigned int flags; -+ struct dentry *dentry; -+ struct file *file; -+ void *label, *label_file; -+ } *f, file[] = { -+ { -+ .bindex = bsrc, -+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE, -+ .file = NULL, -+ .label = &&out, -+ .label_file = &&out_src -+ }, -+ { -+ .bindex = bdst, -+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE, -+ .file = NULL, -+ .label = &&out_src, -+ .label_file = &&out_dst -+ } -+ }; -+ struct super_block *sb; ++config AUFS_SP_IATTR ++ bool "Respect the attributes (mtime/ctime mainly) of special files" ++ help ++ When you write something to a special file, some attributes of it ++ (mtime/ctime mainly) may be updated. Generally such updates are ++ less important (actually some device drivers and NFS ignore ++ it). But some applications (such like test program) requires ++ such updates. If you need these updates, then enable this ++ configuration which introduces some overhead. ++ Currently this configuration handles FIFO only. + -+ /* bsrc branch can be ro/rw. */ -+ sb = dentry->d_sb; -+ f = file; -+ for (i = 0; i < 2; i++, f++) { -+ f->dentry = au_h_dptr(dentry, f->bindex); -+ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL); -+ err = PTR_ERR(f->file); -+ if (IS_ERR(f->file)) -+ goto *f->label; -+ err = -EINVAL; -+ if (unlikely(!f->file->f_op)) -+ goto *f->label_file; -+ } ++config AUFS_SHWH ++ bool "Show whiteouts" ++ help ++ If you want to make the whiteouts in aufs visible, then enable ++ this option and specify 'shwh' mount option. Although it may ++ sounds like philosophy or something, but in technically it ++ simply shows the name of whiteout with keeping its behaviour. + -+ /* try stopping to update while we copyup */ -+ IMustLock(file[SRC].dentry->d_inode); -+ err = au_copy_file(file[DST].file, file[SRC].file, len); ++config AUFS_BR_RAMFS ++ bool "Ramfs (initramfs/rootfs) as an aufs branch" ++ help ++ If you want to use ramfs as an aufs branch fs, then enable this ++ option. Generally tmpfs is recommended. ++ Aufs prohibited them to be a branch fs by default, because ++ initramfs becomes unusable after switch_root or something ++ generally. If you sets initramfs as an aufs branch and boot your ++ system by switch_root, you will meet a problem easily since the ++ files in initramfs may be inaccessible. ++ Unless you are going to use ramfs as an aufs branch fs without ++ switch_root or something, leave it N. + -+out_dst: -+ fput(file[DST].file); -+ au_sbr_put(sb, file[DST].bindex); -+out_src: -+ fput(file[SRC].file); -+ au_sbr_put(sb, file[SRC].bindex); -+out: -+ return err; -+} ++config AUFS_BR_FUSE ++ bool "Fuse fs as an aufs branch" ++ depends on FUSE_FS ++ select AUFS_POLL ++ help ++ If you want to use fuse-based userspace filesystem as an aufs ++ branch fs, then enable this option. ++ It implements the internal poll(2) operation which is ++ implemented by fuse only (curretnly). + -+static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst, -+ aufs_bindex_t bsrc, loff_t len, -+ struct inode *h_dir, struct path *h_path) -+{ -+ int err, rerr; -+ loff_t l; ++config AUFS_POLL ++ bool ++ help ++ Automatic configuration for internal use. + -+ err = 0; -+ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc)); -+ if (len == -1 || l < len) -+ len = l; -+ if (len) -+ err = au_cp_regular(dentry, bdst, bsrc, len); -+ if (!err) -+ goto out; /* success */ ++config AUFS_BR_HFSPLUS ++ bool "Hfsplus as an aufs branch" ++ depends on HFSPLUS_FS ++ default y ++ help ++ If you want to use hfsplus fs as an aufs branch fs, then enable ++ this option. This option introduces a small overhead at ++ copying-up a file on hfsplus. + -+ rerr = vfsub_unlink(h_dir, h_path, /*force*/0); -+ if (rerr) { -+ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n", -+ AuDLNPair(h_path->dentry), err, rerr); -+ err = -EIO; -+ } ++config AUFS_BDEV_LOOP ++ bool ++ depends on BLK_DEV_LOOP ++ default y ++ help ++ Automatic configuration for internal use. ++ Convert =[ym] into =y. + -+out: -+ return err; -+} ++config AUFS_DEBUG ++ bool "Debug aufs" ++ help ++ Enable this to compile aufs internal debug code. ++ It will have a negative impact to the performance. + -+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src, -+ struct inode *h_dir) -+{ -+ int err, symlen; -+ mm_segment_t old_fs; -+ union { -+ char *k; -+ char __user *u; -+ } sym; ++config AUFS_MAGIC_SYSRQ ++ bool ++ depends on AUFS_DEBUG && MAGIC_SYSRQ ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq, enabled automatically. ++endif +--- /dev/null ++++ linux-3.9/fs/aufs/Makefile 2013-05-13 17:13:17.207129054 +0200 +@@ -0,0 +1,42 @@ + -+ err = -ENOSYS; -+ if (unlikely(!h_src->d_inode->i_op->readlink)) -+ goto out; ++include ${src}/magic.mk ++ifeq (${CONFIG_AUFS_FS},m) ++include ${src}/conf.mk ++endif ++-include ${src}/priv_def.mk + -+ err = -ENOMEM; -+ sym.k = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!sym.k)) -+ goto out; ++# cf. include/linux/kernel.h ++# enable pr_debug ++ccflags-y += -DDEBUG ++# sparse requires the full pathname ++ifdef M ++ccflags-y += -include ${M}/../../include/linux/aufs_type.h ++else ++ccflags-y += -include ${srctree}/include/linux/aufs_type.h ++endif + -+ /* unnecessary to support mmap_sem since symlink is not mmap-able */ -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX); -+ err = symlen; -+ set_fs(old_fs); ++obj-$(CONFIG_AUFS_FS) += aufs.o ++aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ ++ wkq.o vfsub.o dcsub.o \ ++ cpup.o whout.o wbr_policy.o \ ++ dinfo.o dentry.o \ ++ dynop.o \ ++ finfo.o file.o f_op.o \ ++ dir.o vdir.o \ ++ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ ++ ioctl.o + -+ if (symlen > 0) { -+ sym.k[symlen] = 0; -+ err = vfsub_symlink(h_dir, h_path, sym.k); -+ } -+ free_page((unsigned long)sym.k); ++# all are boolean ++aufs-$(CONFIG_PROC_FS) += procfs.o plink.o ++aufs-$(CONFIG_SYSFS) += sysfs.o ++aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o ++aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o ++aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o ++aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o ++aufs-$(CONFIG_AUFS_EXPORT) += export.o ++aufs-$(CONFIG_AUFS_POLL) += poll.o ++aufs-$(CONFIG_AUFS_RDU) += rdu.o ++aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o ++aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o ++aufs-$(CONFIG_AUFS_DEBUG) += debug.o ++aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o +--- /dev/null ++++ linux-3.9/fs/aufs/aufs.h 2013-05-13 17:13:17.207129054 +0200 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+out: -+ return err; -+} ++/* ++ * all header files ++ */ + -+/* return with the lower dst inode is locked */ -+static noinline_for_stack -+int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst, -+ aufs_bindex_t bsrc, loff_t len, unsigned int flags, -+ struct dentry *dst_parent) -+{ -+ int err; -+ umode_t mode; -+ unsigned int mnt_flags; -+ unsigned char isdir; -+ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME); -+ struct au_dtime dt; -+ struct path h_path; -+ struct dentry *h_src, *h_dst, *h_parent; -+ struct inode *h_inode, *h_dir; -+ struct super_block *sb; ++#ifndef __AUFS_H__ ++#define __AUFS_H__ + -+ /* bsrc branch can be ro/rw. */ -+ h_src = au_h_dptr(dentry, bsrc); -+ h_inode = h_src->d_inode; -+ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc)); ++#ifdef __KERNEL__ + -+ /* try stopping to be referenced while we are creating */ -+ h_dst = au_h_dptr(dentry, bdst); -+ h_parent = h_dst->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); -+ AuDebugOn(h_parent != h_dst->d_parent); ++#define AuStub(type, name, body, ...) \ ++ static inline type name(__VA_ARGS__) { body; } + -+ sb = dentry->d_sb; -+ h_path.mnt = au_sbr_mnt(sb, bdst); -+ if (do_dt) { -+ h_path.dentry = h_parent; -+ au_dtime_store(&dt, dst_parent, &h_path); -+ } -+ h_path.dentry = h_dst; ++#define AuStubVoid(name, ...) \ ++ AuStub(void, name, , __VA_ARGS__) ++#define AuStubInt0(name, ...) \ ++ AuStub(int, name, return 0, __VA_ARGS__) + -+ isdir = 0; -+ mode = h_inode->i_mode; -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ /* try stopping to update while we are referencing */ -+ IMustLock(h_inode); -+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR, -+ /*want_excl*/true); -+ if (!err) -+ err = au_do_cpup_regular -+ (dentry, bdst, bsrc, len, -+ au_h_iptr(dst_parent->d_inode, bdst), &h_path); -+ break; -+ case S_IFDIR: -+ isdir = 1; -+ err = vfsub_mkdir(h_dir, &h_path, mode); -+ if (!err) { -+ /* -+ * strange behaviour from the users view, -+ * particularry setattr case -+ */ -+ if (au_ibstart(dst_parent->d_inode) == bdst) -+ au_cpup_attr_nlink(dst_parent->d_inode, -+ /*force*/1); -+ au_cpup_attr_nlink(dentry->d_inode, /*force*/1); -+ } -+ break; -+ case S_IFLNK: -+ err = au_do_cpup_symlink(&h_path, h_src, h_dir); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ AuDebugOn(!capable(CAP_MKNOD)); -+ /*FALLTHROUGH*/ -+ case S_IFIFO: -+ case S_IFSOCK: -+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev); -+ break; -+ default: -+ AuIOErr("Unknown inode type 0%o\n", mode); -+ err = -EIO; -+ } ++#include "debug.h" + -+ mnt_flags = au_mntflags(sb); -+ if (!au_opt_test(mnt_flags, UDBA_NONE) -+ && !isdir -+ && au_opt_test(mnt_flags, XINO) -+ && h_inode->i_nlink == 1 -+ /* todo: unnecessary? */ -+ /* && dentry->d_inode->i_nlink == 1 */ -+ && bdst < bsrc -+ && !au_ftest_cpup(flags, KEEPLINO)) -+ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0); -+ /* ignore this error */ ++#include "branch.h" ++#include "cpup.h" ++#include "dcsub.h" ++#include "dbgaufs.h" ++#include "dentry.h" ++#include "dir.h" ++#include "dynop.h" ++#include "file.h" ++#include "fstype.h" ++#include "inode.h" ++#include "loop.h" ++#include "module.h" ++#include "opts.h" ++#include "rwsem.h" ++#include "spl.h" ++#include "super.h" ++#include "sysaufs.h" ++#include "vfsub.h" ++#include "whout.h" ++#include "wkq.h" + -+ if (do_dt) -+ au_dtime_revert(&dt); -+ return err; -+} ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/branch.c 2013-05-13 17:13:17.207129054 +0200 +@@ -0,0 +1,1213 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + +/* -+ * copyup the @dentry from @bsrc to @bdst. -+ * the caller must set the both of lower dentries. -+ * @len is for truncating when it is -1 copyup the entire file. -+ * in link/rename cases, @dst_parent may be different from the real one. ++ * branch management + */ -+static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, -+ aufs_bindex_t bsrc, loff_t len, unsigned int flags, -+ struct dentry *dst_parent) -+{ -+ int err, rerr; -+ aufs_bindex_t old_ibstart; -+ unsigned char isdir, plink; -+ struct au_dtime dt; -+ struct path h_path; -+ struct dentry *h_src, *h_dst, *h_parent; -+ struct inode *dst_inode, *h_dir, *inode; -+ struct super_block *sb; + -+ AuDebugOn(bsrc <= bdst); ++#include ++#include ++#include "aufs.h" + -+ sb = dentry->d_sb; -+ h_path.mnt = au_sbr_mnt(sb, bdst); -+ h_dst = au_h_dptr(dentry, bdst); -+ h_parent = h_dst->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); ++/* ++ * free a single branch ++ */ + -+ h_src = au_h_dptr(dentry, bsrc); -+ inode = dentry->d_inode; ++/* prohibit rmdir to the root of the branch */ ++/* todo: another new flag? */ ++static void au_br_dflags_force(struct au_branch *br) ++{ ++ struct dentry *h_dentry; + -+ if (!dst_parent) -+ dst_parent = dget_parent(dentry); -+ else -+ dget(dst_parent); ++ h_dentry = au_br_dentry(br); ++ spin_lock(&h_dentry->d_lock); ++ br->br_dflags = h_dentry->d_flags & DCACHE_MOUNTED; ++ h_dentry->d_flags |= DCACHE_MOUNTED; ++ spin_unlock(&h_dentry->d_lock); ++} + -+ plink = !!au_opt_test(au_mntflags(sb), PLINK); -+ dst_inode = au_h_iptr(inode, bdst); -+ if (dst_inode) { -+ if (unlikely(!plink)) { -+ err = -EIO; -+ AuIOErr("hi%lu(i%lu) exists on b%d " -+ "but plink is disabled\n", -+ dst_inode->i_ino, inode->i_ino, bdst); -+ goto out; -+ } ++/* restore its d_flags */ ++static void au_br_dflags_restore(struct au_branch *br) ++{ ++ struct dentry *h_dentry; + -+ if (dst_inode->i_nlink) { -+ const int do_dt = au_ftest_cpup(flags, DTIME); ++ if (br->br_dflags) ++ return; + -+ h_src = au_plink_lkup(inode, bdst); -+ err = PTR_ERR(h_src); -+ if (IS_ERR(h_src)) -+ goto out; -+ if (unlikely(!h_src->d_inode)) { -+ err = -EIO; -+ AuIOErr("i%lu exists on a upper branch " -+ "but not pseudo-linked\n", -+ inode->i_ino); -+ dput(h_src); -+ goto out; -+ } ++ h_dentry = au_br_dentry(br); ++ spin_lock(&h_dentry->d_lock); ++ h_dentry->d_flags &= ~DCACHE_MOUNTED; ++ spin_unlock(&h_dentry->d_lock); ++} + -+ if (do_dt) { -+ h_path.dentry = h_parent; -+ au_dtime_store(&dt, dst_parent, &h_path); -+ } -+ h_path.dentry = h_dst; -+ err = vfsub_link(h_src, h_dir, &h_path); -+ if (do_dt) -+ au_dtime_revert(&dt); -+ dput(h_src); -+ goto out; -+ } else -+ /* todo: cpup_wh_file? */ -+ /* udba work */ -+ au_update_ibrange(inode, /*do_put_zero*/1); -+ } ++static void au_br_do_free(struct au_branch *br) ++{ ++ int i; ++ struct au_wbr *wbr; ++ struct au_dykey **key; + -+ old_ibstart = au_ibstart(inode); -+ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent); -+ if (unlikely(err)) -+ goto out; -+ dst_inode = h_dst->d_inode; -+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2); ++ au_hnotify_fin_br(br); + -+ err = cpup_iattr(dentry, bdst, h_src); -+ isdir = S_ISDIR(dst_inode->i_mode); -+ if (!err) { -+ if (bdst < old_ibstart) { -+ if (S_ISREG(inode->i_mode)) { -+ err = au_dy_iaop(inode, bdst, dst_inode); -+ if (unlikely(err)) -+ goto out_rev; -+ } -+ au_set_ibstart(inode, bdst); -+ } -+ au_set_h_iptr(inode, bdst, au_igrab(dst_inode), -+ au_hi_flags(inode, isdir)); -+ mutex_unlock(&dst_inode->i_mutex); -+ if (!isdir -+ && h_src->d_inode->i_nlink > 1 -+ && plink) -+ au_plink_append(inode, bdst, h_dst); -+ goto out; /* success */ -+ } ++ if (br->br_xino.xi_file) ++ fput(br->br_xino.xi_file); ++ mutex_destroy(&br->br_xino.xi_nondir_mtx); + -+ /* revert */ -+out_rev: -+ h_path.dentry = h_parent; -+ mutex_unlock(&dst_inode->i_mutex); -+ au_dtime_store(&dt, dst_parent, &h_path); -+ h_path.dentry = h_dst; -+ if (!isdir) -+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); -+ else -+ rerr = vfsub_rmdir(h_dir, &h_path); -+ au_dtime_revert(&dt); -+ if (rerr) { -+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr); -+ err = -EIO; ++ AuDebugOn(atomic_read(&br->br_count)); ++ ++ wbr = br->br_wbr; ++ if (wbr) { ++ for (i = 0; i < AuBrWh_Last; i++) ++ dput(wbr->wbr_wh[i]); ++ AuDebugOn(atomic_read(&wbr->wbr_wh_running)); ++ AuRwDestroy(&wbr->wbr_wh_rwsem); + } + -+out: -+ dput(dst_parent); -+ return err; -+} ++ key = br->br_dykey; ++ for (i = 0; i < AuBrDynOp; i++, key++) ++ if (*key) ++ au_dy_put(*key); ++ else ++ break; + -+struct au_cpup_single_args { -+ int *errp; -+ struct dentry *dentry; -+ aufs_bindex_t bdst, bsrc; -+ loff_t len; -+ unsigned int flags; -+ struct dentry *dst_parent; -+}; ++ au_br_dflags_restore(br); + -+static void au_call_cpup_single(void *args) -+{ -+ struct au_cpup_single_args *a = args; -+ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len, -+ a->flags, a->dst_parent); ++ /* recursive lock, s_umount of branch's */ ++ lockdep_off(); ++ path_put(&br->br_path); ++ lockdep_on(); ++ kfree(wbr); ++ kfree(br); +} + +/* -+ * prevent SIGXFSZ in copy-up. -+ * testing CAP_MKNOD is for generic fs, -+ * but CAP_FSETID is for xfs only, currently. ++ * frees all branches + */ -+static int au_cpup_sio_test(struct super_block *sb, umode_t mode) ++void au_br_free(struct au_sbinfo *sbinfo) +{ -+ int do_sio; ++ aufs_bindex_t bmax; ++ struct au_branch **br; + -+ do_sio = 0; -+ if (!au_wkq_test() -+ && (!au_sbi(sb)->si_plink_maint_pid -+ || au_plink_maint(sb, AuLock_NOPLM))) { -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ /* no condition about RLIMIT_FSIZE and the file size */ -+ do_sio = 1; -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ do_sio = !capable(CAP_MKNOD); -+ break; -+ } -+ if (!do_sio) -+ do_sio = ((mode & (S_ISUID | S_ISGID)) -+ && !capable(CAP_FSETID)); -+ } ++ AuRwMustWriteLock(&sbinfo->si_rwsem); + -+ return do_sio; ++ bmax = sbinfo->si_bend + 1; ++ br = sbinfo->si_branch; ++ while (bmax--) ++ au_br_do_free(*br++); +} + -+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, -+ aufs_bindex_t bsrc, loff_t len, unsigned int flags, -+ struct dentry *dst_parent) ++/* ++ * find the index of a branch which is specified by @br_id. ++ */ ++int au_br_index(struct super_block *sb, aufs_bindex_t br_id) +{ -+ int err, wkq_err; -+ struct dentry *h_dentry; ++ aufs_bindex_t bindex, bend; + -+ h_dentry = au_h_dptr(dentry, bsrc); -+ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode)) -+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, -+ dst_parent); -+ else { -+ struct au_cpup_single_args args = { -+ .errp = &err, -+ .dentry = dentry, -+ .bdst = bdst, -+ .bsrc = bsrc, -+ .len = len, -+ .flags = flags, -+ .dst_parent = dst_parent -+ }; -+ wkq_err = au_wkq_wait(au_call_cpup_single, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } ++ bend = au_sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (au_sbr_id(sb, bindex) == br_id) ++ return bindex; ++ return -1; ++} + -+ return err; ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * add a branch ++ */ ++ ++static int test_overlap(struct super_block *sb, struct dentry *h_adding, ++ struct dentry *h_root) ++{ ++ if (unlikely(h_adding == h_root ++ || au_test_loopback_overlap(sb, h_adding))) ++ return 1; ++ if (h_adding->d_sb != h_root->d_sb) ++ return 0; ++ return au_test_subdir(h_adding, h_root) ++ || au_test_subdir(h_root, h_adding); +} + +/* -+ * copyup the @dentry from the first active lower branch to @bdst, -+ * using au_cpup_single(). ++ * returns a newly allocated branch. @new_nbranch is a number of branches ++ * after adding a branch. + */ -+static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, -+ unsigned int flags) ++static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch, ++ int perm) +{ ++ struct au_branch *add_branch; ++ struct dentry *root; + int err; -+ aufs_bindex_t bsrc, bend; + -+ bend = au_dbend(dentry); -+ for (bsrc = bdst + 1; bsrc <= bend; bsrc++) -+ if (au_h_dptr(dentry, bsrc)) -+ break; ++ err = -ENOMEM; ++ root = sb->s_root; ++ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS); ++ if (unlikely(!add_branch)) ++ goto out; + -+ err = au_lkup_neg(dentry, bdst); -+ if (!err) { -+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL); -+ if (!err) -+ return 0; /* success */ ++ err = au_hnotify_init_br(add_branch, perm); ++ if (unlikely(err)) ++ goto out_br; + -+ /* revert */ -+ au_set_h_dptr(dentry, bdst, NULL); -+ au_set_dbstart(dentry, bsrc); ++ add_branch->br_wbr = NULL; ++ if (au_br_writable(perm)) { ++ /* may be freed separately at changing the branch permission */ ++ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr), ++ GFP_NOFS); ++ if (unlikely(!add_branch->br_wbr)) ++ goto out_hnotify; + } + -+ return err; -+} ++ err = au_sbr_realloc(au_sbi(sb), new_nbranch); ++ if (!err) ++ err = au_di_realloc(au_di(root), new_nbranch); ++ if (!err) ++ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch); ++ if (!err) ++ return add_branch; /* success */ + -+struct au_cpup_simple_args { -+ int *errp; -+ struct dentry *dentry; -+ aufs_bindex_t bdst; -+ loff_t len; -+ unsigned int flags; -+}; ++ kfree(add_branch->br_wbr); + -+static void au_call_cpup_simple(void *args) -+{ -+ struct au_cpup_simple_args *a = args; -+ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags); ++out_hnotify: ++ au_hnotify_fin_br(add_branch); ++out_br: ++ kfree(add_branch); ++out: ++ return ERR_PTR(err); +} + -+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, -+ unsigned int flags) ++/* ++ * test if the branch permission is legal or not. ++ */ ++static int test_br(struct inode *inode, int brperm, char *path) +{ -+ int err, wkq_err; -+ struct dentry *parent; -+ struct inode *h_dir; ++ int err; + -+ parent = dget_parent(dentry); -+ h_dir = au_h_iptr(parent->d_inode, bdst); -+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE) -+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) -+ err = au_cpup_simple(dentry, bdst, len, flags); -+ else { -+ struct au_cpup_simple_args args = { -+ .errp = &err, -+ .dentry = dentry, -+ .bdst = bdst, -+ .len = len, -+ .flags = flags -+ }; -+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } ++ err = (au_br_writable(brperm) && IS_RDONLY(inode)); ++ if (!err) ++ goto out; + -+ dput(parent); ++ err = -EINVAL; ++ pr_err("write permission for readonly mount or inode, %s\n", path); ++ ++out: + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ +/* -+ * copyup the deleted file for writing. ++ * returns: ++ * 0: success, the caller will add it ++ * plus: success, it is already unified, the caller should ignore it ++ * minus: error + */ -+static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, -+ struct dentry *wh_dentry, struct file *file, -+ loff_t len) ++static int test_add(struct super_block *sb, struct au_opt_add *add, int remount) +{ + int err; -+ aufs_bindex_t bstart; -+ struct au_dinfo *dinfo; -+ struct dentry *h_d_dst, *h_d_start; -+ struct au_hdentry *hdp; ++ aufs_bindex_t bend, bindex; ++ struct dentry *root; ++ struct inode *inode, *h_inode; + -+ dinfo = au_di(dentry); -+ AuRwMustWriteLock(&dinfo->di_rwsem); ++ root = sb->s_root; ++ bend = au_sbend(sb); ++ if (unlikely(bend >= 0 ++ && au_find_dbindex(root, add->path.dentry) >= 0)) { ++ err = 1; ++ if (!remount) { ++ err = -EINVAL; ++ pr_err("%s duplicated\n", add->pathname); ++ } ++ goto out; ++ } + -+ bstart = dinfo->di_bstart; -+ hdp = dinfo->di_hdentry; -+ h_d_dst = hdp[0 + bdst].hd_dentry; -+ dinfo->di_bstart = bdst; -+ hdp[0 + bdst].hd_dentry = wh_dentry; -+ if (file) { -+ h_d_start = hdp[0 + bstart].hd_dentry; -+ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry; ++ err = -ENOSPC; /* -E2BIG; */ ++ if (unlikely(AUFS_BRANCH_MAX <= add->bindex ++ || AUFS_BRANCH_MAX - 1 <= bend)) { ++ pr_err("number of branches exceeded %s\n", add->pathname); ++ goto out; + } -+ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME, -+ /*h_parent*/NULL); -+ if (file) { -+ if (!err) -+ err = au_reopen_nondir(file); -+ hdp[0 + bstart].hd_dentry = h_d_start; ++ ++ err = -EDOM; ++ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { ++ pr_err("bad index %d\n", add->bindex); ++ goto out; + } -+ hdp[0 + bdst].hd_dentry = h_d_dst; -+ dinfo->di_bstart = bstart; + -+ return err; -+} ++ inode = add->path.dentry->d_inode; ++ err = -ENOENT; ++ if (unlikely(!inode->i_nlink)) { ++ pr_err("no existence %s\n", add->pathname); ++ goto out; ++ } + -+static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, -+ struct file *file) -+{ -+ int err; -+ struct au_dtime dt; -+ struct dentry *parent, *h_parent, *wh_dentry; -+ struct au_branch *br; -+ struct path h_path; ++ err = -EINVAL; ++ if (unlikely(inode->i_sb == sb)) { ++ pr_err("%s must be outside\n", add->pathname); ++ goto out; ++ } + -+ br = au_sbr(dentry->d_sb, bdst); -+ parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bdst); -+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) ++ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) { ++ pr_err("unsupported filesystem, %s (%s)\n", ++ add->pathname, au_sbtype(inode->i_sb)); + goto out; ++ } + -+ h_path.dentry = h_parent; -+ h_path.mnt = br->br_mnt; -+ au_dtime_store(&dt, parent, &h_path); -+ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len); ++ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname); + if (unlikely(err)) -+ goto out_wh; ++ goto out; + -+ dget(wh_dentry); -+ h_path.dentry = wh_dentry; -+ if (!S_ISDIR(wh_dentry->d_inode->i_mode)) -+ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0); -+ else -+ err = vfsub_rmdir(h_parent->d_inode, &h_path); -+ if (unlikely(err)) { -+ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n", -+ AuDLNPair(wh_dentry), err); -+ err = -EIO; ++ if (bend < 0) ++ return 0; /* success */ ++ ++ err = -EINVAL; ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (unlikely(test_overlap(sb, add->path.dentry, ++ au_h_dptr(root, bindex)))) { ++ pr_err("%s is overlapped\n", add->pathname); ++ goto out; ++ } ++ ++ err = 0; ++ if (au_opt_test(au_mntflags(sb), WARN_PERM)) { ++ h_inode = au_h_dptr(root, 0)->d_inode; ++ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO) ++ || !uid_eq(h_inode->i_uid, inode->i_uid) ++ || !gid_eq(h_inode->i_gid, inode->i_gid)) ++ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n", ++ add->pathname, ++ i_uid_read(inode), i_gid_read(inode), ++ (inode->i_mode & S_IALLUGO), ++ i_uid_read(h_inode), i_gid_read(h_inode), ++ (h_inode->i_mode & S_IALLUGO)); + } -+ au_dtime_revert(&dt); -+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry); + -+out_wh: -+ dput(wh_dentry); +out: -+ dput(parent); + return err; +} + -+struct au_cpup_wh_args { -+ int *errp; -+ struct dentry *dentry; -+ aufs_bindex_t bdst; -+ loff_t len; -+ struct file *file; -+}; -+ -+static void au_call_cpup_wh(void *args) -+{ -+ struct au_cpup_wh_args *a = args; -+ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file); -+} -+ -+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, -+ struct file *file) ++/* ++ * initialize or clean the whiteouts for an adding branch ++ */ ++static int au_br_init_wh(struct super_block *sb, struct au_branch *br, ++ int new_perm) +{ -+ int err, wkq_err; -+ struct dentry *parent, *h_orph, *h_parent, *h_dentry; -+ struct inode *dir, *h_dir, *h_tmpdir, *h_inode; -+ struct au_wbr *wbr; -+ -+ parent = dget_parent(dentry); -+ dir = parent->d_inode; -+ h_orph = NULL; -+ h_parent = NULL; -+ h_dir = au_igrab(au_h_iptr(dir, bdst)); -+ h_tmpdir = h_dir; -+ if (!h_dir->i_nlink) { -+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr; -+ h_orph = wbr->wbr_orph; ++ int err, old_perm; ++ aufs_bindex_t bindex; ++ struct mutex *h_mtx; ++ struct au_wbr *wbr; ++ struct au_hinode *hdir; + -+ h_parent = dget(au_h_dptr(parent, bdst)); -+ au_set_h_dptr(parent, bdst, dget(h_orph)); -+ h_tmpdir = h_orph->d_inode; -+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0); ++ err = vfsub_mnt_want_write(au_br_mnt(br)); ++ if (unlikely(err)) ++ goto out; + -+ /* this temporary unlock is safe */ -+ if (file) -+ h_dentry = au_hf_top(file)->f_dentry; -+ else -+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry)); -+ h_inode = h_dentry->d_inode; -+ IMustLock(h_inode); -+ mutex_unlock(&h_inode->i_mutex); -+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3); -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ /* todo: au_h_open_pre()? */ ++ wbr = br->br_wbr; ++ old_perm = br->br_perm; ++ br->br_perm = new_perm; ++ hdir = NULL; ++ h_mtx = NULL; ++ bindex = au_br_index(sb, br->br_id); ++ if (0 <= bindex) { ++ hdir = au_hi(sb->s_root->d_inode, bindex); ++ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); ++ } else { ++ h_mtx = &au_br_dentry(br)->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_PARENT); + } -+ -+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE) -+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) -+ err = au_cpup_wh(dentry, bdst, len, file); ++ if (!wbr) ++ err = au_wh_init(br, sb); + else { -+ struct au_cpup_wh_args args = { -+ .errp = &err, -+ .dentry = dentry, -+ .bdst = bdst, -+ .len = len, -+ .file = file -+ }; -+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; ++ wbr_wh_write_lock(wbr); ++ err = au_wh_init(br, sb); ++ wbr_wh_write_unlock(wbr); + } ++ if (hdir) ++ au_hn_imtx_unlock(hdir); ++ else ++ mutex_unlock(h_mtx); ++ vfsub_mnt_drop_write(au_br_mnt(br)); ++ br->br_perm = old_perm; + -+ if (h_orph) { -+ mutex_unlock(&h_tmpdir->i_mutex); -+ /* todo: au_h_open_post()? */ -+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0); -+ au_set_h_dptr(parent, bdst, h_parent); ++ if (!err && wbr && !au_br_writable(new_perm)) { ++ kfree(wbr); ++ br->br_wbr = NULL; + } -+ iput(h_dir); -+ dput(parent); + ++out: + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * generic routine for both of copy-up and copy-down. -+ */ -+/* cf. revalidate function in file.c */ -+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, -+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, -+ struct dentry *h_parent, void *arg), -+ void *arg) ++static int au_wbr_init(struct au_branch *br, struct super_block *sb, ++ int perm) +{ + int err; -+ struct au_pin pin; -+ struct dentry *d, *parent, *h_parent, *real_parent; ++ struct kstatfs kst; ++ struct au_wbr *wbr; + -+ err = 0; -+ parent = dget_parent(dentry); -+ if (IS_ROOT(parent)) -+ goto out; ++ wbr = br->br_wbr; ++ au_rw_init(&wbr->wbr_wh_rwsem); ++ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh)); ++ atomic_set(&wbr->wbr_wh_running, 0); ++ wbr->wbr_bytes = 0; + -+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2, -+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE); ++ /* ++ * a limit for rmdir/rename a dir ++ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h ++ */ ++ err = vfs_statfs(&br->br_path, &kst); ++ if (unlikely(err)) ++ goto out; ++ err = -EINVAL; ++ if (kst.f_namelen >= NAME_MAX) ++ err = au_br_init_wh(sb, br, perm); ++ else ++ pr_err("%.*s(%s), unsupported namelen %ld\n", ++ AuDLNPair(au_br_dentry(br)), ++ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen); + -+ /* do not use au_dpage */ -+ real_parent = parent; -+ while (1) { -+ dput(parent); -+ parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bdst); -+ if (h_parent) -+ goto out; /* success */ ++out: ++ return err; ++} + -+ /* find top dir which is necessary to cpup */ -+ do { -+ d = parent; -+ dput(parent); -+ parent = dget_parent(d); -+ di_read_lock_parent3(parent, !AuLock_IR); -+ h_parent = au_h_dptr(parent, bdst); -+ di_read_unlock(parent, !AuLock_IR); -+ } while (!h_parent); ++/* intialize a new branch */ ++static int au_br_init(struct au_branch *br, struct super_block *sb, ++ struct au_opt_add *add) ++{ ++ int err; + -+ if (d != real_parent) -+ di_write_lock_child3(d); ++ err = 0; ++ memset(&br->br_xino, 0, sizeof(br->br_xino)); ++ mutex_init(&br->br_xino.xi_nondir_mtx); ++ br->br_perm = add->perm; ++ BUILD_BUG_ON(sizeof(br->br_dflags) ++ != sizeof(br->br_path.dentry->d_flags)); ++ br->br_dflags = DCACHE_MOUNTED; ++ br->br_path = add->path; /* set first, path_get() later */ ++ spin_lock_init(&br->br_dykey_lock); ++ memset(br->br_dykey, 0, sizeof(br->br_dykey)); ++ atomic_set(&br->br_count, 0); ++ br->br_xino_upper = AUFS_XINO_TRUNC_INIT; ++ atomic_set(&br->br_xino_running, 0); ++ br->br_id = au_new_br_id(sb); ++ AuDebugOn(br->br_id < 0); + -+ /* somebody else might create while we were sleeping */ -+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) { -+ if (au_h_dptr(d, bdst)) -+ au_update_dbstart(d); ++ if (au_br_writable(add->perm)) { ++ err = au_wbr_init(br, sb, add->perm); ++ if (unlikely(err)) ++ goto out_err; ++ } + -+ au_pin_set_dentry(&pin, d); -+ err = au_do_pin(&pin); -+ if (!err) { -+ err = cp(d, bdst, h_parent, arg); -+ au_unpin(&pin); -+ } ++ if (au_opt_test(au_mntflags(sb), XINO)) { ++ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino, ++ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1); ++ if (unlikely(err)) { ++ AuDebugOn(br->br_xino.xi_file); ++ goto out_err; + } -+ -+ if (d != real_parent) -+ di_write_unlock(d); -+ if (unlikely(err)) -+ break; + } + ++ sysaufs_br_init(br); ++ path_get(&br->br_path); ++ goto out; /* success */ ++ ++out_err: ++ memset(&br->br_path, 0, sizeof(br->br_path)); +out: -+ dput(parent); + return err; +} + -+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst, -+ struct dentry *h_parent __maybe_unused , -+ void *arg __maybe_unused) ++static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex, ++ struct au_branch *br, aufs_bindex_t bend, ++ aufs_bindex_t amount) +{ -+ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME); ++ struct au_branch **brp; ++ ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ brp = sbinfo->si_branch + bindex; ++ memmove(brp + 1, brp, sizeof(*brp) * amount); ++ *brp = br; ++ sbinfo->si_bend++; ++ if (unlikely(bend < 0)) ++ sbinfo->si_bend = 0; +} + -+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex, ++ aufs_bindex_t bend, aufs_bindex_t amount) +{ -+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL); ++ struct au_hdentry *hdp; ++ ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ hdp = dinfo->di_hdentry + bindex; ++ memmove(hdp + 1, hdp, sizeof(*hdp) * amount); ++ au_h_dentry_init(hdp); ++ dinfo->di_bend++; ++ if (unlikely(bend < 0)) ++ dinfo->di_bstart = 0; +} + -+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex, ++ aufs_bindex_t bend, aufs_bindex_t amount) +{ -+ int err; -+ struct dentry *parent; -+ struct inode *dir; -+ -+ parent = dget_parent(dentry); -+ dir = parent->d_inode; -+ err = 0; -+ if (au_h_iptr(dir, bdst)) -+ goto out; ++ struct au_hinode *hip; + -+ di_read_unlock(parent, AuLock_IR); -+ di_write_lock_parent(parent); -+ /* someone else might change our inode while we were sleeping */ -+ if (!au_h_iptr(dir, bdst)) -+ err = au_cpup_dirs(dentry, bdst); -+ di_downgrade_lock(parent, AuLock_IR); ++ AuRwMustWriteLock(&iinfo->ii_rwsem); + -+out: -+ dput(parent); -+ return err; ++ hip = iinfo->ii_hinode + bindex; ++ memmove(hip + 1, hip, sizeof(*hip) * amount); ++ hip->hi_inode = NULL; ++ au_hn_init(hip); ++ iinfo->ii_bend++; ++ if (unlikely(bend < 0)) ++ iinfo->ii_bstart = 0; +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/cpup.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ + -+/* -+ * copy-up/down functions -+ */ ++static void au_br_do_add(struct super_block *sb, struct au_branch *br, ++ aufs_bindex_t bindex) ++{ ++ struct dentry *root, *h_dentry; ++ struct inode *root_inode; ++ aufs_bindex_t bend, amount; + -+#ifndef __AUFS_CPUP_H__ -+#define __AUFS_CPUP_H__ ++ au_br_dflags_force(br); ++ ++ root = sb->s_root; ++ root_inode = root->d_inode; ++ bend = au_sbend(sb); ++ amount = bend + 1 - bindex; ++ h_dentry = au_br_dentry(br); ++ au_sbilist_lock(); ++ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount); ++ au_br_do_add_hdp(au_di(root), bindex, bend, amount); ++ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount); ++ au_set_h_dptr(root, bindex, dget(h_dentry)); ++ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode), ++ /*flags*/0); ++ au_sbilist_unlock(); ++} + -+#ifdef __KERNEL__ ++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount) ++{ ++ int err; ++ aufs_bindex_t bend, add_bindex; ++ struct dentry *root, *h_dentry; ++ struct inode *root_inode; ++ struct au_branch *add_branch; + -+#include ++ root = sb->s_root; ++ root_inode = root->d_inode; ++ IMustLock(root_inode); ++ err = test_add(sb, add, remount); ++ if (unlikely(err < 0)) ++ goto out; ++ if (err) { ++ err = 0; ++ goto out; /* success */ ++ } + -+struct inode; -+struct file; ++ bend = au_sbend(sb); ++ add_branch = au_br_alloc(sb, bend + 2, add->perm); ++ err = PTR_ERR(add_branch); ++ if (IS_ERR(add_branch)) ++ goto out; + -+void au_cpup_attr_flags(struct inode *dst, struct inode *src); -+void au_cpup_attr_timesizes(struct inode *inode); -+void au_cpup_attr_nlink(struct inode *inode, int force); -+void au_cpup_attr_changeable(struct inode *inode); -+void au_cpup_igen(struct inode *inode, struct inode *h_inode); -+void au_cpup_attr_all(struct inode *inode, int force); ++ err = au_br_init(add_branch, sb, add); ++ if (unlikely(err)) { ++ au_br_do_free(add_branch); ++ goto out; ++ } + -+/* ---------------------------------------------------------------------- */ ++ add_bindex = add->bindex; ++ if (!remount) ++ au_br_do_add(sb, add_branch, add_bindex); ++ else { ++ sysaufs_brs_del(sb, add_bindex); ++ au_br_do_add(sb, add_branch, add_bindex); ++ sysaufs_brs_add(sb, add_bindex); ++ } + -+/* cpup flags */ -+#define AuCpup_DTIME 1 /* do dtime_store/revert */ -+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino, -+ for link(2) */ -+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name) -+#define au_fset_cpup(flags, name) \ -+ do { (flags) |= AuCpup_##name; } while (0) -+#define au_fclr_cpup(flags, name) \ -+ do { (flags) &= ~AuCpup_##name; } while (0) ++ h_dentry = add->path.dentry; ++ if (!add_bindex) { ++ au_cpup_attr_all(root_inode, /*force*/1); ++ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes; ++ } else ++ au_add_nlink(root_inode, h_dentry->d_inode); + -+int au_copy_file(struct file *dst, struct file *src, loff_t len); -+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, -+ aufs_bindex_t bsrc, loff_t len, unsigned int flags, -+ struct dentry *dst_parent); -+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, -+ unsigned int flags); -+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, -+ struct file *file); ++ /* ++ * this test/set prevents aufs from handling unnecesary notify events ++ * of xino files, in case of re-adding a writable branch which was ++ * once detached from aufs. ++ */ ++ if (au_xino_brid(sb) < 0 ++ && au_br_writable(add_branch->br_perm) ++ && !au_test_fs_bad_xino(h_dentry->d_sb) ++ && add_branch->br_xino.xi_file ++ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry) ++ au_xino_brid_set(sb, add_branch->br_id); + -+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, -+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, -+ struct dentry *h_parent, void *arg), -+ void *arg); -+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); -+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++out: ++ return err; ++} + +/* ---------------------------------------------------------------------- */ + -+/* keep timestamps when copyup */ -+struct au_dtime { -+ struct dentry *dt_dentry; -+ struct path dt_h_path; -+ struct timespec dt_atime, dt_mtime; -+}; -+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, -+ struct path *h_path); -+void au_dtime_revert(struct au_dtime *dt); -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_CPUP_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/dbgaufs.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,334 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ +/* -+ * debugfs interface ++ * delete a branch + */ + -+#include -+#include "aufs.h" -+ -+#ifndef CONFIG_SYSFS -+#error DEBUG_FS depends upon SYSFS -+#endif ++/* to show the line number, do not make it inlined function */ ++#define AuVerbose(do_info, fmt, ...) do { \ ++ if (do_info) \ ++ pr_info(fmt, ##__VA_ARGS__); \ ++} while (0) + -+static struct dentry *dbgaufs; -+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; ++static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart, ++ aufs_bindex_t bend) ++{ ++ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend; ++} + -+/* 20 is max digits length of ulong 64 */ -+struct dbgaufs_arg { -+ int n; -+ char a[20 * 4]; -+}; ++static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart, ++ aufs_bindex_t bend) ++{ ++ return au_test_ibusy(dentry->d_inode, bstart, bend); ++} + +/* -+ * common function for all XINO files ++ * test if the branch is deletable or not. + */ -+static int dbgaufs_xi_release(struct inode *inode __maybe_unused, -+ struct file *file) ++static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex, ++ unsigned int sigen, const unsigned int verbose) +{ -+ kfree(file->private_data); -+ return 0; ++ int err, i, j, ndentry; ++ aufs_bindex_t bstart, bend; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry *d; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ for (i = 0; !err && i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = 0; !err && j < ndentry; j++) { ++ d = dpage->dentries[j]; ++ AuDebugOn(!d->d_count); ++ if (!au_digen_test(d, sigen)) { ++ di_read_lock_child(d, AuLock_IR); ++ if (unlikely(au_dbrange_test(d))) { ++ di_read_unlock(d, AuLock_IR); ++ continue; ++ } ++ } else { ++ di_write_lock_child(d); ++ if (unlikely(au_dbrange_test(d))) { ++ di_write_unlock(d); ++ continue; ++ } ++ err = au_reval_dpath(d, sigen); ++ if (!err) ++ di_downgrade_lock(d, AuLock_IR); ++ else { ++ di_write_unlock(d); ++ break; ++ } ++ } ++ ++ /* AuDbgDentry(d); */ ++ bstart = au_dbstart(d); ++ bend = au_dbend(d); ++ if (bstart <= bindex ++ && bindex <= bend ++ && au_h_dptr(d, bindex) ++ && au_test_dbusy(d, bstart, bend)) { ++ err = -EBUSY; ++ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d)); ++ AuDbgDentry(d); ++ } ++ di_read_unlock(d, AuLock_IR); ++ } ++ } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++out: ++ return err; +} + -+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt) ++static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex, ++ unsigned int sigen, const unsigned int verbose) +{ + int err; -+ struct kstat st; -+ struct dbgaufs_arg *p; ++ unsigned long long max, ull; ++ struct inode *i, **array; ++ aufs_bindex_t bstart, bend; + -+ err = -ENOMEM; -+ p = kmalloc(sizeof(*p), GFP_NOFS); -+ if (unlikely(!p)) ++ array = au_iarray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) + goto out; + + err = 0; -+ p->n = 0; -+ file->private_data = p; -+ if (!xf) -+ goto out; ++ AuDbg("b%d\n", bindex); ++ for (ull = 0; !err && ull < max; ull++) { ++ i = array[ull]; ++ if (i->i_ino == AUFS_ROOT_INO) ++ continue; ++ ++ /* AuDbgInode(i); */ ++ if (au_iigen(i, NULL) == sigen) ++ ii_read_lock_child(i); ++ else { ++ ii_write_lock_child(i); ++ err = au_refresh_hinode_self(i); ++ au_iigen_dec(i); ++ if (!err) ++ ii_downgrade_lock(i); ++ else { ++ ii_write_unlock(i); ++ break; ++ } ++ } + -+ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st); -+ if (!err) { -+ if (do_fcnt) -+ p->n = snprintf -+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n", -+ (long)file_count(xf), st.blocks, st.blksize, -+ (long long)st.size); -+ else -+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n", -+ st.blocks, st.blksize, -+ (long long)st.size); -+ AuDebugOn(p->n >= sizeof(p->a)); -+ } else { -+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err); -+ err = 0; ++ bstart = au_ibstart(i); ++ bend = au_ibend(i); ++ if (bstart <= bindex ++ && bindex <= bend ++ && au_h_iptr(i, bindex) ++ && au_test_ibusy(i, bstart, bend)) { ++ err = -EBUSY; ++ AuVerbose(verbose, "busy i%lu\n", i->i_ino); ++ AuDbgInode(i); ++ } ++ ii_read_unlock(i); + } ++ au_iarray_free(array, max); + +out: + return err; -+ +} + -+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf, -+ size_t count, loff_t *ppos) ++static int test_children_busy(struct dentry *root, aufs_bindex_t bindex, ++ const unsigned int verbose) +{ -+ struct dbgaufs_arg *p; ++ int err; ++ unsigned int sigen; + -+ p = file->private_data; -+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); -+} ++ sigen = au_sigen(root->d_sb); ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ err = test_dentry_busy(root, bindex, sigen, verbose); ++ if (!err) ++ err = test_inode_busy(root->d_sb, bindex, sigen, verbose); ++ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ + -+/* ---------------------------------------------------------------------- */ ++ return err; ++} + -+static int dbgaufs_xib_open(struct inode *inode, struct file *file) ++static void au_br_do_del_brp(struct au_sbinfo *sbinfo, ++ const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) +{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; ++ struct au_branch **brp, **p; + -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ si_noflush_read_lock(sb); -+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0); -+ si_read_unlock(sb); -+ return err; ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ ++ brp = sbinfo->si_branch + bindex; ++ if (bindex < bend) ++ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex)); ++ sbinfo->si_branch[0 + bend] = NULL; ++ sbinfo->si_bend--; ++ ++ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ sbinfo->si_branch = p; ++ /* harmless error */ +} + -+static const struct file_operations dbgaufs_xib_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_xib_open, -+ .release = dbgaufs_xi_release, -+ .read = dbgaufs_xi_read -+}; ++static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) ++{ ++ struct au_hdentry *hdp, *p; + -+/* ---------------------------------------------------------------------- */ ++ AuRwMustWriteLock(&dinfo->di_rwsem); + -+#define DbgaufsXi_PREFIX "xi" ++ hdp = dinfo->di_hdentry; ++ if (bindex < bend) ++ memmove(hdp + bindex, hdp + bindex + 1, ++ sizeof(*hdp) * (bend - bindex)); ++ hdp[0 + bend].hd_dentry = NULL; ++ dinfo->di_bend--; + -+static int dbgaufs_xino_open(struct inode *inode, struct file *file) ++ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ dinfo->di_hdentry = p; ++ /* harmless error */ ++} ++ ++static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex, ++ const aufs_bindex_t bend) +{ -+ int err; -+ long l; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; -+ struct file *xf; -+ struct qstr *name; ++ struct au_hinode *hip, *p; + -+ err = -ENOENT; -+ xf = NULL; -+ name = &file->f_dentry->d_name; -+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX) -+ || memcmp(name->name, DbgaufsXi_PREFIX, -+ sizeof(DbgaufsXi_PREFIX) - 1))) -+ goto out; -+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l); -+ if (unlikely(err)) -+ goto out; ++ AuRwMustWriteLock(&iinfo->ii_rwsem); + -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ si_noflush_read_lock(sb); -+ if (l <= au_sbend(sb)) { -+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file; -+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1); -+ } else -+ err = -ENOENT; -+ si_read_unlock(sb); ++ hip = iinfo->ii_hinode + bindex; ++ if (bindex < bend) ++ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex)); ++ iinfo->ii_hinode[0 + bend].hi_inode = NULL; ++ au_hn_init(iinfo->ii_hinode + bend); ++ iinfo->ii_bend--; + -+out: -+ return err; ++ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST); ++ if (p) ++ iinfo->ii_hinode = p; ++ /* harmless error */ +} + -+static const struct file_operations dbgaufs_xino_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_xino_open, -+ .release = dbgaufs_xi_release, -+ .read = dbgaufs_xi_read -+}; -+ -+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) ++static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex, ++ struct au_branch *br) +{ + aufs_bindex_t bend; -+ struct au_branch *br; -+ struct au_xino_file *xi; ++ struct au_sbinfo *sbinfo; ++ struct dentry *root, *h_root; ++ struct inode *inode, *h_inode; ++ struct au_hinode *hinode; + -+ if (!au_sbi(sb)->si_dbgaufs) -+ return; ++ SiMustWriteLock(sb); + -+ bend = au_sbend(sb); -+ for (; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ xi = &br->br_xino; -+ if (xi->xi_dbgaufs) { -+ debugfs_remove(xi->xi_dbgaufs); -+ xi->xi_dbgaufs = NULL; -+ } -+ } ++ root = sb->s_root; ++ inode = root->d_inode; ++ sbinfo = au_sbi(sb); ++ bend = sbinfo->si_bend; ++ ++ h_root = au_h_dptr(root, bindex); ++ hinode = au_hi(inode, bindex); ++ h_inode = au_igrab(hinode->hi_inode); ++ au_hiput(hinode); ++ ++ au_sbilist_lock(); ++ au_br_do_del_brp(sbinfo, bindex, bend); ++ au_br_do_del_hdp(au_di(root), bindex, bend); ++ au_br_do_del_hip(au_ii(inode), bindex, bend); ++ au_sbilist_unlock(); ++ ++ dput(h_root); ++ iput(h_inode); ++ au_br_do_free(br); +} + -+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) ++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount) +{ -+ struct au_sbinfo *sbinfo; -+ struct dentry *parent; ++ int err, rerr, i; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex, bend, br_id; ++ unsigned char do_wh, verbose; + struct au_branch *br; -+ struct au_xino_file *xi; -+ aufs_bindex_t bend; -+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */ ++ struct au_wbr *wbr; + -+ sbinfo = au_sbi(sb); -+ parent = sbinfo->si_dbgaufs; -+ if (!parent) -+ return; ++ err = 0; ++ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry); ++ if (bindex < 0) { ++ if (remount) ++ goto out; /* success */ ++ err = -ENOENT; ++ pr_err("%s no such branch\n", del->pathname); ++ goto out; ++ } ++ AuDbg("bindex b%d\n", bindex); + ++ err = -EBUSY; ++ mnt_flags = au_mntflags(sb); ++ verbose = !!au_opt_test(mnt_flags, VERBOSE); + bend = au_sbend(sb); -+ for (; bindex <= bend; bindex++) { -+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex); -+ br = au_sbr(sb, bindex); -+ xi = &br->br_xino; -+ AuDebugOn(xi->xi_dbgaufs); -+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent, -+ sbinfo, &dbgaufs_xino_fop); -+ /* ignore an error */ -+ if (unlikely(!xi->xi_dbgaufs)) -+ AuWarn1("failed %s under debugfs\n", name); ++ if (unlikely(!bend)) { ++ AuVerbose(verbose, "no more branches left\n"); ++ goto out; ++ } ++ br = au_sbr(sb, bindex); ++ AuDebugOn(!path_equal(&br->br_path, &del->h_path)); ++ i = atomic_read(&br->br_count); ++ if (unlikely(i)) { ++ AuVerbose(verbose, "%d file(s) opened\n", i); ++ goto out; + } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_EXPORT -+static int dbgaufs_xigen_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; + -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ si_noflush_read_lock(sb); -+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0); -+ si_read_unlock(sb); -+ return err; -+} ++ wbr = br->br_wbr; ++ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph); ++ if (do_wh) { ++ /* instead of WbrWhMustWriteLock(wbr) */ ++ SiMustWriteLock(sb); ++ for (i = 0; i < AuBrWh_Last; i++) { ++ dput(wbr->wbr_wh[i]); ++ wbr->wbr_wh[i] = NULL; ++ } ++ } + -+static const struct file_operations dbgaufs_xigen_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_xigen_open, -+ .release = dbgaufs_xi_release, -+ .read = dbgaufs_xi_read -+}; ++ err = test_children_busy(sb->s_root, bindex, verbose); ++ if (unlikely(err)) { ++ if (do_wh) ++ goto out_wh; ++ goto out; ++ } + -+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) -+{ -+ int err; ++ err = 0; ++ br_id = br->br_id; ++ if (!remount) ++ au_br_do_del(sb, bindex, br); ++ else { ++ sysaufs_brs_del(sb, bindex); ++ au_br_do_del(sb, bindex, br); ++ sysaufs_brs_add(sb, bindex); ++ } + -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. -+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ if (!bindex) { ++ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1); ++ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes; ++ } else ++ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode); ++ if (au_opt_test(mnt_flags, PLINK)) ++ au_plink_half_refresh(sb, br_id); + -+ err = -EIO; -+ sbinfo->si_dbgaufs_xigen = debugfs_create_file -+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, -+ &dbgaufs_xigen_fop); -+ if (sbinfo->si_dbgaufs_xigen) -+ err = 0; ++ if (au_xino_brid(sb) == br_id) ++ au_xino_brid_set(sb, -1); ++ goto out; /* success */ + ++out_wh: ++ /* revert */ ++ rerr = au_br_init_wh(sb, br, br->br_perm); ++ if (rerr) ++ pr_warn("failed re-creating base whiteout, %s. (%d)\n", ++ del->pathname, rerr); ++out: + return err; +} -+#else -+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) -+{ -+ return 0; -+} -+#endif /* CONFIG_AUFS_EXPORT */ + +/* ---------------------------------------------------------------------- */ + -+void dbgaufs_si_fin(struct au_sbinfo *sbinfo) -+{ -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. -+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ -+ -+ debugfs_remove_recursive(sbinfo->si_dbgaufs); -+ sbinfo->si_dbgaufs = NULL; -+ kobject_put(&sbinfo->si_kobj); -+} -+ -+int dbgaufs_si_init(struct au_sbinfo *sbinfo) ++static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg) +{ + int err; -+ char name[SysaufsSiNameLen]; ++ aufs_bindex_t bstart, bend; ++ struct aufs_ibusy ibusy; ++ struct inode *inode, *h_inode; + -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. -+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ err = -EPERM; ++ if (unlikely(!capable(CAP_SYS_ADMIN))) ++ goto out; + -+ err = -ENOENT; -+ if (!dbgaufs) { -+ AuErr1("/debug/aufs is uninitialized\n"); ++ err = copy_from_user(&ibusy, arg, sizeof(ibusy)); ++ if (!err) ++ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); + goto out; + } + -+ err = -EIO; -+ sysaufs_name(sbinfo, name); -+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs); -+ if (unlikely(!sbinfo->si_dbgaufs)) -+ goto out; -+ kobject_get(&sbinfo->si_kobj); ++ err = -EINVAL; ++ si_read_lock(sb, AuLock_FLUSH); ++ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb))) ++ goto out_unlock; + -+ sbinfo->si_dbgaufs_xib = debugfs_create_file -+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, -+ &dbgaufs_xib_fop); -+ if (unlikely(!sbinfo->si_dbgaufs_xib)) -+ goto out_dir; ++ err = 0; ++ ibusy.h_ino = 0; /* invalid */ ++ inode = ilookup(sb, ibusy.ino); ++ if (!inode ++ || inode->i_ino == AUFS_ROOT_INO ++ || is_bad_inode(inode)) ++ goto out_unlock; + -+ err = dbgaufs_xigen_init(sbinfo); -+ if (!err) -+ goto out; /* success */ ++ ii_read_lock_child(inode); ++ bstart = au_ibstart(inode); ++ bend = au_ibend(inode); ++ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) { ++ h_inode = au_h_iptr(inode, ibusy.bindex); ++ if (h_inode && au_test_ibusy(inode, bstart, bend)) ++ ibusy.h_ino = h_inode->i_ino; ++ } ++ ii_read_unlock(inode); ++ iput(inode); + -+out_dir: -+ dbgaufs_si_fin(sbinfo); ++out_unlock: ++ si_read_unlock(sb); ++ if (!err) { ++ err = __put_user(ibusy.h_ino, &arg->h_ino); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ AuTraceErr(err); ++ } ++ } +out: + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+void dbgaufs_fin(void) ++long au_ibusy_ioctl(struct file *file, unsigned long arg) +{ -+ debugfs_remove(dbgaufs); ++ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg); +} + -+int __init dbgaufs_init(void) ++#ifdef CONFIG_COMPAT ++long au_ibusy_compat_ioctl(struct file *file, unsigned long arg) +{ -+ int err; -+ -+ err = -EIO; -+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL); -+ if (dbgaufs) -+ err = 0; -+ return err; ++ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg)); +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/dbgaufs.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * debugfs interface -+ */ -+ -+#ifndef __DBGAUFS_H__ -+#define __DBGAUFS_H__ -+ -+#ifdef __KERNEL__ -+ -+struct super_block; -+struct au_sbinfo; -+ -+#ifdef CONFIG_DEBUG_FS -+/* dbgaufs.c */ -+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); -+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); -+void dbgaufs_si_fin(struct au_sbinfo *sbinfo); -+int dbgaufs_si_init(struct au_sbinfo *sbinfo); -+void dbgaufs_fin(void); -+int __init dbgaufs_init(void); -+#else -+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) -+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) -+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo) -+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo) -+AuStubVoid(dbgaufs_fin, void) -+AuStubInt0(__init dbgaufs_init, void) -+#endif /* CONFIG_DEBUG_FS */ ++#endif + -+#endif /* __KERNEL__ */ -+#endif /* __DBGAUFS_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/dcsub.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,243 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ ++/* ---------------------------------------------------------------------- */ + +/* -+ * sub-routines for dentry cache ++ * change a branch permission + */ + -+#include "aufs.h" ++static void au_warn_ima(void) ++{ ++#ifdef CONFIG_IMA ++ /* since it doesn't support mark_files_ro() */ ++ AuWarn1("RW -> RO makes IMA to produce wrong message\n"); ++#endif ++} + -+static void au_dpage_free(struct au_dpage *dpage) ++static int do_need_sigen_inc(int a, int b) +{ -+ int i; -+ struct dentry **p; ++ return au_br_whable(a) && !au_br_whable(b); ++} + -+ p = dpage->dentries; -+ for (i = 0; i < dpage->ndentry; i++) -+ dput(*p++); -+ free_page((unsigned long)dpage->dentries); ++static int need_sigen_inc(int old, int new) ++{ ++ return do_need_sigen_inc(old, new) ++ || do_need_sigen_inc(new, old); +} + -+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) ++static unsigned long long au_farray_cb(void *a, ++ unsigned long long max __maybe_unused, ++ void *arg) +{ -+ int err; -+ void *p; -+ -+ err = -ENOMEM; -+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); -+ if (unlikely(!dpages->dpages)) -+ goto out; ++ unsigned long long n; ++ struct file **p, *f; ++ struct super_block *sb = arg; + -+ p = (void *)__get_free_page(gfp); -+ if (unlikely(!p)) -+ goto out_dpages; ++ n = 0; ++ p = a; ++ lg_global_lock(&files_lglock); ++ do_file_list_for_each_entry(sb, f) { ++ if (au_fi(f) ++ && file_count(f) ++ && !special_file(file_inode(f)->i_mode)) { ++ get_file(f); ++ *p++ = f; ++ n++; ++ AuDebugOn(n > max); ++ } ++ } while_file_list_for_each_entry; ++ lg_global_unlock(&files_lglock); + -+ dpages->dpages[0].ndentry = 0; -+ dpages->dpages[0].dentries = p; -+ dpages->ndpage = 1; -+ return 0; /* success */ ++ return n; ++} + -+out_dpages: -+ kfree(dpages->dpages); -+out: -+ return err; ++static struct file **au_farray_alloc(struct super_block *sb, ++ unsigned long long *max) ++{ ++ *max = atomic_long_read(&au_sbi(sb)->si_nfiles); ++ return au_array_alloc(max, au_farray_cb, sb); +} + -+void au_dpages_free(struct au_dcsub_pages *dpages) ++static void au_farray_free(struct file **a, unsigned long long max) +{ -+ int i; -+ struct au_dpage *p; ++ unsigned long long ull; + -+ p = dpages->dpages; -+ for (i = 0; i < dpages->ndpage; i++) -+ au_dpage_free(p++); -+ kfree(dpages->dpages); ++ for (ull = 0; ull < max; ull++) ++ if (a[ull]) ++ fput(a[ull]); ++ au_array_free(a); +} + -+static int au_dpages_append(struct au_dcsub_pages *dpages, -+ struct dentry *dentry, gfp_t gfp) ++static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex) +{ -+ int err, sz; -+ struct au_dpage *dpage; -+ void *p; ++ int err, do_warn; ++ unsigned int mnt_flags; ++ unsigned long long ull, max; ++ aufs_bindex_t br_id; ++ unsigned char verbose; ++ struct file *file, *hf, **array; ++ struct inode *inode; ++ struct au_hfile *hfile; + -+ dpage = dpages->dpages + dpages->ndpage - 1; -+ sz = PAGE_SIZE / sizeof(dentry); -+ if (unlikely(dpage->ndentry >= sz)) { -+ AuLabel(new dpage); -+ err = -ENOMEM; -+ sz = dpages->ndpage * sizeof(*dpages->dpages); -+ p = au_kzrealloc(dpages->dpages, sz, -+ sz + sizeof(*dpages->dpages), gfp); -+ if (unlikely(!p)) -+ goto out; ++ mnt_flags = au_mntflags(sb); ++ verbose = !!au_opt_test(mnt_flags, VERBOSE); + -+ dpages->dpages = p; -+ dpage = dpages->dpages + dpages->ndpage; -+ p = (void *)__get_free_page(gfp); -+ if (unlikely(!p)) -+ goto out; ++ array = au_farray_alloc(sb, &max); ++ err = PTR_ERR(array); ++ if (IS_ERR(array)) ++ goto out; + -+ dpage->ndentry = 0; -+ dpage->dentries = p; -+ dpages->ndpage++; -+ } ++ do_warn = 0; ++ br_id = au_sbr_id(sb, bindex); ++ for (ull = 0; ull < max; ull++) { ++ file = array[ull]; + -+ AuDebugOn(!dentry->d_count); -+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); -+ return 0; /* success */ ++ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */ ++ fi_read_lock(file); ++ if (unlikely(au_test_mmapped(file))) { ++ err = -EBUSY; ++ AuVerbose(verbose, "mmapped %.*s\n", ++ AuDLNPair(file->f_dentry)); ++ AuDbgFile(file); ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ goto out_array; ++ } + -+out: -+ return err; -+} ++ inode = file_inode(file); ++ hfile = &au_fi(file)->fi_htop; ++ hf = hfile->hf_file; ++ if (!S_ISREG(inode->i_mode) ++ || !(file->f_mode & FMODE_WRITE) ++ || hfile->hf_br->br_id != br_id ++ || !(hf->f_mode & FMODE_WRITE)) ++ array[ull] = NULL; ++ else { ++ do_warn = 1; ++ get_file(file); ++ } + -+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, -+ au_dpages_test test, void *arg) -+{ -+ int err; -+ struct dentry *this_parent; -+ struct list_head *next; -+ struct super_block *sb = root->d_sb; ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ fput(file); ++ } + + err = 0; -+ write_seqlock(&rename_lock); -+ this_parent = root; -+ spin_lock(&this_parent->d_lock); -+repeat: -+ next = this_parent->d_subdirs.next; -+resume: -+ if (this_parent->d_sb == sb -+ && !IS_ROOT(this_parent) -+ && au_di(this_parent) -+ && this_parent->d_count -+ && (!test || test(this_parent, arg))) { -+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); -+ if (unlikely(err)) -+ goto out; -+ } ++ if (do_warn) ++ au_warn_ima(); + -+ while (next != &this_parent->d_subdirs) { -+ struct list_head *tmp = next; -+ struct dentry *dentry = list_entry(tmp, struct dentry, -+ d_u.d_child); ++ for (ull = 0; ull < max; ull++) { ++ file = array[ull]; ++ if (!file) ++ continue; + -+ next = tmp->next; -+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); -+ if (dentry->d_count) { -+ if (!list_empty(&dentry->d_subdirs)) { -+ spin_unlock(&this_parent->d_lock); -+ spin_release(&dentry->d_lock.dep_map, 1, -+ _RET_IP_); -+ this_parent = dentry; -+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, -+ _RET_IP_); -+ goto repeat; -+ } -+ if (dentry->d_sb == sb -+ && au_di(dentry) -+ && (!test || test(dentry, arg))) -+ err = au_dpages_append(dpages, dentry, -+ GFP_ATOMIC); ++ /* todo: already flushed? */ ++ /* cf. fs/super.c:mark_files_ro() */ ++ /* fi_read_lock(file); */ ++ hfile = &au_fi(file)->fi_htop; ++ hf = hfile->hf_file; ++ /* fi_read_unlock(file); */ ++ spin_lock(&hf->f_lock); ++ hf->f_mode &= ~FMODE_WRITE; ++ spin_unlock(&hf->f_lock); ++ if (!file_check_writeable(hf)) { ++ __mnt_drop_write(hf->f_path.mnt); ++ file_release_write(hf); + } -+ spin_unlock(&dentry->d_lock); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ if (this_parent != root) { -+ struct dentry *tmp; -+ struct dentry *child; -+ -+ tmp = this_parent->d_parent; -+ rcu_read_lock(); -+ spin_unlock(&this_parent->d_lock); -+ child = this_parent; -+ this_parent = tmp; -+ spin_lock(&this_parent->d_lock); -+ rcu_read_unlock(); -+ next = child->d_u.d_child.next; -+ goto resume; + } + ++out_array: ++ au_farray_free(array, max); +out: -+ spin_unlock(&this_parent->d_lock); -+ write_sequnlock(&rename_lock); ++ AuTraceErr(err); + return err; +} + -+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, -+ int do_include, au_dpages_test test, void *arg) ++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, ++ int *do_refresh) +{ -+ int err; -+ -+ err = 0; -+ write_seqlock(&rename_lock); -+ spin_lock(&dentry->d_lock); -+ if (do_include -+ && dentry->d_count -+ && (!test || test(dentry, arg))) -+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); -+ spin_unlock(&dentry->d_lock); -+ if (unlikely(err)) -+ goto out; ++ int err, rerr; ++ aufs_bindex_t bindex; ++ struct dentry *root; ++ struct au_branch *br; + -+ /* -+ * vfsmount_lock is unnecessary since this is a traverse in a single -+ * mount -+ */ -+ while (!IS_ROOT(dentry)) { -+ dentry = dentry->d_parent; /* rename_lock is locked */ -+ spin_lock(&dentry->d_lock); -+ if (dentry->d_count -+ && (!test || test(dentry, arg))) -+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); -+ spin_unlock(&dentry->d_lock); -+ if (unlikely(err)) -+ break; ++ root = sb->s_root; ++ bindex = au_find_dbindex(root, mod->h_root); ++ if (bindex < 0) { ++ if (remount) ++ return 0; /* success */ ++ err = -ENOENT; ++ pr_err("%s no such branch\n", mod->path); ++ goto out; + } ++ AuDbg("bindex b%d\n", bindex); + -+out: -+ write_sequnlock(&rename_lock); -+ return err; -+} ++ err = test_br(mod->h_root->d_inode, mod->perm, mod->path); ++ if (unlikely(err)) ++ goto out; + -+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg) -+{ -+ return au_di(dentry) && dentry->d_sb == arg; -+} ++ br = au_sbr(sb, bindex); ++ AuDebugOn(mod->h_root != au_br_dentry(br)); ++ if (br->br_perm == mod->perm) ++ return 0; /* success */ + -+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, -+ struct dentry *dentry, int do_include) -+{ -+ return au_dcsub_pages_rev(dpages, dentry, do_include, -+ au_dcsub_dpages_aufs, dentry->d_sb); -+} ++ if (au_br_writable(br->br_perm)) { ++ /* remove whiteout base */ ++ err = au_br_init_wh(sb, br, mod->perm); ++ if (unlikely(err)) ++ goto out; + -+int au_test_subdir(struct dentry *d1, struct dentry *d2) -+{ -+ struct path path[2] = { -+ { -+ .dentry = d1 -+ }, -+ { -+ .dentry = d2 ++ if (!au_br_writable(mod->perm)) { ++ /* rw --> ro, file might be mmapped */ ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ err = au_br_mod_files_ro(sb, bindex); ++ /* aufs_write_lock() calls ..._child() */ ++ di_write_lock_child(root); ++ ++ if (unlikely(err)) { ++ rerr = -ENOMEM; ++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), ++ GFP_NOFS); ++ if (br->br_wbr) ++ rerr = au_wbr_init(br, sb, br->br_perm); ++ if (unlikely(rerr)) { ++ AuIOErr("nested error %d (%d)\n", ++ rerr, err); ++ br->br_perm = mod->perm; ++ } ++ } + } -+ }; ++ } else if (au_br_writable(mod->perm)) { ++ /* ro --> rw */ ++ err = -ENOMEM; ++ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS); ++ if (br->br_wbr) { ++ err = au_wbr_init(br, sb, mod->perm); ++ if (unlikely(err)) { ++ kfree(br->br_wbr); ++ br->br_wbr = NULL; ++ } ++ } ++ } + -+ return path_is_under(path + 0, path + 1); ++ if (!err) { ++ if ((br->br_perm & AuBrAttr_UNPIN) ++ && !(mod->perm & AuBrAttr_UNPIN)) ++ au_br_dflags_force(br); ++ else if (!(br->br_perm & AuBrAttr_UNPIN) ++ && (mod->perm & AuBrAttr_UNPIN)) ++ au_br_dflags_restore(br); ++ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm); ++ br->br_perm = mod->perm; ++ } ++ ++out: ++ AuTraceErr(err); ++ return err; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/dcsub.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,94 @@ ++++ linux-3.9/fs/aufs/branch.h 2013-05-13 17:13:17.207129054 +0200 +@@ -0,0 +1,255 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -4715,84 +2944,286 @@ + */ + +/* -+ * sub-routines for dentry cache ++ * branch filesystems and xino for them + */ + -+#ifndef __AUFS_DCSUB_H__ -+#define __AUFS_DCSUB_H__ ++#ifndef __AUFS_BRANCH_H__ ++#define __AUFS_BRANCH_H__ + +#ifdef __KERNEL__ + -+#include -+#include ++#include ++#include "dynop.h" ++#include "rwsem.h" ++#include "super.h" + -+struct dentry; ++/* ---------------------------------------------------------------------- */ + -+struct au_dpage { -+ int ndentry; -+ struct dentry **dentries; ++/* a xino file */ ++struct au_xino_file { ++ struct file *xi_file; ++ struct mutex xi_nondir_mtx; ++ ++ /* todo: make xino files an array to support huge inode number */ ++ ++#ifdef CONFIG_DEBUG_FS ++ struct dentry *xi_dbgaufs; ++#endif +}; + -+struct au_dcsub_pages { -+ int ndpage; -+ struct au_dpage *dpages; ++/* members for writable branch only */ ++enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; ++struct au_wbr { ++ struct au_rwsem wbr_wh_rwsem; ++ struct dentry *wbr_wh[AuBrWh_Last]; ++ atomic_t wbr_wh_running; ++#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */ ++#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */ ++#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */ ++ ++ /* mfs mode */ ++ unsigned long long wbr_bytes; ++}; ++ ++/* ext2 has 3 types of operations at least, ext3 has 4 */ ++#define AuBrDynOp (AuDyLast * 4) ++ ++#ifdef CONFIG_AUFS_HFSNOTIFY ++/* support for asynchronous destruction */ ++struct au_br_hfsnotify { ++ struct fsnotify_group *hfsn_group; ++}; ++#endif ++ ++/* protected by superblock rwsem */ ++struct au_branch { ++ struct au_xino_file br_xino; ++ ++ aufs_bindex_t br_id; ++ ++ int br_perm; ++ unsigned int br_dflags; ++ struct path br_path; ++ spinlock_t br_dykey_lock; ++ struct au_dykey *br_dykey[AuBrDynOp]; ++ atomic_t br_count; ++ ++ struct au_wbr *br_wbr; ++ ++ /* xino truncation */ ++ blkcnt_t br_xino_upper; /* watermark in blocks */ ++ atomic_t br_xino_running; ++ ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ struct au_br_hfsnotify *br_hfsn; ++#endif ++ ++#ifdef CONFIG_SYSFS ++ /* an entry under sysfs per mount-point */ ++ char br_name[8]; ++ struct attribute br_attr; ++#endif +}; + +/* ---------------------------------------------------------------------- */ + -+/* dcsub.c */ -+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); -+void au_dpages_free(struct au_dcsub_pages *dpages); -+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); -+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, -+ au_dpages_test test, void *arg); -+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, -+ int do_include, au_dpages_test test, void *arg); -+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, -+ struct dentry *dentry, int do_include); -+int au_test_subdir(struct dentry *d1, struct dentry *d2); ++static inline struct vfsmount *au_br_mnt(struct au_branch *br) ++{ ++ return br->br_path.mnt; ++} ++ ++static inline struct dentry *au_br_dentry(struct au_branch *br) ++{ ++ return br->br_path.dentry; ++} ++ ++static inline struct super_block *au_br_sb(struct au_branch *br) ++{ ++ return au_br_mnt(br)->mnt_sb; ++} ++ ++/* branch permissions and attributes */ ++#define AuBrPerm_RW 1 /* writable, hardlinkable wh */ ++#define AuBrPerm_RO (1 << 1) /* readonly */ ++#define AuBrPerm_RR (1 << 2) /* natively readonly */ ++#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR) ++ ++#define AuBrRAttr_WH (1 << 3) /* whiteout-able */ ++ ++#define AuBrWAttr_NoLinkWH (1 << 4) /* un-hardlinkable whiteouts */ ++ ++#define AuBrAttr_UNPIN (1 << 5) /* rename-able top dir of ++ branch */ ++ ++static inline int au_br_writable(int brperm) ++{ ++ return brperm & AuBrPerm_RW; ++} ++ ++static inline int au_br_whable(int brperm) ++{ ++ return brperm & (AuBrPerm_RW | AuBrRAttr_WH); ++} ++ ++static inline int au_br_wh_linkable(int brperm) ++{ ++ return !(brperm & AuBrWAttr_NoLinkWH); ++} ++ ++static inline int au_br_rdonly(struct au_branch *br) ++{ ++ return ((au_br_sb(br)->s_flags & MS_RDONLY) ++ || !au_br_writable(br->br_perm)) ++ ? -EROFS : 0; ++} ++ ++static inline int au_br_hnotifyable(int brperm __maybe_unused) ++{ ++#ifdef CONFIG_AUFS_HNOTIFY ++ return !(brperm & AuBrPerm_RR); ++#else ++ return 0; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* branch.c */ ++struct au_sbinfo; ++void au_br_free(struct au_sbinfo *sinfo); ++int au_br_index(struct super_block *sb, aufs_bindex_t br_id); ++struct au_opt_add; ++int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount); ++struct au_opt_del; ++int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount); ++long au_ibusy_ioctl(struct file *file, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long au_ibusy_compat_ioctl(struct file *file, unsigned long arg); ++#endif ++struct au_opt_mod; ++int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, ++ int *do_refresh); ++ ++/* xino.c */ ++static const loff_t au_loff_max = LLONG_MAX; ++ ++int au_xib_trunc(struct super_block *sb); ++ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos); ++ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, ++ loff_t *pos); ++struct file *au_xino_create2(struct file *base_file, struct file *copy_src); ++struct file *au_xino_create(struct super_block *sb, char *fname, int silent); ++ino_t au_xino_new_ino(struct super_block *sb); ++void au_xino_delete_inode(struct inode *inode, const int unlinked); ++int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t ino); ++int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ ino_t *ino); ++int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino, ++ struct file *base_file, int do_test); ++int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex); ++ ++struct au_opt_xino; ++int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount); ++void au_xino_clr(struct super_block *sb); ++struct file *au_xino_def(struct super_block *sb); ++int au_xino_path(struct seq_file *seq, struct file *file); + +/* ---------------------------------------------------------------------- */ + -+static inline int au_d_hashed_positive(struct dentry *d) ++/* Superblock to branch */ ++static inline ++aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex) +{ -+ int err; -+ struct inode *inode = d->d_inode; -+ err = 0; -+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink)) -+ err = -ENOENT; -+ return err; ++ return au_sbr(sb, bindex)->br_id; +} + -+static inline int au_d_alive(struct dentry *d) ++static inline ++struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex) +{ -+ int err; -+ struct inode *inode; -+ err = 0; -+ if (!IS_ROOT(d)) -+ err = au_d_hashed_positive(d); -+ else { -+ inode = d->d_inode; -+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink)) -+ err = -ENOENT; -+ } -+ return err; ++ return au_br_mnt(au_sbr(sb, bindex)); +} + -+static inline int au_alive_dir(struct dentry *d) ++static inline ++struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex) +{ -+ int err; -+ err = au_d_alive(d); -+ if (unlikely(err || IS_DEADDIR(d->d_inode))) -+ err = -ENOENT; -+ return err; ++ return au_br_sb(au_sbr(sb, bindex)); ++} ++ ++static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ atomic_dec(&au_sbr(sb, bindex)->br_count); ++} ++ ++static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_sbr(sb, bindex)->br_perm; ++} ++ ++static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_br_whable(au_sbr_perm(sb, bindex)); +} + ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * wbr_wh_read_lock, wbr_wh_write_lock ++ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem); ++ ++#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem) ++#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem) ++#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem) ++ +#endif /* __KERNEL__ */ -+#endif /* __AUFS_DCSUB_H__ */ ++#endif /* __AUFS_BRANCH_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/conf.mk 2013-05-13 17:13:17.207129054 +0200 +@@ -0,0 +1,38 @@ ++ ++AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS} ++ ++define AuConf ++ifdef ${1} ++AuConfStr += ${1}=${${1}} ++endif ++endef ++ ++AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \ ++ SBILIST \ ++ HNOTIFY HFSNOTIFY \ ++ EXPORT INO_T_64 \ ++ RDU \ ++ PROC_MAP \ ++ SP_IATTR \ ++ SHWH \ ++ BR_RAMFS \ ++ BR_FUSE POLL \ ++ BR_HFSPLUS \ ++ BDEV_LOOP \ ++ DEBUG MAGIC_SYSRQ ++$(foreach i, ${AuConfAll}, \ ++ $(eval $(call AuConf,CONFIG_AUFS_${i}))) ++ ++AuConfName = ${obj}/conf.str ++${AuConfName}.tmp: FORCE ++ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@ ++${AuConfName}: ${AuConfName}.tmp ++ @diff -q $< $@ > /dev/null 2>&1 || { \ ++ echo ' GEN ' $@; \ ++ cp -p $< $@; \ ++ } ++FORCE: ++clean-files += ${AuConfName} ${AuConfName}.tmp ++${obj}/sysfs.o: ${AuConfName} ++ ++-include ${srctree}/${src}/conf_priv.mk --- /dev/null -+++ linux-3.x-rcN/fs/aufs/debug.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,490 @@ ++++ linux-3.9/fs/aufs/cpup.c 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,1220 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -4811,1789 +3242,1734 @@ + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + -+/* -+ * debug print functions -+ */ -+ -+#include -+#include "aufs.h" -+ -+int aufs_debug; -+MODULE_PARM_DESC(debug, "debug print"); -+module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP); -+ -+char *au_plevel = KERN_DEBUG; -+#define dpri(fmt, ...) do { \ -+ if ((au_plevel \ -+ && strcmp(au_plevel, KERN_DEBUG)) \ -+ || au_debug_test()) \ -+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \ -+} while (0) -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_dpri_whlist(struct au_nhash *whlist) -+{ -+ unsigned long ul, n; -+ struct hlist_head *head; -+ struct au_vdir_wh *tpos; -+ struct hlist_node *pos; -+ -+ n = whlist->nh_num; -+ head = whlist->nh_head; -+ for (ul = 0; ul < n; ul++) { -+ hlist_for_each_entry(tpos, pos, head, wh_hash) -+ dpri("b%d, %.*s, %d\n", -+ tpos->wh_bindex, -+ tpos->wh_str.len, tpos->wh_str.name, -+ tpos->wh_str.len); -+ head++; -+ } -+} -+ -+void au_dpri_vdir(struct au_vdir *vdir) -+{ -+ unsigned long ul; -+ union au_vdir_deblk_p p; -+ unsigned char *o; -+ -+ if (!vdir || IS_ERR(vdir)) { -+ dpri("err %ld\n", PTR_ERR(vdir)); -+ return; -+ } -+ -+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n", -+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk, -+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version); -+ for (ul = 0; ul < vdir->vd_nblk; ul++) { -+ p.deblk = vdir->vd_deblk[ul]; -+ o = p.deblk; -+ dpri("[%lu]: %p\n", ul, o); -+ } -+} -+ -+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn, -+ struct dentry *wh) -+{ -+ char *n = NULL; -+ int l = 0; -+ -+ if (!inode || IS_ERR(inode)) { -+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode)); -+ return -1; -+ } -+ -+ /* the type of i_blocks depends upon CONFIG_LSF */ -+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long) -+ && sizeof(inode->i_blocks) != sizeof(u64)); -+ if (wh) { -+ n = (void *)wh->d_name.name; -+ l = wh->d_name.len; -+ } -+ -+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu," -+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n", -+ bindex, inode, -+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??", -+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode, -+ i_size_read(inode), (unsigned long long)inode->i_blocks, -+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, -+ inode->i_mapping ? inode->i_mapping->nrpages : 0, -+ inode->i_state, inode->i_flags, inode->i_version, -+ inode->i_generation, -+ l ? ", wh " : "", l, n); -+ return 0; -+} -+ -+void au_dpri_inode(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ aufs_bindex_t bindex; -+ int err, hn; -+ -+ err = do_pri_inode(-1, inode, -1, NULL); -+ if (err || !au_test_aufs(inode->i_sb)) -+ return; -+ -+ iinfo = au_ii(inode); -+ if (!iinfo) -+ return; -+ dpri("i-1: bstart %d, bend %d, gen %d\n", -+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL)); -+ if (iinfo->ii_bstart < 0) -+ return; -+ hn = 0; -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) { -+ hn = !!au_hn(iinfo->ii_hinode + bindex); -+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn, -+ iinfo->ii_hinode[0 + bindex].hi_whdentry); -+ } -+} -+ -+void au_dpri_dalias(struct inode *inode) -+{ -+ struct dentry *d; -+ struct hlist_node *p; -+ -+ spin_lock(&inode->i_lock); -+ hlist_for_each_entry(d, p, &inode->i_dentry, d_alias) -+ au_dpri_dentry(d); -+ spin_unlock(&inode->i_lock); -+} -+ -+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) -+{ -+ struct dentry *wh = NULL; -+ int hn; -+ -+ if (!dentry || IS_ERR(dentry)) { -+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry)); -+ return -1; -+ } -+ /* do not call dget_parent() here */ -+ /* note: access d_xxx without d_lock */ -+ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n", -+ bindex, -+ AuDLNPair(dentry->d_parent), AuDLNPair(dentry), -+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??", -+ dentry->d_count, dentry->d_flags); -+ hn = -1; -+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) { -+ struct au_iinfo *iinfo = au_ii(dentry->d_inode); -+ if (iinfo) { -+ hn = !!au_hn(iinfo->ii_hinode + bindex); -+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry; -+ } -+ } -+ do_pri_inode(bindex, dentry->d_inode, hn, wh); -+ return 0; -+} -+ -+void au_dpri_dentry(struct dentry *dentry) ++/* ++ * copy-up functions, see wbr_policy.c for copy-down ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++void au_cpup_attr_flags(struct inode *dst, unsigned int iflags) +{ -+ struct au_dinfo *dinfo; -+ aufs_bindex_t bindex; -+ int err; -+ struct au_hdentry *hdp; ++ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE ++ | S_NOATIME | S_NOCMTIME; + -+ err = do_pri_dentry(-1, dentry); -+ if (err || !au_test_aufs(dentry->d_sb)) -+ return; ++ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags)); + -+ dinfo = au_di(dentry); -+ if (!dinfo) -+ return; -+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n", -+ dinfo->di_bstart, dinfo->di_bend, -+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry)); -+ if (dinfo->di_bstart < 0) -+ return; -+ hdp = dinfo->di_hdentry; -+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++) -+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry); ++ dst->i_flags |= iflags & ~mask; ++ if (au_test_fs_notime(dst->i_sb)) ++ dst->i_flags |= S_NOATIME | S_NOCMTIME; +} + -+static int do_pri_file(aufs_bindex_t bindex, struct file *file) ++void au_cpup_attr_timesizes(struct inode *inode) +{ -+ char a[32]; ++ struct inode *h_inode; + -+ if (!file || IS_ERR(file)) { -+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file)); -+ return -1; -+ } -+ a[0] = 0; -+ if (bindex < 0 -+ && file->f_dentry -+ && au_test_aufs(file->f_dentry->d_sb) -+ && au_fi(file)) -+ snprintf(a, sizeof(a), ", gen %d, mmapped %d", -+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped)); -+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n", -+ bindex, file->f_mode, file->f_flags, (long)file_count(file), -+ file->f_version, file->f_pos, a); -+ if (file->f_dentry) -+ do_pri_dentry(bindex, file->f_dentry); -+ return 0; ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ fsstack_copy_attr_times(inode, h_inode); ++ fsstack_copy_inode_size(inode, h_inode); +} + -+void au_dpri_file(struct file *file) ++void au_cpup_attr_nlink(struct inode *inode, int force) +{ -+ struct au_finfo *finfo; -+ struct au_fidir *fidir; -+ struct au_hfile *hfile; -+ aufs_bindex_t bindex; -+ int err; ++ struct inode *h_inode; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend; + -+ err = do_pri_file(-1, file); -+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb)) ++ sb = inode->i_sb; ++ bindex = au_ibstart(inode); ++ h_inode = au_h_iptr(inode, bindex); ++ if (!force ++ && !S_ISDIR(h_inode->i_mode) ++ && au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode)) + return; + -+ finfo = au_fi(file); -+ if (!finfo) -+ return; -+ if (finfo->fi_btop < 0) -+ return; -+ fidir = finfo->fi_hdir; -+ if (!fidir) -+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file); -+ else -+ for (bindex = finfo->fi_btop; -+ bindex >= 0 && bindex <= fidir->fd_bbot; -+ bindex++) { -+ hfile = fidir->fd_hfile + bindex; -+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL); ++ /* ++ * 0 can happen in revalidating. ++ * h_inode->i_mutex is not held, but it is harmless since once i_nlink ++ * reaches 0, it will never become positive. ++ */ ++ set_nlink(inode, h_inode->i_nlink); ++ ++ /* ++ * fewer nlink makes find(1) noisy, but larger nlink doesn't. ++ * it may includes whplink directory. ++ */ ++ if (S_ISDIR(h_inode->i_mode)) { ++ bend = au_ibend(inode); ++ for (bindex++; bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) ++ au_add_nlink(inode, h_inode); + } ++ } +} + -+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br) ++void au_cpup_attr_changeable(struct inode *inode) +{ -+ struct vfsmount *mnt; -+ struct super_block *sb; -+ -+ if (!br || IS_ERR(br)) -+ goto out; -+ mnt = br->br_mnt; -+ if (!mnt || IS_ERR(mnt)) -+ goto out; -+ sb = mnt->mnt_sb; -+ if (!sb || IS_ERR(sb)) -+ goto out; -+ -+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, " -+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, " -+ "xino %d\n", -+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count), -+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev), -+ sb->s_flags, sb->s_count, -+ atomic_read(&sb->s_active), !!br->br_xino.xi_file); -+ return 0; ++ struct inode *h_inode; + -+out: -+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); -+ return -1; ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ inode->i_mode = h_inode->i_mode; ++ inode->i_uid = h_inode->i_uid; ++ inode->i_gid = h_inode->i_gid; ++ au_cpup_attr_timesizes(inode); ++ au_cpup_attr_flags(inode, h_inode->i_flags); +} + -+void au_dpri_sb(struct super_block *sb) ++void au_cpup_igen(struct inode *inode, struct inode *h_inode) +{ -+ struct au_sbinfo *sbinfo; -+ aufs_bindex_t bindex; -+ int err; -+ /* to reuduce stack size */ -+ struct { -+ struct vfsmount mnt; -+ struct au_branch fake; -+ } *a; -+ -+ /* this function can be called from magic sysrq */ -+ a = kzalloc(sizeof(*a), GFP_ATOMIC); -+ if (unlikely(!a)) { -+ dpri("no memory\n"); -+ return; -+ } ++ struct au_iinfo *iinfo = au_ii(inode); + -+ a->mnt.mnt_sb = sb; -+ a->fake.br_perm = 0; -+ a->fake.br_mnt = &a->mnt; -+ a->fake.br_xino.xi_file = NULL; -+ atomic_set(&a->fake.br_count, 0); -+ smp_mb(); /* atomic_set */ -+ err = do_pri_br(-1, &a->fake); -+ kfree(a); -+ dpri("dev 0x%x\n", sb->s_dev); -+ if (err || !au_test_aufs(sb)) -+ return; ++ IiMustWriteLock(inode); + -+ sbinfo = au_sbi(sb); -+ if (!sbinfo) -+ return; -+ dpri("nw %d, gen %u, kobj %d\n", -+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation, -+ atomic_read(&sbinfo->si_kobj.kref.refcount)); -+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++) -+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]); ++ iinfo->ii_higen = h_inode->i_generation; ++ iinfo->ii_hsb1 = h_inode->i_sb; +} + -+/* ---------------------------------------------------------------------- */ -+ -+void au_dbg_sleep_jiffy(int jiffy) ++void au_cpup_attr_all(struct inode *inode, int force) +{ -+ while (jiffy) -+ jiffy = schedule_timeout_uninterruptible(jiffy); -+} ++ struct inode *h_inode; + -+void au_dbg_iattr(struct iattr *ia) -+{ -+#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \ -+ dpri(#name "\n") -+ AuBit(MODE); -+ AuBit(UID); -+ AuBit(GID); -+ AuBit(SIZE); -+ AuBit(ATIME); -+ AuBit(MTIME); -+ AuBit(CTIME); -+ AuBit(ATIME_SET); -+ AuBit(MTIME_SET); -+ AuBit(FORCE); -+ AuBit(ATTR_FLAG); -+ AuBit(KILL_SUID); -+ AuBit(KILL_SGID); -+ AuBit(FILE); -+ AuBit(KILL_PRIV); -+ AuBit(OPEN); -+ AuBit(TIMES_SET); -+#undef AuBit -+ dpri("ia_file %p\n", ia->ia_file); ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ au_cpup_attr_changeable(inode); ++ if (inode->i_nlink > 0) ++ au_cpup_attr_nlink(inode, force); ++ inode->i_rdev = h_inode->i_rdev; ++ inode->i_blkbits = h_inode->i_blkbits; ++ au_cpup_igen(inode, h_inode); +} + +/* ---------------------------------------------------------------------- */ + -+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line) -+{ -+ struct inode *h_inode, *inode = dentry->d_inode; -+ struct dentry *h_dentry; -+ aufs_bindex_t bindex, bend, bi; -+ -+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */) -+ return; ++/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */ + -+ bend = au_dbend(dentry); -+ bi = au_ibend(inode); -+ if (bi < bend) -+ bend = bi; -+ bindex = au_dbstart(dentry); -+ bi = au_ibstart(inode); -+ if (bi > bindex) -+ bindex = bi; ++/* keep the timestamps of the parent dir when cpup */ ++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, ++ struct path *h_path) ++{ ++ struct inode *h_inode; + -+ for (; bindex <= bend; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ h_inode = au_h_iptr(inode, bindex); -+ if (unlikely(h_inode != h_dentry->d_inode)) { -+ int old = au_debug_test(); -+ if (!old) -+ au_debug(1); -+ AuDbg("b%d, %s:%d\n", bindex, func, line); -+ AuDbgDentry(dentry); -+ AuDbgInode(inode); -+ if (!old) -+ au_debug(0); -+ BUG(); -+ } -+ } ++ dt->dt_dentry = dentry; ++ dt->dt_h_path = *h_path; ++ h_inode = h_path->dentry->d_inode; ++ dt->dt_atime = h_inode->i_atime; ++ dt->dt_mtime = h_inode->i_mtime; ++ /* smp_mb(); */ +} + -+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen) ++void au_dtime_revert(struct au_dtime *dt) +{ -+ struct dentry *parent; ++ struct iattr attr; ++ int err; + -+ parent = dget_parent(dentry); -+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); -+ AuDebugOn(IS_ROOT(dentry)); -+ AuDebugOn(au_digen_test(parent, sigen)); -+ dput(parent); ++ attr.ia_atime = dt->dt_atime; ++ attr.ia_mtime = dt->dt_mtime; ++ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET ++ | ATTR_ATIME | ATTR_ATIME_SET; ++ ++ err = vfsub_notify_change(&dt->dt_h_path, &attr); ++ if (unlikely(err)) ++ pr_warn("restoring timestamps failed(%d). ignored\n", err); +} + -+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen) -+{ -+ struct dentry *parent; -+ struct inode *inode; ++/* ---------------------------------------------------------------------- */ + -+ parent = dget_parent(dentry); -+ inode = dentry->d_inode; -+ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode)); -+ AuDebugOn(au_digen_test(parent, sigen)); -+ dput(parent); -+} ++/* internal use only */ ++struct au_cpup_reg_attr { ++ int valid; ++ struct kstat st; ++ unsigned int iflags; /* inode->i_flags */ ++}; + -+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen) ++static noinline_for_stack ++int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src, ++ struct au_cpup_reg_attr *h_src_attr) +{ -+ int err, i, j; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry **dentries; ++ int err, sbits; ++ struct iattr ia; ++ struct path h_path; ++ struct inode *h_isrc, *h_idst; ++ struct kstat *h_st; + -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ AuDebugOn(err); -+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1); -+ AuDebugOn(err); -+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { -+ dpage = dpages.dpages + i; -+ dentries = dpage->dentries; -+ for (j = dpage->ndentry - 1; !err && j >= 0; j--) -+ AuDebugOn(au_digen_test(dentries[j], sigen)); ++ h_path.dentry = au_h_dptr(dst, bindex); ++ h_idst = h_path.dentry->d_inode; ++ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex); ++ h_isrc = h_src->d_inode; ++ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID ++ | ATTR_ATIME | ATTR_MTIME ++ | ATTR_ATIME_SET | ATTR_MTIME_SET; ++ if (h_src_attr && h_src_attr->valid) { ++ h_st = &h_src_attr->st; ++ ia.ia_uid = h_st->uid; ++ ia.ia_gid = h_st->gid; ++ ia.ia_atime = h_st->atime; ++ ia.ia_mtime = h_st->mtime; ++ if (h_idst->i_mode != h_st->mode ++ && !S_ISLNK(h_idst->i_mode)) { ++ ia.ia_valid |= ATTR_MODE; ++ ia.ia_mode = h_st->mode; ++ } ++ sbits = !!(h_st->mode & (S_ISUID | S_ISGID)); ++ au_cpup_attr_flags(h_idst, h_src_attr->iflags); ++ } else { ++ ia.ia_uid = h_isrc->i_uid; ++ ia.ia_gid = h_isrc->i_gid; ++ ia.ia_atime = h_isrc->i_atime; ++ ia.ia_mtime = h_isrc->i_mtime; ++ if (h_idst->i_mode != h_isrc->i_mode ++ && !S_ISLNK(h_idst->i_mode)) { ++ ia.ia_valid |= ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ } ++ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); ++ au_cpup_attr_flags(h_idst, h_isrc->i_flags); + } -+ au_dpages_free(&dpages); -+} ++ err = vfsub_notify_change(&h_path, &ia); + -+void au_dbg_verify_kthread(void) -+{ -+ if (au_wkq_test()) { -+ au_dbg_blocked(); -+ /* -+ * It may be recursive, but udba=notify between two aufs mounts, -+ * where a single ro branch is shared, is not a problem. -+ */ -+ /* WARN_ON(1); */ ++ /* is this nfs only? */ ++ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) { ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE; ++ ia.ia_mode = h_isrc->i_mode; ++ err = vfsub_notify_change(&h_path, &ia); + } ++ ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused) -+{ -+#ifdef AuForceNoPlink -+ au_opt_clr(sbinfo->si_mntflags, PLINK); -+#endif -+#ifdef AuForceNoXino -+ au_opt_clr(sbinfo->si_mntflags, XINO); -+#endif -+#ifdef AuForceNoRefrof -+ au_opt_clr(sbinfo->si_mntflags, REFROF); -+#endif -+#ifdef AuForceHnotify -+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY); -+#endif -+#ifdef AuForceRd0 -+ sbinfo->si_rdblk = 0; -+ sbinfo->si_rdhash = 0; -+#endif -+} -+ -+int __init au_debug_init(void) ++static int au_do_copy_file(struct file *dst, struct file *src, loff_t len, ++ char *buf, unsigned long blksize) +{ -+ aufs_bindex_t bindex; -+ struct au_vdir_destr destr; ++ int err; ++ size_t sz, rbytes, wbytes; ++ unsigned char all_zero; ++ char *p, *zp; ++ struct mutex *h_mtx; ++ /* reduce stack usage */ ++ struct iattr *ia; + -+ bindex = -1; -+ AuDebugOn(bindex >= 0); ++ zp = page_address(ZERO_PAGE(0)); ++ if (unlikely(!zp)) ++ return -ENOMEM; /* possible? */ + -+ destr.len = -1; -+ AuDebugOn(destr.len < NAME_MAX); ++ err = 0; ++ all_zero = 0; ++ while (len) { ++ AuDbg("len %lld\n", len); ++ sz = blksize; ++ if (len < blksize) ++ sz = len; + -+#ifdef CONFIG_4KSTACKS -+ pr_warn("CONFIG_4KSTACKS is defined.\n"); -+#endif ++ rbytes = 0; ++ /* todo: signal_pending? */ ++ while (!rbytes || err == -EAGAIN || err == -EINTR) { ++ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos); ++ err = rbytes; ++ } ++ if (unlikely(err < 0)) ++ break; + -+#ifdef AuForceNoBrs -+ sysaufs_brs = 0; -+#endif ++ all_zero = 0; ++ if (len >= rbytes && rbytes == blksize) ++ all_zero = !memcmp(buf, zp, rbytes); ++ if (!all_zero) { ++ wbytes = rbytes; ++ p = buf; ++ while (wbytes) { ++ size_t b; + -+ return 0; -+} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/debug.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,242 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ ++ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos); ++ err = b; ++ /* todo: signal_pending? */ ++ if (unlikely(err == -EAGAIN || err == -EINTR)) ++ continue; ++ if (unlikely(err < 0)) ++ break; ++ wbytes -= b; ++ p += b; ++ } ++ } else { ++ loff_t res; + -+/* -+ * debug print functions -+ */ ++ AuLabel(hole); ++ res = vfsub_llseek(dst, rbytes, SEEK_CUR); ++ err = res; ++ if (unlikely(res < 0)) ++ break; ++ } ++ len -= rbytes; ++ err = 0; ++ } + -+#ifndef __AUFS_DEBUG_H__ -+#define __AUFS_DEBUG_H__ ++ /* the last block may be a hole */ ++ if (!err && all_zero) { ++ AuLabel(last hole); + -+#ifdef __KERNEL__ ++ err = 1; ++ if (au_test_nfs(dst->f_dentry->d_sb)) { ++ /* nfs requires this step to make last hole */ ++ /* is this only nfs? */ ++ do { ++ /* todo: signal_pending? */ ++ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ if (err == 1) ++ dst->f_pos--; ++ } + -+#include -+#include -+#include ++ if (err == 1) { ++ ia = (void *)buf; ++ ia->ia_size = dst->f_pos; ++ ia->ia_valid = ATTR_SIZE | ATTR_FILE; ++ ia->ia_file = dst; ++ h_mtx = &file_inode(dst)->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); ++ err = vfsub_notify_change(&dst->f_path, ia); ++ mutex_unlock(h_mtx); ++ } ++ } + -+#ifdef CONFIG_AUFS_DEBUG -+#define AuDebugOn(a) BUG_ON(a) ++ return err; ++} + -+/* module parameter */ -+extern int aufs_debug; -+static inline void au_debug(int n) ++int au_copy_file(struct file *dst, struct file *src, loff_t len) +{ -+ aufs_debug = n; -+ smp_mb(); -+} ++ int err; ++ unsigned long blksize; ++ unsigned char do_kfree; ++ char *buf; ++ ++ err = -ENOMEM; ++ blksize = dst->f_dentry->d_sb->s_blocksize; ++ if (!blksize || PAGE_SIZE < blksize) ++ blksize = PAGE_SIZE; ++ AuDbg("blksize %lu\n", blksize); ++ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *)); ++ if (do_kfree) ++ buf = kmalloc(blksize, GFP_NOFS); ++ else ++ buf = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!buf)) ++ goto out; ++ ++ if (len > (1 << 22)) ++ AuDbg("copying a large file %lld\n", (long long)len); ++ ++ src->f_pos = 0; ++ dst->f_pos = 0; ++ err = au_do_copy_file(dst, src, len, buf, blksize); ++ if (do_kfree) ++ kfree(buf); ++ else ++ free_page((unsigned long)buf); + -+static inline int au_debug_test(void) -+{ -+ return aufs_debug; ++out: ++ return err; +} -+#else -+#define AuDebugOn(a) do {} while (0) -+AuStubVoid(au_debug, int n) -+AuStubInt0(au_debug_test, void) -+#endif /* CONFIG_AUFS_DEBUG */ -+ -+/* ---------------------------------------------------------------------- */ + -+/* debug print */ ++/* internal use only */ ++struct au_cpup_basic { ++ struct dentry *dentry; ++ aufs_bindex_t bdst, bsrc; ++ loff_t len; ++}; + -+#define AuDbg(fmt, ...) do { \ -+ if (au_debug_test()) \ -+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \ -+} while (0) -+#define AuLabel(l) AuDbg(#l "\n") -+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__) -+#define AuWarn1(fmt, ...) do { \ -+ static unsigned char _c; \ -+ if (!_c++) \ -+ pr_warn(fmt, ##__VA_ARGS__); \ -+} while (0) ++/* ++ * to support a sparse file which is opened with O_APPEND, ++ * we need to close the file. ++ */ ++static int au_cp_regular(struct au_cpup_basic *basic) ++{ ++ int err, i; ++ enum { SRC, DST }; ++ struct { ++ aufs_bindex_t bindex; ++ unsigned int flags; ++ struct dentry *dentry; ++ struct file *file; ++ void *label, *label_file; ++ } *f, file[] = { ++ { ++ .bindex = basic->bsrc, ++ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out, ++ .label_file = &&out_src ++ }, ++ { ++ .bindex = basic->bdst, ++ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out_src, ++ .label_file = &&out_dst ++ } ++ }; ++ struct super_block *sb; + -+#define AuErr1(fmt, ...) do { \ -+ static unsigned char _c; \ -+ if (!_c++) \ -+ pr_err(fmt, ##__VA_ARGS__); \ -+} while (0) ++ /* bsrc branch can be ro/rw. */ ++ sb = basic->dentry->d_sb; ++ f = file; ++ for (i = 0; i < 2; i++, f++) { ++ f->dentry = au_h_dptr(basic->dentry, f->bindex); ++ f->file = au_h_open(basic->dentry, f->bindex, f->flags, ++ /*file*/NULL); ++ err = PTR_ERR(f->file); ++ if (IS_ERR(f->file)) ++ goto *f->label; ++ err = -EINVAL; ++ if (unlikely(!f->file->f_op)) ++ goto *f->label_file; ++ } + -+#define AuIOErr1(fmt, ...) do { \ -+ static unsigned char _c; \ -+ if (!_c++) \ -+ AuIOErr(fmt, ##__VA_ARGS__); \ -+} while (0) ++ /* try stopping to update while we copyup */ ++ IMustLock(file[SRC].dentry->d_inode); ++ err = au_copy_file(file[DST].file, file[SRC].file, basic->len); + -+#define AuUnsupportMsg "This operation is not supported." \ -+ " Please report this application to aufs-users ML." -+#define AuUnsupport(fmt, ...) do { \ -+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \ -+ dump_stack(); \ -+} while (0) ++out_dst: ++ fput(file[DST].file); ++ au_sbr_put(sb, file[DST].bindex); ++out_src: ++ fput(file[SRC].file); ++ au_sbr_put(sb, file[SRC].bindex); ++out: ++ return err; ++} + -+#define AuTraceErr(e) do { \ -+ if (unlikely((e) < 0)) \ -+ AuDbg("err %d\n", (int)(e)); \ -+} while (0) ++static int au_do_cpup_regular(struct au_cpup_basic *basic, struct au_pin *pin, ++ struct au_cpup_reg_attr *h_src_attr) ++{ ++ int err, rerr; ++ loff_t l; ++ struct path h_path; ++ struct inode *h_src_inode; + -+#define AuTraceErrPtr(p) do { \ -+ if (IS_ERR(p)) \ -+ AuDbg("err %ld\n", PTR_ERR(p)); \ -+} while (0) ++ err = 0; ++ h_src_inode = au_h_iptr(basic->dentry->d_inode, basic->bsrc); ++ l = i_size_read(h_src_inode); ++ if (basic->len == -1 || l < basic->len) ++ basic->len = l; ++ if (basic->len) { ++ /* try stopping to update while we are referencing */ ++ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD); ++ au_pin_hdir_unlock(pin); + -+/* dirty macros for debug print, use with "%.*s" and caution */ -+#define AuLNPair(qstr) (qstr)->len, (qstr)->name -+#define AuDLNPair(d) AuLNPair(&(d)->d_name) ++ h_path.dentry = au_h_dptr(basic->dentry, basic->bsrc); ++ h_path.mnt = au_sbr_mnt(basic->dentry->d_sb, basic->bsrc); ++ h_src_attr->iflags = h_src_inode->i_flags; ++ err = vfs_getattr(&h_path, &h_src_attr->st); ++ if (unlikely(err)) { ++ mutex_unlock(&h_src_inode->i_mutex); ++ goto out; ++ } ++ h_src_attr->valid = 1; ++ err = au_cp_regular(basic); ++ mutex_unlock(&h_src_inode->i_mutex); ++ rerr = au_pin_hdir_relock(pin); ++ if (!err && rerr) ++ err = rerr; ++ } + -+/* ---------------------------------------------------------------------- */ ++out: ++ return err; ++} + -+struct au_sbinfo; -+struct au_finfo; -+struct dentry; -+#ifdef CONFIG_AUFS_DEBUG -+extern char *au_plevel; -+struct au_nhash; -+void au_dpri_whlist(struct au_nhash *whlist); -+struct au_vdir; -+void au_dpri_vdir(struct au_vdir *vdir); -+struct inode; -+void au_dpri_inode(struct inode *inode); -+void au_dpri_dalias(struct inode *inode); -+void au_dpri_dentry(struct dentry *dentry); -+struct file; -+void au_dpri_file(struct file *filp); -+struct super_block; -+void au_dpri_sb(struct super_block *sb); ++static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src, ++ struct inode *h_dir) ++{ ++ int err, symlen; ++ mm_segment_t old_fs; ++ union { ++ char *k; ++ char __user *u; ++ } sym; + -+void au_dbg_sleep_jiffy(int jiffy); -+struct iattr; -+void au_dbg_iattr(struct iattr *ia); ++ err = -ENOSYS; ++ if (unlikely(!h_src->d_inode->i_op->readlink)) ++ goto out; + -+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__) -+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line); -+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen); -+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen); -+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen); -+void au_dbg_verify_kthread(void); ++ err = -ENOMEM; ++ sym.k = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!sym.k)) ++ goto out; + -+int __init au_debug_init(void); -+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo); -+#define AuDbgWhlist(w) do { \ -+ AuDbg(#w "\n"); \ -+ au_dpri_whlist(w); \ -+} while (0) ++ /* unnecessary to support mmap_sem since symlink is not mmap-able */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX); ++ err = symlen; ++ set_fs(old_fs); + -+#define AuDbgVdir(v) do { \ -+ AuDbg(#v "\n"); \ -+ au_dpri_vdir(v); \ -+} while (0) ++ if (symlen > 0) { ++ sym.k[symlen] = 0; ++ err = vfsub_symlink(h_dir, h_path, sym.k); ++ } ++ free_page((unsigned long)sym.k); + -+#define AuDbgInode(i) do { \ -+ AuDbg(#i "\n"); \ -+ au_dpri_inode(i); \ -+} while (0) ++out: ++ return err; ++} + -+#define AuDbgDAlias(i) do { \ -+ AuDbg(#i "\n"); \ -+ au_dpri_dalias(i); \ -+} while (0) ++static noinline_for_stack ++int cpup_entry(struct au_cpup_basic *basic, unsigned int flags, ++ struct dentry *dst_parent, struct au_pin *pin, ++ struct au_cpup_reg_attr *h_src_attr) ++{ ++ int err; ++ umode_t mode; ++ unsigned int mnt_flags; ++ unsigned char isdir; ++ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME); ++ struct au_dtime dt; ++ struct path h_path; ++ struct dentry *h_src, *h_dst, *h_parent; ++ struct inode *h_inode, *h_dir; ++ struct super_block *sb; + -+#define AuDbgDentry(d) do { \ -+ AuDbg(#d "\n"); \ -+ au_dpri_dentry(d); \ -+} while (0) ++ /* bsrc branch can be ro/rw. */ ++ h_src = au_h_dptr(basic->dentry, basic->bsrc); ++ h_inode = h_src->d_inode; ++ AuDebugOn(h_inode != au_h_iptr(basic->dentry->d_inode, basic->bsrc)); + -+#define AuDbgFile(f) do { \ -+ AuDbg(#f "\n"); \ -+ au_dpri_file(f); \ -+} while (0) ++ /* try stopping to be referenced while we are creating */ ++ h_dst = au_h_dptr(basic->dentry, basic->bdst); ++ if (au_ftest_cpup(flags, RENAME)) ++ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX, ++ AUFS_WH_PFX_LEN)); ++ h_parent = h_dst->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ AuDebugOn(h_parent != h_dst->d_parent); + -+#define AuDbgSb(sb) do { \ -+ AuDbg(#sb "\n"); \ -+ au_dpri_sb(sb); \ -+} while (0) ++ sb = basic->dentry->d_sb; ++ h_path.mnt = au_sbr_mnt(sb, basic->bdst); ++ if (do_dt) { ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ } ++ h_path.dentry = h_dst; + -+#define AuDbgSleep(sec) do { \ -+ AuDbg("sleep %d sec\n", sec); \ -+ ssleep(sec); \ -+} while (0) ++ isdir = 0; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR, ++ /*want_excl*/true); ++ if (!err) ++ err = au_do_cpup_regular(basic, pin, h_src_attr); ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ err = vfsub_mkdir(h_dir, &h_path, mode); ++ if (!err) { ++ /* ++ * strange behaviour from the users view, ++ * particularry setattr case ++ */ ++ if (au_ibstart(dst_parent->d_inode) == basic->bdst) ++ au_cpup_attr_nlink(dst_parent->d_inode, ++ /*force*/1); ++ au_cpup_attr_nlink(basic->dentry->d_inode, /*force*/1); ++ } ++ break; ++ case S_IFLNK: ++ err = au_do_cpup_symlink(&h_path, h_src, h_dir); ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ AuDebugOn(!capable(CAP_MKNOD)); ++ /*FALLTHROUGH*/ ++ case S_IFIFO: ++ case S_IFSOCK: ++ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown inode type 0%o\n", mode); ++ err = -EIO; ++ } + -+#define AuDbgSleepJiffy(jiffy) do { \ -+ AuDbg("sleep %d jiffies\n", jiffy); \ -+ au_dbg_sleep_jiffy(jiffy); \ -+} while (0) ++ mnt_flags = au_mntflags(sb); ++ if (!au_opt_test(mnt_flags, UDBA_NONE) ++ && !isdir ++ && au_opt_test(mnt_flags, XINO) ++ && h_inode->i_nlink == 1 ++ /* todo: unnecessary? */ ++ /* && basic->dentry->d_inode->i_nlink == 1 */ ++ && basic->bdst < basic->bsrc ++ && !au_ftest_cpup(flags, KEEPLINO)) ++ au_xino_write(sb, basic->bsrc, h_inode->i_ino, /*ino*/0); ++ /* ignore this error */ + -+#define AuDbgIAttr(ia) do { \ -+ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \ -+ au_dbg_iattr(ia); \ -+} while (0) ++ if (do_dt) ++ au_dtime_revert(&dt); ++ return err; ++} + -+#define AuDbgSym(addr) do { \ -+ char sym[KSYM_SYMBOL_LEN]; \ -+ sprint_symbol(sym, (unsigned long)addr); \ -+ AuDbg("%s\n", sym); \ -+} while (0) ++static int au_do_ren_after_cpup(struct dentry *dentry, aufs_bindex_t bdst, ++ struct path *h_path) ++{ ++ int err; ++ struct dentry *h_dentry, *h_parent; ++ struct inode *h_dir; + -+#define AuInfoSym(addr) do { \ -+ char sym[KSYM_SYMBOL_LEN]; \ -+ sprint_symbol(sym, (unsigned long)addr); \ -+ AuInfo("%s\n", sym); \ -+} while (0) -+#else -+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry) -+AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen) -+AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry, -+ unsigned int sigen) -+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen) -+AuStubVoid(au_dbg_verify_kthread, void) -+AuStubInt0(__init au_debug_init, void) -+AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo) ++ h_dentry = dget(au_h_dptr(dentry, bdst)); ++ au_set_h_dptr(dentry, bdst, NULL); ++ err = au_lkup_neg(dentry, bdst, /*wh*/0); ++ if (unlikely(err)) { ++ au_set_h_dptr(dentry, bdst, h_dentry); ++ goto out; ++ } + -+#define AuDbgWhlist(w) do {} while (0) -+#define AuDbgVdir(v) do {} while (0) -+#define AuDbgInode(i) do {} while (0) -+#define AuDbgDAlias(i) do {} while (0) -+#define AuDbgDentry(d) do {} while (0) -+#define AuDbgFile(f) do {} while (0) -+#define AuDbgSb(sb) do {} while (0) -+#define AuDbgSleep(sec) do {} while (0) -+#define AuDbgSleepJiffy(jiffy) do {} while (0) -+#define AuDbgIAttr(ia) do {} while (0) -+#define AuDbgSym(addr) do {} while (0) -+#define AuInfoSym(addr) do {} while (0) -+#endif /* CONFIG_AUFS_DEBUG */ ++ h_path->dentry = dget(au_h_dptr(dentry, bdst)); ++ au_set_h_dptr(dentry, bdst, h_dentry); ++ h_parent = h_dentry->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); ++ AuDbg("%.*s %.*s\n", AuDLNPair(h_dentry), AuDLNPair(h_path->dentry)); ++ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path); ++ dput(h_path->dentry); + -+/* ---------------------------------------------------------------------- */ ++out: ++ return err; ++} + -+#ifdef CONFIG_AUFS_MAGIC_SYSRQ -+int __init au_sysrq_init(void); -+void au_sysrq_fin(void); ++/* ++ * copyup the @dentry from @bsrc to @bdst. ++ * the caller must set the both of lower dentries. ++ * @len is for truncating when it is -1 copyup the entire file. ++ * in link/rename cases, @dst_parent may be different from the real one. ++ */ ++static int au_cpup_single(struct au_cpup_basic *basic, unsigned int flags, ++ struct dentry *dst_parent, struct au_pin *pin) ++{ ++ int err, rerr; ++ aufs_bindex_t old_ibstart; ++ unsigned char isdir, plink; ++ struct au_dtime dt; ++ struct path h_path; ++ struct dentry *h_src, *h_dst, *h_parent; ++ struct inode *dst_inode, *h_dir, *inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct au_cpup_reg_attr h_src_attr = { ++ .valid = 0 ++ }; + -+#ifdef CONFIG_HW_CONSOLE -+#define au_dbg_blocked() do { \ -+ WARN_ON(1); \ -+ handle_sysrq('w'); \ -+} while (0) -+#else -+AuStubVoid(au_dbg_blocked, void) -+#endif ++ AuDebugOn(basic->bsrc <= basic->bdst); + -+#else -+AuStubInt0(__init au_sysrq_init, void) -+AuStubVoid(au_sysrq_fin, void) -+AuStubVoid(au_dbg_blocked, void) -+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ ++ sb = basic->dentry->d_sb; ++ br = au_sbr(sb, basic->bdst); ++ h_path.mnt = au_br_mnt(br); ++ h_dst = au_h_dptr(basic->dentry, basic->bdst); ++ h_parent = h_dst->d_parent; /* dir inode is locked */ ++ h_dir = h_parent->d_inode; ++ IMustLock(h_dir); + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DEBUG_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/dentry.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,1060 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ ++ h_src = au_h_dptr(basic->dentry, basic->bsrc); ++ inode = basic->dentry->d_inode; + -+/* -+ * lookup and dentry operations -+ */ ++ if (!dst_parent) ++ dst_parent = dget_parent(basic->dentry); ++ else ++ dget(dst_parent); + -+#include -+#include "aufs.h" ++ plink = !!au_opt_test(au_mntflags(sb), PLINK); ++ dst_inode = au_h_iptr(inode, basic->bdst); ++ if (dst_inode) { ++ if (unlikely(!plink)) { ++ err = -EIO; ++ AuIOErr("hi%lu(i%lu) exists on b%d " ++ "but plink is disabled\n", ++ dst_inode->i_ino, inode->i_ino, basic->bdst); ++ goto out; ++ } + -+#define AuLkup_ALLOW_NEG 1 -+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name) -+#define au_fset_lkup(flags, name) \ -+ do { (flags) |= AuLkup_##name; } while (0) -+#define au_fclr_lkup(flags, name) \ -+ do { (flags) &= ~AuLkup_##name; } while (0) ++ if (dst_inode->i_nlink) { ++ const int do_dt = au_ftest_cpup(flags, DTIME); + -+struct au_do_lookup_args { -+ unsigned int flags; -+ mode_t type; -+}; ++ h_src = au_plink_lkup(inode, basic->bdst); ++ err = PTR_ERR(h_src); ++ if (IS_ERR(h_src)) ++ goto out; ++ if (unlikely(!h_src->d_inode)) { ++ err = -EIO; ++ AuIOErr("i%lu exists on a upper branch " ++ "but not pseudo-linked\n", ++ inode->i_ino); ++ dput(h_src); ++ goto out; ++ } + -+/* -+ * returns positive/negative dentry, NULL or an error. -+ * NULL means whiteout-ed or not-found. -+ */ -+static struct dentry* -+au_do_lookup(struct dentry *h_parent, struct dentry *dentry, -+ aufs_bindex_t bindex, struct qstr *wh_name, -+ struct au_do_lookup_args *args) -+{ -+ struct dentry *h_dentry; -+ struct inode *h_inode, *inode; -+ struct au_branch *br; -+ int wh_found, opq; -+ unsigned char wh_able; -+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG); ++ if (do_dt) { ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ } + -+ wh_found = 0; -+ br = au_sbr(dentry->d_sb, bindex); -+ wh_able = !!au_br_whable(br->br_perm); -+ if (wh_able) -+ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0); -+ h_dentry = ERR_PTR(wh_found); -+ if (!wh_found) -+ goto real_lookup; -+ if (unlikely(wh_found < 0)) -+ goto out; ++ h_path.dentry = h_dst; ++ err = vfsub_link(h_src, h_dir, &h_path); ++ if (!err && au_ftest_cpup(flags, RENAME)) ++ err = au_do_ren_after_cpup ++ (basic->dentry, basic->bdst, &h_path); ++ if (do_dt) ++ au_dtime_revert(&dt); ++ dput(h_src); ++ goto out; ++ } else ++ /* todo: cpup_wh_file? */ ++ /* udba work */ ++ au_update_ibrange(inode, /*do_put_zero*/1); ++ } + -+ /* We found a whiteout */ -+ /* au_set_dbend(dentry, bindex); */ -+ au_set_dbwh(dentry, bindex); -+ if (!allow_neg) -+ return NULL; /* success */ ++ isdir = S_ISDIR(inode->i_mode); ++ old_ibstart = au_ibstart(inode); ++ err = cpup_entry(basic, flags, dst_parent, pin, &h_src_attr); ++ if (unlikely(err)) ++ goto out_rev; ++ dst_inode = h_dst->d_inode; ++ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2); ++ /* todo: necessary? */ ++ /* au_pin_hdir_unlock(pin); */ + -+real_lookup: -+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent); -+ if (IS_ERR(h_dentry)) -+ goto out; ++ err = cpup_iattr(basic->dentry, basic->bdst, h_src, &h_src_attr); ++ if (unlikely(err)) { ++ /* todo: necessary? */ ++ /* au_pin_hdir_relock(pin); */ /* ignore an error */ ++ mutex_unlock(&dst_inode->i_mutex); ++ goto out_rev; ++ } + -+ h_inode = h_dentry->d_inode; -+ if (!h_inode) { -+ if (!allow_neg) -+ goto out_neg; -+ } else if (wh_found -+ || (args->type && args->type != (h_inode->i_mode & S_IFMT))) -+ goto out_neg; ++ if (basic->bdst < old_ibstart) { ++ if (S_ISREG(inode->i_mode)) { ++ err = au_dy_iaop(inode, basic->bdst, dst_inode); ++ if (unlikely(err)) { ++ /* au_pin_hdir_relock(pin); ignore an error */ ++ mutex_unlock(&dst_inode->i_mutex); ++ goto out_rev; ++ } ++ } ++ au_set_ibstart(inode, basic->bdst); ++ } ++ au_set_h_iptr(inode, basic->bdst, au_igrab(dst_inode), ++ au_hi_flags(inode, isdir)); + -+ if (au_dbend(dentry) <= bindex) -+ au_set_dbend(dentry, bindex); -+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) -+ au_set_dbstart(dentry, bindex); -+ au_set_h_dptr(dentry, bindex, h_dentry); ++ /* todo: necessary? */ ++ /* err = au_pin_hdir_relock(pin); */ ++ mutex_unlock(&dst_inode->i_mutex); ++ if (unlikely(err)) ++ goto out_rev; + -+ inode = dentry->d_inode; -+ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able -+ || (inode && !S_ISDIR(inode->i_mode))) -+ goto out; /* success */ ++ if (!isdir ++ && h_src->d_inode->i_nlink > 1 ++ && plink) ++ au_plink_append(inode, basic->bdst, h_dst); + -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ opq = au_diropq_test(h_dentry, br); -+ mutex_unlock(&h_inode->i_mutex); -+ if (opq > 0) -+ au_set_dbdiropq(dentry, bindex); -+ else if (unlikely(opq < 0)) { -+ au_set_h_dptr(dentry, bindex, NULL); -+ h_dentry = ERR_PTR(opq); ++ if (au_ftest_cpup(flags, RENAME)) { ++ h_path.dentry = h_dst; ++ err = au_do_ren_after_cpup(basic->dentry, basic->bdst, &h_path); + } -+ goto out; ++ if (!err) ++ goto out; /* success */ + -+out_neg: -+ dput(h_dentry); -+ h_dentry = NULL; ++ /* revert */ ++out_rev: ++ h_path.dentry = h_parent; ++ au_dtime_store(&dt, dst_parent, &h_path); ++ h_path.dentry = h_dst; ++ rerr = 0; ++ if (h_dst->d_inode) { ++ if (!isdir) ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ else ++ rerr = vfsub_rmdir(h_dir, &h_path); ++ } ++ au_dtime_revert(&dt); ++ if (rerr) { ++ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr); ++ err = -EIO; ++ } +out: -+ return h_dentry; ++ dput(dst_parent); ++ return err; +} + -+static int au_test_shwh(struct super_block *sb, const struct qstr *name) ++struct au_cpup_single_args { ++ int *errp; ++ struct au_cpup_basic *basic; ++ unsigned int flags; ++ struct dentry *dst_parent; ++ struct au_pin *pin; ++}; ++ ++static void au_call_cpup_single(void *args) +{ -+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH) -+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) -+ return -EPERM; -+ return 0; ++ struct au_cpup_single_args *a = args; ++ ++ au_pin_hdir_acquire_nest(a->pin); ++ *a->errp = au_cpup_single(a->basic, a->flags, a->dst_parent, a->pin); ++ au_pin_hdir_release(a->pin); +} + +/* -+ * returns the number of lower positive dentries, -+ * otherwise an error. -+ * can be called at unlinking with @type is zero. ++ * prevent SIGXFSZ in copy-up. ++ * testing CAP_MKNOD is for generic fs, ++ * but CAP_FSETID is for xfs only, currently. + */ -+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type) ++static int au_cpup_sio_test(struct super_block *sb, umode_t mode) +{ -+ int npositive, err; -+ aufs_bindex_t bindex, btail, bdiropq; -+ unsigned char isdir; -+ struct qstr whname; -+ struct au_do_lookup_args args = { -+ .flags = 0, -+ .type = type -+ }; -+ const struct qstr *name = &dentry->d_name; -+ struct dentry *parent; -+ struct inode *inode; ++ int do_sio; + -+ err = au_test_shwh(dentry->d_sb, name); -+ if (unlikely(err)) -+ goto out; ++ do_sio = 0; ++ if (!au_wkq_test() ++ && (!au_sbi(sb)->si_plink_maint_pid ++ || au_plink_maint(sb, AuLock_NOPLM))) { ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ /* no condition about RLIMIT_FSIZE and the file size */ ++ do_sio = 1; ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ do_sio = !capable(CAP_MKNOD); ++ break; ++ } ++ if (!do_sio) ++ do_sio = ((mode & (S_ISUID | S_ISGID)) ++ && !capable(CAP_FSETID)); ++ } + -+ err = au_wh_name_alloc(&whname, name); -+ if (unlikely(err)) -+ goto out; ++ return do_sio; ++} + -+ inode = dentry->d_inode; -+ isdir = !!(inode && S_ISDIR(inode->i_mode)); -+ if (!type) -+ au_fset_lkup(args.flags, ALLOW_NEG); ++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent, struct au_pin *pin) ++{ ++ int err, wkq_err; ++ struct dentry *h_dentry; ++ struct au_cpup_basic basic = { ++ .dentry = dentry, ++ .bdst = bdst, ++ .bsrc = bsrc, ++ .len = len ++ }; + -+ npositive = 0; -+ parent = dget_parent(dentry); -+ btail = au_dbtaildir(parent); -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ struct dentry *h_parent, *h_dentry; -+ struct inode *h_inode, *h_dir; ++ h_dentry = au_h_dptr(dentry, bsrc); ++ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode)) ++ err = au_cpup_single(&basic, flags, dst_parent, pin); ++ else { ++ struct au_cpup_single_args args = { ++ .errp = &err, ++ .basic = &basic, ++ .flags = flags, ++ .dst_parent = dst_parent, ++ .pin = pin ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_single, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } + -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry) { -+ if (h_dentry->d_inode) -+ npositive++; -+ if (type != S_IFDIR) -+ break; -+ continue; -+ } -+ h_parent = au_h_dptr(parent, bindex); -+ if (!h_parent) -+ continue; -+ h_dir = h_parent->d_inode; -+ if (!h_dir || !S_ISDIR(h_dir->i_mode)) -+ continue; ++ return err; ++} + -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); -+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname, -+ &args); -+ mutex_unlock(&h_dir->i_mutex); -+ err = PTR_ERR(h_dentry); -+ if (IS_ERR(h_dentry)) -+ goto out_parent; -+ au_fclr_lkup(args.flags, ALLOW_NEG); ++/* ++ * copyup the @dentry from the first active lower branch to @bdst, ++ * using au_cpup_single(). ++ */ ++static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags, struct au_pin *pin) ++{ ++ int err; ++ aufs_bindex_t bsrc, bend; ++ struct dentry *h_dentry; + -+ if (au_dbwh(dentry) >= 0) -+ break; -+ if (!h_dentry) -+ continue; -+ h_inode = h_dentry->d_inode; -+ if (!h_inode) -+ continue; -+ npositive++; -+ if (!args.type) -+ args.type = h_inode->i_mode & S_IFMT; -+ if (args.type != S_IFDIR) ++ DiMustWriteLock(dentry); ++ bend = au_dbend(dentry); ++ for (bsrc = bdst + 1; bsrc <= bend; bsrc++) { ++ h_dentry = au_h_dptr(dentry, bsrc); ++ if (h_dentry) { ++ AuDebugOn(!h_dentry->d_inode); + break; -+ else if (isdir) { -+ /* the type of lower may be different */ -+ bdiropq = au_dbdiropq(dentry); -+ if (bdiropq >= 0 && bdiropq <= bindex) -+ break; + } + } ++ AuDebugOn(bsrc > bend); + -+ if (npositive) { -+ AuLabel(positive); -+ au_update_dbstart(dentry); -+ } -+ err = npositive; -+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) -+ && au_dbstart(dentry) < 0)) { -+ err = -EIO; -+ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n", -+ AuDLNPair(dentry), err); ++ err = au_lkup_neg(dentry, bdst, /*wh*/1); ++ if (!err) { ++ struct au_cpup_basic basic = { ++ .dentry = dentry, ++ .bdst = bdst, ++ .bsrc = bsrc, ++ .len = len ++ }; ++ err = au_cpup_single(&basic, flags | AuCpup_RENAME, NULL, pin); ++ if (!err) ++ return 0; /* success */ ++ ++ /* revert */ ++ au_set_h_dptr(dentry, bdst, NULL); ++ au_set_dbstart(dentry, bsrc); + } + -+out_parent: -+ dput(parent); -+ kfree(whname.name); -+out: + return err; +} + -+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, -+ struct au_branch *br) -+{ ++struct au_cpup_simple_args { ++ int *errp; + struct dentry *dentry; -+ int wkq_err; ++ aufs_bindex_t bdst; ++ loff_t len; ++ unsigned int flags; ++ struct au_pin *pin; ++}; + -+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC)) -+ dentry = vfsub_lkup_one(name, parent); ++static void au_call_cpup_simple(void *args) ++{ ++ struct au_cpup_simple_args *a = args; ++ ++ au_pin_hdir_acquire_nest(a->pin); ++ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags, a->pin); ++ au_pin_hdir_release(a->pin); ++} ++ ++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags, struct au_pin *pin) ++{ ++ int err, wkq_err; ++ struct dentry *parent; ++ struct inode *h_dir; ++ ++ parent = dget_parent(dentry); ++ h_dir = au_h_iptr(parent->d_inode, bdst); ++ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE) ++ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) ++ err = au_cpup_simple(dentry, bdst, len, flags, pin); + else { -+ struct vfsub_lkup_one_args args = { -+ .errp = &dentry, -+ .name = name, -+ .parent = parent ++ struct au_cpup_simple_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .flags = flags, ++ .pin = pin + }; ++ wkq_err = au_wkq_wait(au_call_cpup_simple, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ dput(parent); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * copyup the deleted file for writing. ++ */ ++static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *wh_dentry, struct file *file, ++ loff_t len, struct au_pin *pin) ++{ ++ int err; ++ struct au_cpup_basic basic = { ++ .dentry = dentry, ++ .bdst = bdst, ++ .bsrc = -1, ++ .len = len ++ }; ++ struct au_dinfo *dinfo; ++ struct dentry *h_d_dst, *h_d_start; ++ struct au_hdentry *hdp; + -+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args); -+ if (unlikely(wkq_err)) -+ dentry = ERR_PTR(wkq_err); ++ dinfo = au_di(dentry); ++ AuRwMustWriteLock(&dinfo->di_rwsem); ++ ++ basic.bsrc = dinfo->di_bstart; ++ hdp = dinfo->di_hdentry; ++ h_d_dst = hdp[0 + bdst].hd_dentry; ++ dinfo->di_bstart = bdst; ++ hdp[0 + bdst].hd_dentry = wh_dentry; ++ h_d_start = NULL; ++ if (file) { ++ h_d_start = hdp[0 + basic.bsrc].hd_dentry; ++ hdp[0 + basic.bsrc].hd_dentry = au_hf_top(file)->f_dentry; ++ } ++ err = au_cpup_single(&basic, !AuCpup_DTIME, /*h_parent*/NULL, pin); ++ if (file) { ++ if (!err) ++ err = au_reopen_nondir(file); ++ hdp[0 + basic.bsrc].hd_dentry = h_d_start; + } ++ hdp[0 + bdst].hd_dentry = h_d_dst; ++ dinfo->di_bstart = basic.bsrc; + -+ return dentry; ++ return err; +} + -+/* -+ * lookup @dentry on @bindex which should be negative. -+ */ -+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex) ++static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file, struct au_pin *pin) +{ + int err; -+ struct dentry *parent, *h_parent, *h_dentry; ++ struct au_dtime dt; ++ struct dentry *parent, *h_parent, *wh_dentry; ++ struct au_branch *br; ++ struct path h_path; + ++ br = au_sbr(dentry->d_sb, bdst); + parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bindex); -+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent, -+ au_sbr(dentry->d_sb, bindex)); -+ err = PTR_ERR(h_dentry); -+ if (IS_ERR(h_dentry)) ++ h_parent = au_h_dptr(parent, bdst); ++ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) + goto out; -+ if (unlikely(h_dentry->d_inode)) { ++ ++ h_path.dentry = h_parent; ++ h_path.mnt = au_br_mnt(br); ++ au_dtime_store(&dt, parent, &h_path); ++ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len, pin); ++ if (unlikely(err)) ++ goto out_wh; ++ ++ dget(wh_dentry); ++ h_path.dentry = wh_dentry; ++ if (!S_ISDIR(wh_dentry->d_inode->i_mode)) ++ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0); ++ else ++ err = vfsub_rmdir(h_parent->d_inode, &h_path); ++ if (unlikely(err)) { ++ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n", ++ AuDLNPair(wh_dentry), err); + err = -EIO; -+ AuIOErr("%.*s should be negative on b%d.\n", -+ AuDLNPair(h_dentry), bindex); -+ dput(h_dentry); -+ goto out; + } ++ au_dtime_revert(&dt); ++ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry); + -+ err = 0; -+ if (bindex < au_dbstart(dentry)) -+ au_set_dbstart(dentry, bindex); -+ if (au_dbend(dentry) < bindex) -+ au_set_dbend(dentry, bindex); -+ au_set_h_dptr(dentry, bindex, h_dentry); -+ ++out_wh: ++ dput(wh_dentry); +out: + dput(parent); + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+/* subset of struct inode */ -+struct au_iattr { -+ unsigned long i_ino; -+ /* unsigned int i_nlink; */ -+ kuid_t i_uid; -+ kgid_t i_gid; -+ u64 i_version; -+/* -+ loff_t i_size; -+ blkcnt_t i_blocks; -+*/ -+ umode_t i_mode; ++struct au_cpup_wh_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst; ++ loff_t len; ++ struct file *file; ++ struct au_pin *pin; +}; + -+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode) ++static void au_call_cpup_wh(void *args) +{ -+ ia->i_ino = h_inode->i_ino; -+ /* ia->i_nlink = h_inode->i_nlink; */ -+ ia->i_uid = h_inode->i_uid; -+ ia->i_gid = h_inode->i_gid; -+ ia->i_version = h_inode->i_version; -+/* -+ ia->i_size = h_inode->i_size; -+ ia->i_blocks = h_inode->i_blocks; -+*/ -+ ia->i_mode = (h_inode->i_mode & S_IFMT); ++ struct au_cpup_wh_args *a = args; ++ ++ au_pin_hdir_acquire_nest(a->pin); ++ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file, a->pin); ++ au_pin_hdir_release(a->pin); +} + -+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode) ++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file, struct au_pin *pin) +{ -+ return ia->i_ino != h_inode->i_ino -+ /* || ia->i_nlink != h_inode->i_nlink */ -+ || !uid_eq(ia->i_uid, h_inode->i_uid) -+ || !gid_eq(ia->i_gid, h_inode->i_gid) -+ || ia->i_version != h_inode->i_version -+/* -+ || ia->i_size != h_inode->i_size -+ || ia->i_blocks != h_inode->i_blocks -+*/ -+ || ia->i_mode != (h_inode->i_mode & S_IFMT); ++ int err, wkq_err; ++ struct dentry *parent, *h_orph, *h_parent, *h_dentry; ++ struct inode *dir, *h_dir, *h_tmpdir; ++ struct au_wbr *wbr; ++ struct au_pin wh_pin; ++ ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; ++ h_orph = NULL; ++ h_parent = NULL; ++ h_dir = au_igrab(au_h_iptr(dir, bdst)); ++ h_tmpdir = h_dir; ++ if (!h_dir->i_nlink) { ++ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr; ++ h_orph = wbr->wbr_orph; ++ ++ h_parent = dget(au_h_dptr(parent, bdst)); ++ au_set_h_dptr(parent, bdst, dget(h_orph)); ++ h_tmpdir = h_orph->d_inode; ++ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0); ++ ++ if (file) ++ h_dentry = au_hf_top(file)->f_dentry; ++ else ++ h_dentry = au_h_dptr(dentry, au_dbstart(dentry)); ++ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3); ++ /* todo: au_h_open_pre()? */ ++ ++ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT, ++ AuLsc_I_PARENT3, pin->udba, AuPin_DI_LOCKED); ++ pin = &wh_pin; ++ } ++ ++ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE) ++ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode)) ++ err = au_cpup_wh(dentry, bdst, len, file, pin); ++ else { ++ struct au_cpup_wh_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .file = file, ++ .pin = pin ++ }; ++ wkq_err = au_wkq_wait(au_call_cpup_wh, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ } ++ ++ if (h_orph) { ++ mutex_unlock(&h_tmpdir->i_mutex); ++ /* todo: au_h_open_post()? */ ++ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0); ++ au_set_h_dptr(parent, bdst, h_parent); ++ } ++ iput(h_dir); ++ dput(parent); ++ ++ return err; +} + -+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent, -+ struct au_branch *br) ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * generic routine for both of copy-up and copy-down. ++ */ ++/* cf. revalidate function in file.c */ ++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, ++ struct au_pin *pin, ++ struct dentry *h_parent, void *arg), ++ void *arg) +{ + int err; -+ struct au_iattr ia; -+ struct inode *h_inode; -+ struct dentry *h_d; -+ struct super_block *h_sb; ++ struct au_pin pin; ++ struct dentry *d, *parent, *h_parent, *real_parent; + + err = 0; -+ memset(&ia, -1, sizeof(ia)); -+ h_sb = h_dentry->d_sb; -+ h_inode = h_dentry->d_inode; -+ if (h_inode) -+ au_iattr_save(&ia, h_inode); -+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb)) -+ /* nfs d_revalidate may return 0 for negative dentry */ -+ /* fuse d_revalidate always return 0 for negative dentry */ ++ parent = dget_parent(dentry); ++ if (IS_ROOT(parent)) + goto out; + -+ /* main purpose is namei.c:cached_lookup() and d_revalidate */ -+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent); -+ err = PTR_ERR(h_d); -+ if (IS_ERR(h_d)) -+ goto out; ++ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2, ++ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE); + -+ err = 0; -+ if (unlikely(h_d != h_dentry -+ || h_d->d_inode != h_inode -+ || (h_inode && au_iattr_test(&ia, h_inode)))) -+ err = au_busy_or_stale(); -+ dput(h_d); ++ /* do not use au_dpage */ ++ real_parent = parent; ++ while (1) { ++ dput(parent); ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bdst); ++ if (h_parent) ++ goto out; /* success */ ++ ++ /* find top dir which is necessary to cpup */ ++ do { ++ d = parent; ++ dput(parent); ++ parent = dget_parent(d); ++ di_read_lock_parent3(parent, !AuLock_IR); ++ h_parent = au_h_dptr(parent, bdst); ++ di_read_unlock(parent, !AuLock_IR); ++ } while (!h_parent); ++ ++ if (d != real_parent) ++ di_write_lock_child3(d); ++ ++ /* somebody else might create while we were sleeping */ ++ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) { ++ if (au_h_dptr(d, bdst)) ++ au_update_dbstart(d); ++ ++ au_pin_set_dentry(&pin, d); ++ err = au_do_pin(&pin); ++ if (!err) { ++ err = cp(d, bdst, &pin, h_parent, arg); ++ au_unpin(&pin); ++ } ++ } ++ ++ if (d != real_parent) ++ di_write_unlock(d); ++ if (unlikely(err)) ++ break; ++ } ++ ++out: ++ dput(parent); ++ return err; ++} ++ ++static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst, ++ struct au_pin *pin, ++ struct dentry *h_parent __maybe_unused , ++ void *arg __maybe_unused) ++{ ++ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME, pin); ++} + -+out: -+ AuTraceErr(err); -+ return err; ++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) ++{ ++ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL); +} + -+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, -+ struct dentry *h_parent, struct au_branch *br) ++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) +{ + int err; ++ struct dentry *parent; ++ struct inode *dir; + ++ parent = dget_parent(dentry); ++ dir = parent->d_inode; + err = 0; -+ if (udba == AuOpt_UDBA_REVAL -+ && !au_test_fs_remote(h_dentry->d_sb)) { -+ IMustLock(h_dir); -+ err = (h_dentry->d_parent->d_inode != h_dir); -+ } else if (udba != AuOpt_UDBA_NONE) -+ err = au_h_verify_dentry(h_dentry, h_parent, br); ++ if (au_h_iptr(dir, bdst)) ++ goto out; ++ ++ di_read_unlock(parent, AuLock_IR); ++ di_write_lock_parent(parent); ++ /* someone else might change our inode while we were sleeping */ ++ if (!au_h_iptr(dir, bdst)) ++ err = au_cpup_dirs(dentry, bdst); ++ di_downgrade_lock(parent, AuLock_IR); + ++out: ++ dput(parent); + return err; +} +--- /dev/null ++++ linux-3.9/fs/aufs/cpup.h 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent) -+{ -+ int err; -+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq; -+ struct au_hdentry tmp, *p, *q; -+ struct au_dinfo *dinfo; -+ struct super_block *sb; -+ -+ DiMustWriteLock(dentry); -+ -+ sb = dentry->d_sb; -+ dinfo = au_di(dentry); -+ bend = dinfo->di_bend; -+ bwh = dinfo->di_bwh; -+ bdiropq = dinfo->di_bdiropq; -+ p = dinfo->di_hdentry + dinfo->di_bstart; -+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) { -+ if (!p->hd_dentry) -+ continue; ++/* ++ * copy-up/down functions ++ */ + -+ new_bindex = au_br_index(sb, p->hd_id); -+ if (new_bindex == bindex) -+ continue; ++#ifndef __AUFS_CPUP_H__ ++#define __AUFS_CPUP_H__ + -+ if (dinfo->di_bwh == bindex) -+ bwh = new_bindex; -+ if (dinfo->di_bdiropq == bindex) -+ bdiropq = new_bindex; -+ if (new_bindex < 0) { -+ au_hdput(p); -+ p->hd_dentry = NULL; -+ continue; -+ } ++#ifdef __KERNEL__ + -+ /* swap two lower dentries, and loop again */ -+ q = dinfo->di_hdentry + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hd_dentry) { -+ bindex--; -+ p--; -+ } -+ } ++#include + -+ dinfo->di_bwh = -1; -+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh)) -+ dinfo->di_bwh = bwh; ++struct inode; ++struct file; ++struct au_pin; + -+ dinfo->di_bdiropq = -1; -+ if (bdiropq >= 0 -+ && bdiropq <= au_sbend(sb) -+ && au_sbr_whable(sb, bdiropq)) -+ dinfo->di_bdiropq = bdiropq; ++void au_cpup_attr_flags(struct inode *dst, unsigned int iflags); ++void au_cpup_attr_timesizes(struct inode *inode); ++void au_cpup_attr_nlink(struct inode *inode, int force); ++void au_cpup_attr_changeable(struct inode *inode); ++void au_cpup_igen(struct inode *inode, struct inode *h_inode); ++void au_cpup_attr_all(struct inode *inode, int force); + -+ err = -EIO; -+ dinfo->di_bstart = -1; -+ dinfo->di_bend = -1; -+ bend = au_dbend(parent); -+ p = dinfo->di_hdentry; -+ for (bindex = 0; bindex <= bend; bindex++, p++) -+ if (p->hd_dentry) { -+ dinfo->di_bstart = bindex; -+ break; -+ } ++/* ---------------------------------------------------------------------- */ + -+ if (dinfo->di_bstart >= 0) { -+ p = dinfo->di_hdentry + bend; -+ for (bindex = bend; bindex >= 0; bindex--, p--) -+ if (p->hd_dentry) { -+ dinfo->di_bend = bindex; -+ err = 0; -+ break; -+ } -+ } ++/* cpup flags */ ++#define AuCpup_DTIME 1 /* do dtime_store/revert */ ++#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino, ++ for link(2) */ ++#define AuCpup_RENAME (1 << 2) /* rename after cpup */ ++#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name) ++#define au_fset_cpup(flags, name) \ ++ do { (flags) |= AuCpup_##name; } while (0) ++#define au_fclr_cpup(flags, name) \ ++ do { (flags) &= ~AuCpup_##name; } while (0) + -+ return err; -+} ++int au_copy_file(struct file *dst, struct file *src, loff_t len); ++int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ struct dentry *dst_parent, struct au_pin *pin); ++int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags, struct au_pin *pin); ++int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ struct file *file, struct au_pin *pin); + -+static void au_do_hide(struct dentry *dentry) -+{ -+ struct inode *inode; ++int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, ++ struct au_pin *pin, ++ struct dentry *h_parent, void *arg), ++ void *arg); ++int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); ++int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); + -+ inode = dentry->d_inode; -+ if (inode) { -+ if (!S_ISDIR(inode->i_mode)) { -+ if (inode->i_nlink && !d_unhashed(dentry)) -+ drop_nlink(inode); -+ } else { -+ clear_nlink(inode); -+ /* stop next lookup */ -+ inode->i_flags |= S_DEAD; -+ } -+ smp_mb(); /* necessary? */ -+ } -+ d_drop(dentry); -+} ++/* ---------------------------------------------------------------------- */ + -+static int au_hide_children(struct dentry *parent) -+{ -+ int err, i, j, ndentry; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry *dentry; ++/* keep timestamps when copyup */ ++struct au_dtime { ++ struct dentry *dt_dentry; ++ struct path dt_h_path; ++ struct timespec dt_atime, dt_mtime; ++}; ++void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, ++ struct path *h_path); ++void au_dtime_revert(struct au_dtime *dt); + -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, parent, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_CPUP_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/dbgaufs.c 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,433 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ /* in reverse order */ -+ for (i = dpages.ndpage - 1; i >= 0; i--) { -+ dpage = dpages.dpages + i; -+ ndentry = dpage->ndentry; -+ for (j = ndentry - 1; j >= 0; j--) { -+ dentry = dpage->dentries[j]; -+ if (dentry != parent) -+ au_do_hide(dentry); -+ } -+ } ++/* ++ * debugfs interface ++ */ + -+out_dpages: -+ au_dpages_free(&dpages); -+out: -+ return err; -+} ++#include ++#include "aufs.h" + -+static void au_hide(struct dentry *dentry) -+{ -+ int err; -+ struct inode *inode; ++#ifndef CONFIG_SYSFS ++#error DEBUG_FS depends upon SYSFS ++#endif + -+ AuDbgDentry(dentry); -+ inode = dentry->d_inode; -+ if (inode && S_ISDIR(inode->i_mode)) { -+ /* shrink_dcache_parent(dentry); */ -+ err = au_hide_children(dentry); -+ if (unlikely(err)) -+ AuIOErr("%.*s, failed hiding children, ignored %d\n", -+ AuDLNPair(dentry), err); -+ } -+ au_do_hide(dentry); -+} ++static struct dentry *dbgaufs; ++static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; ++ ++/* 20 is max digits length of ulong 64 */ ++struct dbgaufs_arg { ++ int n; ++ char a[20 * 4]; ++}; + +/* -+ * By adding a dirty branch, a cached dentry may be affected in various ways. -+ * -+ * a dirty branch is added -+ * - on the top of layers -+ * - in the middle of layers -+ * - to the bottom of layers -+ * -+ * on the added branch there exists -+ * - a whiteout -+ * - a diropq -+ * - a same named entry -+ * + exist -+ * * negative --> positive -+ * * positive --> positive -+ * - type is unchanged -+ * - type is changed -+ * + doesn't exist -+ * * negative --> negative -+ * * positive --> negative (rejected by au_br_del() for non-dir case) -+ * - none ++ * common function for all XINO files + */ -+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo, -+ struct au_dinfo *tmp) ++static int dbgaufs_xi_release(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ kfree(file->private_data); ++ return 0; ++} ++ ++static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt) +{ + int err; -+ aufs_bindex_t bindex, bend; -+ struct { -+ struct dentry *dentry; -+ struct inode *inode; -+ mode_t mode; -+ } orig_h, tmp_h; -+ struct au_hdentry *hd; -+ struct inode *inode, *h_inode; -+ struct dentry *h_dentry; ++ struct kstat st; ++ struct dbgaufs_arg *p; + -+ err = 0; -+ AuDebugOn(dinfo->di_bstart < 0); -+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry; -+ orig_h.inode = orig_h.dentry->d_inode; -+ orig_h.mode = 0; -+ if (orig_h.inode) -+ orig_h.mode = orig_h.inode->i_mode & S_IFMT; -+ memset(&tmp_h, 0, sizeof(tmp_h)); -+ if (tmp->di_bstart >= 0) { -+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry; -+ tmp_h.inode = tmp_h.dentry->d_inode; -+ if (tmp_h.inode) -+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT; -+ } ++ err = -ENOMEM; ++ p = kmalloc(sizeof(*p), GFP_NOFS); ++ if (unlikely(!p)) ++ goto out; + -+ inode = dentry->d_inode; -+ if (!orig_h.inode) { -+ AuDbg("nagative originally\n"); -+ if (inode) { -+ au_hide(dentry); -+ goto out; -+ } -+ AuDebugOn(inode); -+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); -+ AuDebugOn(dinfo->di_bdiropq != -1); ++ err = 0; ++ p->n = 0; ++ file->private_data = p; ++ if (!xf) ++ goto out; + -+ if (!tmp_h.inode) { -+ AuDbg("negative --> negative\n"); -+ /* should have only one negative lower */ -+ if (tmp->di_bstart >= 0 -+ && tmp->di_bstart < dinfo->di_bstart) { -+ AuDebugOn(tmp->di_bstart != tmp->di_bend); -+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); -+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL); -+ au_di_cp(dinfo, tmp); -+ hd = tmp->di_hdentry + tmp->di_bstart; -+ au_set_h_dptr(dentry, tmp->di_bstart, -+ dget(hd->hd_dentry)); -+ } -+ au_dbg_verify_dinode(dentry); -+ } else { -+ AuDbg("negative --> positive\n"); -+ /* -+ * similar to the behaviour of creating with bypassing -+ * aufs. -+ * unhash it in order to force an error in the -+ * succeeding create operation. -+ * we should not set S_DEAD here. -+ */ -+ d_drop(dentry); -+ /* au_di_swap(tmp, dinfo); */ -+ au_dbg_verify_dinode(dentry); -+ } ++ err = vfs_getattr(&xf->f_path, &st); ++ if (!err) { ++ if (do_fcnt) ++ p->n = snprintf ++ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n", ++ (long)file_count(xf), st.blocks, st.blksize, ++ (long long)st.size); ++ else ++ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n", ++ st.blocks, st.blksize, ++ (long long)st.size); ++ AuDebugOn(p->n >= sizeof(p->a)); + } else { -+ AuDbg("positive originally\n"); -+ /* inode may be NULL */ -+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode); -+ if (!tmp_h.inode) { -+ AuDbg("positive --> negative\n"); -+ /* or bypassing aufs */ -+ au_hide(dentry); -+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart) -+ dinfo->di_bwh = tmp->di_bwh; -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ } else if (orig_h.mode == tmp_h.mode) { -+ AuDbg("positive --> positive, same type\n"); -+ if (!S_ISDIR(orig_h.mode) -+ && dinfo->di_bstart > tmp->di_bstart) { -+ /* -+ * similar to the behaviour of removing and -+ * creating. -+ */ -+ au_hide(dentry); -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ } else { -+ /* fill empty slots */ -+ if (dinfo->di_bstart > tmp->di_bstart) -+ dinfo->di_bstart = tmp->di_bstart; -+ if (dinfo->di_bend < tmp->di_bend) -+ dinfo->di_bend = tmp->di_bend; -+ dinfo->di_bwh = tmp->di_bwh; -+ dinfo->di_bdiropq = tmp->di_bdiropq; -+ hd = tmp->di_hdentry; -+ bend = dinfo->di_bend; -+ for (bindex = tmp->di_bstart; bindex <= bend; -+ bindex++) { -+ if (au_h_dptr(dentry, bindex)) -+ continue; -+ h_dentry = hd[bindex].hd_dentry; -+ if (!h_dentry) -+ continue; -+ h_inode = h_dentry->d_inode; -+ AuDebugOn(!h_inode); -+ AuDebugOn(orig_h.mode -+ != (h_inode->i_mode -+ & S_IFMT)); -+ au_set_h_dptr(dentry, bindex, -+ dget(h_dentry)); -+ } -+ err = au_refresh_hinode(inode, dentry); -+ au_dbg_verify_dinode(dentry); -+ } -+ } else { -+ AuDbg("positive --> positive, different type\n"); -+ /* similar to the behaviour of removing and creating */ -+ au_hide(dentry); -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ } ++ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err); ++ err = 0; + } + +out: + return err; ++ +} + -+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent) ++static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) +{ -+ int err, ebrange; -+ unsigned int sigen; -+ struct au_dinfo *dinfo, *tmp; -+ struct super_block *sb; -+ struct inode *inode; ++ struct dbgaufs_arg *p; + -+ DiMustWriteLock(dentry); -+ AuDebugOn(IS_ROOT(dentry)); -+ AuDebugOn(!parent->d_inode); ++ p = file->private_data; ++ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); ++} + -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ sigen = au_sigen(sb); -+ err = au_digen_test(parent, sigen); -+ if (unlikely(err)) -+ goto out; ++/* ---------------------------------------------------------------------- */ + -+ dinfo = au_di(dentry); -+ err = au_di_realloc(dinfo, au_sbend(sb) + 1); -+ if (unlikely(err)) -+ goto out; -+ ebrange = au_dbrange_test(dentry); -+ if (!ebrange) -+ ebrange = au_do_refresh_hdentry(dentry, parent); ++struct dbgaufs_plink_arg { ++ int n; ++ char a[]; ++}; + -+ if (d_unhashed(dentry) || ebrange) { -+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0); -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ if (!err) -+ goto out_dgen; /* success */ -+ goto out; -+ } ++static int dbgaufs_plink_release(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ free_page((unsigned long)file->private_data); ++ return 0; ++} ++ ++static int dbgaufs_plink_open(struct inode *inode, struct file *file) ++{ ++ int err, i, limit; ++ unsigned long n, sum; ++ struct dbgaufs_plink_arg *p; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ struct au_sphlhead *sphl; + -+ /* temporary dinfo */ -+ AuDbgDentry(dentry); + err = -ENOMEM; -+ tmp = au_di_alloc(sb, AuLsc_DI_TMP); -+ if (unlikely(!tmp)) -+ goto out; -+ au_di_swap(tmp, dinfo); -+ /* returns the number of positive dentries */ -+ /* -+ * if current working dir is removed, it returns an error. -+ * but the dentry is legal. -+ */ -+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0); -+ AuDbgDentry(dentry); -+ au_di_swap(tmp, dinfo); -+ if (err == -ENOENT) -+ err = 0; -+ if (err >= 0) { -+ /* compare/refresh by dinfo */ -+ AuDbgDentry(dentry); -+ err = au_refresh_by_dinfo(dentry, dinfo, tmp); -+ au_dbg_verify_dinode(dentry); -+ AuTraceErr(err); -+ } -+ au_rw_write_unlock(&tmp->di_rwsem); -+ au_di_free(tmp); -+ if (unlikely(err)) ++ p = (void *)get_zeroed_page(GFP_NOFS); ++ if (unlikely(!p)) + goto out; + -+out_dgen: -+ au_update_digen(dentry); -+out: -+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { -+ AuIOErr("failed refreshing %.*s, %d\n", -+ AuDLNPair(dentry), err); -+ AuDbgDentry(dentry); ++ err = -EFBIG; ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ limit = PAGE_SIZE - sizeof(p->n); ++ ++ /* the number of buckets */ ++ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH); ++ p->n += n; ++ limit -= n; ++ ++ sum = 0; ++ for (i = 0, sphl = sbinfo->si_plink; ++ i < AuPlink_NHASH; ++ i++, sphl++) { ++ n = au_sphl_count(sphl); ++ sum += n; ++ ++ n = snprintf(p->a + p->n, limit, "%lu ", n); ++ p->n += n; ++ limit -= n; ++ if (unlikely(limit <= 0)) ++ goto out_free; ++ } ++ p->a[p->n - 1] = '\n'; ++ ++ /* the sum of plinks */ ++ n = snprintf(p->a + p->n, limit, "%lu\n", sum); ++ p->n += n; ++ limit -= n; ++ if (unlikely(limit <= 0)) ++ goto out_free; ++ } else { ++#define str "1\n0\n0\n" ++ p->n = sizeof(str) - 1; ++ strcpy(p->a, str); ++#undef str + } -+ AuTraceErr(err); ++ si_read_unlock(sb); ++ ++ err = 0; ++ file->private_data = p; ++ goto out; /* success */ ++ ++out_free: ++ free_page((unsigned long)p); ++out: + return err; +} + -+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags, -+ struct dentry *dentry, aufs_bindex_t bindex) ++static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) +{ -+ int err, valid; ++ struct dbgaufs_plink_arg *p; + -+ err = 0; -+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE)) -+ goto out; ++ p = file->private_data; ++ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); ++} + -+ AuDbg("b%d\n", bindex); -+ /* -+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs, -+ * due to whiteout and branch permission. -+ */ -+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE -+ | LOOKUP_FOLLOW | LOOKUP_EXCL); -+ /* it may return tri-state */ -+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags); ++static const struct file_operations dbgaufs_plink_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_plink_open, ++ .release = dbgaufs_plink_release, ++ .read = dbgaufs_plink_read ++}; + -+ if (unlikely(valid < 0)) -+ err = valid; -+ else if (!valid) -+ err = -EINVAL; ++/* ---------------------------------------------------------------------- */ + -+out: -+ AuTraceErr(err); ++static int dbgaufs_xib_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0); ++ si_read_unlock(sb); + return err; +} + -+/* todo: remove this */ -+static int h_d_revalidate(struct dentry *dentry, struct inode *inode, -+ unsigned int flags, int do_udba) ++static const struct file_operations dbgaufs_xib_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xib_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define DbgaufsXi_PREFIX "xi" ++ ++static int dbgaufs_xino_open(struct inode *inode, struct file *file) +{ + int err; -+ umode_t mode, h_mode; -+ aufs_bindex_t bindex, btail, bstart, ibs, ibe; -+ unsigned char plus, unhashed, is_root, h_plus; -+ struct inode *h_inode, *h_cached_inode; -+ struct dentry *h_dentry; -+ struct qstr *name, *h_name; ++ long l; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; ++ struct file *xf; ++ struct qstr *name; + -+ err = 0; -+ plus = 0; -+ mode = 0; -+ ibs = -1; -+ ibe = -1; -+ unhashed = !!d_unhashed(dentry); -+ is_root = !!IS_ROOT(dentry); -+ name = &dentry->d_name; ++ err = -ENOENT; ++ xf = NULL; ++ name = &file->f_dentry->d_name; ++ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX) ++ || memcmp(name->name, DbgaufsXi_PREFIX, ++ sizeof(DbgaufsXi_PREFIX) - 1))) ++ goto out; ++ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l); ++ if (unlikely(err)) ++ goto out; + -+ /* -+ * Theoretically, REVAL test should be unnecessary in case of -+ * {FS,I}NOTIFY. -+ * But {fs,i}notify doesn't fire some necessary events, -+ * IN_ATTRIB for atime/nlink/pageio -+ * IN_DELETE for NFS dentry -+ * Let's do REVAL test too. -+ */ -+ if (do_udba && inode) { -+ mode = (inode->i_mode & S_IFMT); -+ plus = (inode->i_nlink > 0); -+ ibs = au_ibstart(inode); -+ ibe = au_ibend(inode); -+ } ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ if (l <= au_sbend(sb)) { ++ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file; ++ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1); ++ } else ++ err = -ENOENT; ++ si_read_unlock(sb); + -+ bstart = au_dbstart(dentry); -+ btail = bstart; -+ if (inode && S_ISDIR(inode->i_mode)) -+ btail = au_dbtaildir(dentry); -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; ++out: ++ return err; ++} + -+ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry)); -+ spin_lock(&h_dentry->d_lock); -+ h_name = &h_dentry->d_name; -+ if (unlikely(do_udba -+ && !is_root -+ && (unhashed != !!d_unhashed(h_dentry) -+ || name->len != h_name->len -+ || memcmp(name->name, h_name->name, name->len)) -+ )) { -+ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n", -+ unhashed, d_unhashed(h_dentry), -+ AuDLNPair(dentry), AuDLNPair(h_dentry)); -+ spin_unlock(&h_dentry->d_lock); -+ goto err; -+ } -+ spin_unlock(&h_dentry->d_lock); ++static const struct file_operations dbgaufs_xino_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xino_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; + -+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex); -+ if (unlikely(err)) -+ /* do not goto err, to keep the errno */ -+ break; ++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ aufs_bindex_t bend; ++ struct au_branch *br; ++ struct au_xino_file *xi; + -+ /* todo: plink too? */ -+ if (!do_udba) -+ continue; ++ if (!au_sbi(sb)->si_dbgaufs) ++ return; + -+ /* UDBA tests */ -+ h_inode = h_dentry->d_inode; -+ if (unlikely(!!inode != !!h_inode)) -+ goto err; ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ xi = &br->br_xino; ++ debugfs_remove(xi->xi_dbgaufs); ++ xi->xi_dbgaufs = NULL; ++ } ++} + -+ h_plus = plus; -+ h_mode = mode; -+ h_cached_inode = h_inode; -+ if (h_inode) { -+ h_mode = (h_inode->i_mode & S_IFMT); -+ h_plus = (h_inode->i_nlink > 0); -+ } -+ if (inode && ibs <= bindex && bindex <= ibe) -+ h_cached_inode = au_h_iptr(inode, bindex); ++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ struct au_sbinfo *sbinfo; ++ struct dentry *parent; ++ struct au_branch *br; ++ struct au_xino_file *xi; ++ aufs_bindex_t bend; ++ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */ + -+ if (unlikely(plus != h_plus -+ || mode != h_mode -+ || h_cached_inode != h_inode)) -+ goto err; -+ continue; ++ sbinfo = au_sbi(sb); ++ parent = sbinfo->si_dbgaufs; ++ if (!parent) ++ return; + -+ err: -+ err = -EINVAL; -+ break; ++ bend = au_sbend(sb); ++ for (; bindex <= bend; bindex++) { ++ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex); ++ br = au_sbr(sb, bindex); ++ xi = &br->br_xino; ++ AuDebugOn(xi->xi_dbgaufs); ++ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent, ++ sbinfo, &dbgaufs_xino_fop); ++ /* ignore an error */ ++ if (unlikely(!xi->xi_dbgaufs)) ++ AuWarn1("failed %s under debugfs\n", name); + } -+ -+ return err; +} + -+/* todo: consolidate with do_refresh() and au_reval_for_attr() */ -+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen) ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_EXPORT ++static int dbgaufs_xigen_open(struct inode *inode, struct file *file) +{ + int err; -+ struct dentry *parent; -+ -+ if (!au_digen_test(dentry, sigen)) -+ return 0; ++ struct au_sbinfo *sbinfo; ++ struct super_block *sb; + -+ parent = dget_parent(dentry); -+ di_read_lock_parent(parent, AuLock_IR); -+ AuDebugOn(au_digen_test(parent, sigen)); -+ au_dbg_verify_gen(parent, sigen); -+ err = au_refresh_dentry(dentry, parent); -+ di_read_unlock(parent, AuLock_IR); -+ dput(parent); -+ AuTraceErr(err); ++ sbinfo = inode->i_private; ++ sb = sbinfo->si_sb; ++ si_noflush_read_lock(sb); ++ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0); ++ si_read_unlock(sb); + return err; +} + -+int au_reval_dpath(struct dentry *dentry, unsigned int sigen) ++static const struct file_operations dbgaufs_xigen_fop = { ++ .owner = THIS_MODULE, ++ .open = dbgaufs_xigen_open, ++ .release = dbgaufs_xi_release, ++ .read = dbgaufs_xi_read ++}; ++ ++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) +{ + int err; -+ struct dentry *d, *parent; -+ struct inode *inode; -+ -+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR)) -+ return simple_reval_dpath(dentry, sigen); -+ -+ /* slow loop, keep it simple and stupid */ -+ /* cf: au_cpup_dirs() */ -+ err = 0; -+ parent = NULL; -+ while (au_digen_test(dentry, sigen)) { -+ d = dentry; -+ while (1) { -+ dput(parent); -+ parent = dget_parent(d); -+ if (!au_digen_test(parent, sigen)) -+ break; -+ d = parent; -+ } -+ -+ inode = d->d_inode; -+ if (d != dentry) -+ di_write_lock_child2(d); + -+ /* someone might update our dentry while we were sleeping */ -+ if (au_digen_test(d, sigen)) { -+ /* -+ * todo: consolidate with simple_reval_dpath(), -+ * do_refresh() and au_reval_for_attr(). -+ */ -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_refresh_dentry(d, parent); -+ di_read_unlock(parent, AuLock_IR); -+ } ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ + -+ if (d != dentry) -+ di_write_unlock(d); -+ dput(parent); -+ if (unlikely(err)) -+ break; -+ } ++ err = -EIO; ++ sbinfo->si_dbgaufs_xigen = debugfs_create_file ++ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_xigen_fop); ++ if (sbinfo->si_dbgaufs_xigen) ++ err = 0; + + return err; +} -+ -+/* -+ * if valid returns 1, otherwise 0. -+ */ -+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags) ++#else ++static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) +{ -+ int valid, err; -+ unsigned int sigen; -+ unsigned char do_udba; -+ struct super_block *sb; -+ struct inode *inode; ++ return 0; ++} ++#endif /* CONFIG_AUFS_EXPORT */ + -+ /* todo: support rcu-walk? */ -+ if (flags & LOOKUP_RCU) -+ return -ECHILD; ++/* ---------------------------------------------------------------------- */ + -+ valid = 0; -+ if (unlikely(!au_di(dentry))) -+ goto out; ++void dbgaufs_si_fin(struct au_sbinfo *sbinfo) ++{ ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ + -+ inode = dentry->d_inode; -+ if (inode && is_bad_inode(inode)) -+ goto out; ++ debugfs_remove_recursive(sbinfo->si_dbgaufs); ++ sbinfo->si_dbgaufs = NULL; ++ kobject_put(&sbinfo->si_kobj); ++} ++ ++int dbgaufs_si_init(struct au_sbinfo *sbinfo) ++{ ++ int err; ++ char name[SysaufsSiNameLen]; + -+ valid = 1; -+ sb = dentry->d_sb; + /* -+ * todo: very ugly -+ * i_mutex of parent dir may be held, -+ * but we should not return 'invalid' due to busy. ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. + */ -+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM); -+ if (unlikely(err)) { -+ valid = err; -+ AuTraceErr(err); -+ goto out; -+ } -+ if (unlikely(au_dbrange_test(dentry))) { -+ err = -EINVAL; -+ AuTraceErr(err); -+ goto out_dgrade; -+ } ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ + -+ sigen = au_sigen(sb); -+ if (au_digen_test(dentry, sigen)) { -+ AuDebugOn(IS_ROOT(dentry)); -+ err = au_reval_dpath(dentry, sigen); -+ if (unlikely(err)) { -+ AuTraceErr(err); -+ goto out_dgrade; -+ } ++ err = -ENOENT; ++ if (!dbgaufs) { ++ AuErr1("/debug/aufs is uninitialized\n"); ++ goto out; + } -+ di_downgrade_lock(dentry, AuLock_IR); + -+ err = -EINVAL; -+ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink)) -+ goto out_inval; ++ err = -EIO; ++ sysaufs_name(sbinfo, name); ++ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs); ++ if (unlikely(!sbinfo->si_dbgaufs)) ++ goto out; ++ kobject_get(&sbinfo->si_kobj); + -+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE); -+ if (do_udba && inode) { -+ aufs_bindex_t bstart = au_ibstart(inode); -+ struct inode *h_inode; ++ sbinfo->si_dbgaufs_xib = debugfs_create_file ++ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_xib_fop); ++ if (unlikely(!sbinfo->si_dbgaufs_xib)) ++ goto out_dir; + -+ if (bstart >= 0) { -+ h_inode = au_h_iptr(inode, bstart); -+ if (h_inode && au_test_higen(inode, h_inode)) -+ goto out_inval; -+ } -+ } ++ sbinfo->si_dbgaufs_plink = debugfs_create_file ++ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, ++ &dbgaufs_plink_fop); ++ if (unlikely(!sbinfo->si_dbgaufs_plink)) ++ goto out_dir; + -+ err = h_d_revalidate(dentry, inode, flags, do_udba); -+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) { -+ err = -EIO; -+ AuDbg("both of real entry and whiteout found, %.*s, err %d\n", -+ AuDLNPair(dentry), err); -+ } -+ goto out_inval; ++ err = dbgaufs_xigen_init(sbinfo); ++ if (!err) ++ goto out; /* success */ + -+out_dgrade: -+ di_downgrade_lock(dentry, AuLock_IR); -+out_inval: -+ aufs_read_unlock(dentry, AuLock_IR); -+ AuTraceErr(err); -+ valid = !err; ++out_dir: ++ dbgaufs_si_fin(sbinfo); +out: -+ if (!valid) { -+ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid); -+ d_drop(dentry); -+ } -+ return valid; ++ return err; +} + -+static void aufs_d_release(struct dentry *dentry) ++/* ---------------------------------------------------------------------- */ ++ ++void dbgaufs_fin(void) +{ -+ if (au_di(dentry)) { -+ au_di_fin(dentry); -+ au_hn_di_reinit(dentry); -+ } ++ debugfs_remove(dbgaufs); +} + -+const struct dentry_operations aufs_dop = { -+ .d_revalidate = aufs_d_revalidate, -+ .d_release = aufs_d_release -+}; ++int __init dbgaufs_init(void) ++{ ++ int err; ++ ++ err = -EIO; ++ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL); ++ if (dbgaufs) ++ err = 0; ++ return err; ++} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/dentry.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,234 @@ ++++ linux-3.9/fs/aufs/dbgaufs.h 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -6613,224 +4989,382 @@ + */ + +/* -+ * lookup and dentry operations ++ * debugfs interface ++ */ ++ ++#ifndef __DBGAUFS_H__ ++#define __DBGAUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++struct super_block; ++struct au_sbinfo; ++ ++#ifdef CONFIG_DEBUG_FS ++/* dbgaufs.c */ ++void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); ++void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); ++void dbgaufs_si_fin(struct au_sbinfo *sbinfo); ++int dbgaufs_si_init(struct au_sbinfo *sbinfo); ++void dbgaufs_fin(void); ++int __init dbgaufs_init(void); ++#else ++AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) ++AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo) ++AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo) ++AuStubVoid(dbgaufs_fin, void) ++AuStubInt0(__init dbgaufs_init, void) ++#endif /* CONFIG_DEBUG_FS */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __DBGAUFS_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/dcsub.c 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,243 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * sub-routines for dentry cache + */ + -+#ifndef __AUFS_DENTRY_H__ -+#define __AUFS_DENTRY_H__ ++#include "aufs.h" ++ ++static void au_dpage_free(struct au_dpage *dpage) ++{ ++ int i; ++ struct dentry **p; + -+#ifdef __KERNEL__ ++ p = dpage->dentries; ++ for (i = 0; i < dpage->ndentry; i++) ++ dput(*p++); ++ free_page((unsigned long)dpage->dentries); ++} + -+#include -+#include "rwsem.h" ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) ++{ ++ int err; ++ void *p; + -+struct au_hdentry { -+ struct dentry *hd_dentry; -+ aufs_bindex_t hd_id; -+}; ++ err = -ENOMEM; ++ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); ++ if (unlikely(!dpages->dpages)) ++ goto out; + -+struct au_dinfo { -+ atomic_t di_generation; ++ p = (void *)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out_dpages; + -+ struct au_rwsem di_rwsem; -+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; -+ struct au_hdentry *di_hdentry; -+} ____cacheline_aligned_in_smp; ++ dpages->dpages[0].ndentry = 0; ++ dpages->dpages[0].dentries = p; ++ dpages->ndpage = 1; ++ return 0; /* success */ + -+/* ---------------------------------------------------------------------- */ ++out_dpages: ++ kfree(dpages->dpages); ++out: ++ return err; ++} + -+/* dentry.c */ -+extern const struct dentry_operations aufs_dop; -+struct au_branch; -+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, -+ struct au_branch *br); -+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, -+ struct dentry *h_parent, struct au_branch *br); ++void au_dpages_free(struct au_dcsub_pages *dpages) ++{ ++ int i; ++ struct au_dpage *p; + -+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type); -+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex); -+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent); -+int au_reval_dpath(struct dentry *dentry, unsigned int sigen); ++ p = dpages->dpages; ++ for (i = 0; i < dpages->ndpage; i++) ++ au_dpage_free(p++); ++ kfree(dpages->dpages); ++} + -+/* dinfo.c */ -+void au_di_init_once(void *_di); -+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc); -+void au_di_free(struct au_dinfo *dinfo); -+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b); -+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src); -+int au_di_init(struct dentry *dentry); -+void au_di_fin(struct dentry *dentry); -+int au_di_realloc(struct au_dinfo *dinfo, int nbr); ++static int au_dpages_append(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, gfp_t gfp) ++{ ++ int err, sz; ++ struct au_dpage *dpage; ++ void *p; + -+void di_read_lock(struct dentry *d, int flags, unsigned int lsc); -+void di_read_unlock(struct dentry *d, int flags); -+void di_downgrade_lock(struct dentry *d, int flags); -+void di_write_lock(struct dentry *d, unsigned int lsc); -+void di_write_unlock(struct dentry *d); -+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir); -+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir); -+void di_write_unlock2(struct dentry *d1, struct dentry *d2); ++ dpage = dpages->dpages + dpages->ndpage - 1; ++ sz = PAGE_SIZE / sizeof(dentry); ++ if (unlikely(dpage->ndentry >= sz)) { ++ AuLabel(new dpage); ++ err = -ENOMEM; ++ sz = dpages->ndpage * sizeof(*dpages->dpages); ++ p = au_kzrealloc(dpages->dpages, sz, ++ sz + sizeof(*dpages->dpages), gfp); ++ if (unlikely(!p)) ++ goto out; + -+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex); -+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex); -+aufs_bindex_t au_dbtail(struct dentry *dentry); -+aufs_bindex_t au_dbtaildir(struct dentry *dentry); ++ dpages->dpages = p; ++ dpage = dpages->dpages + dpages->ndpage; ++ p = (void *)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out; + -+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+int au_digen_test(struct dentry *dentry, unsigned int sigen); -+int au_dbrange_test(struct dentry *dentry); -+void au_update_digen(struct dentry *dentry); -+void au_update_dbrange(struct dentry *dentry, int do_put_zero); -+void au_update_dbstart(struct dentry *dentry); -+void au_update_dbend(struct dentry *dentry); -+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry); ++ dpage->ndentry = 0; ++ dpage->dentries = p; ++ dpages->ndpage++; ++ } + -+/* ---------------------------------------------------------------------- */ ++ AuDebugOn(!dentry->d_count); ++ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); ++ return 0; /* success */ + -+static inline struct au_dinfo *au_di(struct dentry *dentry) -+{ -+ return dentry->d_fsdata; ++out: ++ return err; +} + -+/* ---------------------------------------------------------------------- */ ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg) ++{ ++ int err; ++ struct dentry *this_parent; ++ struct list_head *next; ++ struct super_block *sb = root->d_sb; + -+/* lock subclass for dinfo */ -+enum { -+ AuLsc_DI_CHILD, /* child first */ -+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */ -+ AuLsc_DI_CHILD3, /* copyup dirs */ -+ AuLsc_DI_PARENT, -+ AuLsc_DI_PARENT2, -+ AuLsc_DI_PARENT3, -+ AuLsc_DI_TMP /* temp for replacing dinfo */ -+}; ++ err = 0; ++ write_seqlock(&rename_lock); ++ this_parent = root; ++ spin_lock(&this_parent->d_lock); ++repeat: ++ next = this_parent->d_subdirs.next; ++resume: ++ if (this_parent->d_sb == sb ++ && !IS_ROOT(this_parent) ++ && au_di(this_parent) ++ && this_parent->d_count ++ && (!test || test(this_parent, arg))) { ++ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); ++ if (unlikely(err)) ++ goto out; ++ } + -+/* -+ * di_read_lock_child, di_write_lock_child, -+ * di_read_lock_child2, di_write_lock_child2, -+ * di_read_lock_child3, di_write_lock_child3, -+ * di_read_lock_parent, di_write_lock_parent, -+ * di_read_lock_parent2, di_write_lock_parent2, -+ * di_read_lock_parent3, di_write_lock_parent3, -+ */ -+#define AuReadLockFunc(name, lsc) \ -+static inline void di_read_lock_##name(struct dentry *d, int flags) \ -+{ di_read_lock(d, flags, AuLsc_DI_##lsc); } ++ while (next != &this_parent->d_subdirs) { ++ struct list_head *tmp = next; ++ struct dentry *dentry = list_entry(tmp, struct dentry, ++ d_u.d_child); + -+#define AuWriteLockFunc(name, lsc) \ -+static inline void di_write_lock_##name(struct dentry *d) \ -+{ di_write_lock(d, AuLsc_DI_##lsc); } ++ next = tmp->next; ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ++ if (dentry->d_count) { ++ if (!list_empty(&dentry->d_subdirs)) { ++ spin_unlock(&this_parent->d_lock); ++ spin_release(&dentry->d_lock.dep_map, 1, ++ _RET_IP_); ++ this_parent = dentry; ++ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, ++ _RET_IP_); ++ goto repeat; ++ } ++ if (dentry->d_sb == sb ++ && au_di(dentry) ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, ++ GFP_ATOMIC); ++ } ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ } + -+#define AuRWLockFuncs(name, lsc) \ -+ AuReadLockFunc(name, lsc) \ -+ AuWriteLockFunc(name, lsc) ++ if (this_parent != root) { ++ struct dentry *tmp; ++ struct dentry *child; + -+AuRWLockFuncs(child, CHILD); -+AuRWLockFuncs(child2, CHILD2); -+AuRWLockFuncs(child3, CHILD3); -+AuRWLockFuncs(parent, PARENT); -+AuRWLockFuncs(parent2, PARENT2); -+AuRWLockFuncs(parent3, PARENT3); ++ tmp = this_parent->d_parent; ++ rcu_read_lock(); ++ spin_unlock(&this_parent->d_lock); ++ child = this_parent; ++ this_parent = tmp; ++ spin_lock(&this_parent->d_lock); ++ rcu_read_unlock(); ++ next = child->d_u.d_child.next; ++ goto resume; ++ } + -+#undef AuReadLockFunc -+#undef AuWriteLockFunc -+#undef AuRWLockFuncs ++out: ++ spin_unlock(&this_parent->d_lock); ++ write_sequnlock(&rename_lock); ++ return err; ++} + -+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) -+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem) -+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem) ++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, ++ int do_include, au_dpages_test test, void *arg) ++{ ++ int err; + -+/* ---------------------------------------------------------------------- */ ++ err = 0; ++ write_seqlock(&rename_lock); ++ spin_lock(&dentry->d_lock); ++ if (do_include ++ && dentry->d_count ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ ++ /* ++ * vfsmount_lock is unnecessary since this is a traverse in a single ++ * mount ++ */ ++ while (!IS_ROOT(dentry)) { ++ dentry = dentry->d_parent; /* rename_lock is locked */ ++ spin_lock(&dentry->d_lock); ++ if (dentry->d_count ++ && (!test || test(dentry, arg))) ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ spin_unlock(&dentry->d_lock); ++ if (unlikely(err)) ++ break; ++ } + -+/* todo: memory barrier? */ -+static inline unsigned int au_digen(struct dentry *d) -+{ -+ return atomic_read(&au_di(d)->di_generation); ++out: ++ write_sequnlock(&rename_lock); ++ return err; +} + -+static inline void au_h_dentry_init(struct au_hdentry *hdentry) ++static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg) +{ -+ hdentry->hd_dentry = NULL; ++ return au_di(dentry) && dentry->d_sb == arg; +} + -+static inline void au_hdput(struct au_hdentry *hd) ++int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, int do_include) +{ -+ if (hd) -+ dput(hd->hd_dentry); ++ return au_dcsub_pages_rev(dpages, dentry, do_include, ++ au_dcsub_dpages_aufs, dentry->d_sb); +} + -+static inline aufs_bindex_t au_dbstart(struct dentry *dentry) ++int au_test_subdir(struct dentry *d1, struct dentry *d2) +{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bstart; -+} ++ struct path path[2] = { ++ { ++ .dentry = d1 ++ }, ++ { ++ .dentry = d2 ++ } ++ }; + -+static inline aufs_bindex_t au_dbend(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bend; ++ return path_is_under(path + 0, path + 1); +} +--- /dev/null ++++ linux-3.9/fs/aufs/dcsub.h 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+static inline aufs_bindex_t au_dbwh(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bwh; -+} ++/* ++ * sub-routines for dentry cache ++ */ + -+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bdiropq; -+} ++#ifndef __AUFS_DCSUB_H__ ++#define __AUFS_DCSUB_H__ + -+/* todo: hard/soft set? */ -+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ au_di(dentry)->di_bstart = bindex; -+} ++#ifdef __KERNEL__ + -+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ au_di(dentry)->di_bend = bindex; -+} ++#include ++#include + -+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ /* dbwh can be outside of bstart - bend range */ -+ au_di(dentry)->di_bwh = bindex; -+} ++struct dentry; + -+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ au_di(dentry)->di_bdiropq = bindex; -+} ++struct au_dpage { ++ int ndentry; ++ struct dentry **dentries; ++}; ++ ++struct au_dcsub_pages { ++ int ndpage; ++ struct au_dpage *dpages; ++}; + +/* ---------------------------------------------------------------------- */ + -+#ifdef CONFIG_AUFS_HNOTIFY -+static inline void au_digen_dec(struct dentry *d) ++/* dcsub.c */ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); ++void au_dpages_free(struct au_dcsub_pages *dpages); ++typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg); ++int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, ++ int do_include, au_dpages_test test, void *arg); ++int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, int do_include); ++int au_test_subdir(struct dentry *d1, struct dentry *d2); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_d_hashed_positive(struct dentry *d) +{ -+ atomic_dec(&au_di(d)->di_generation); ++ int err; ++ struct inode *inode = d->d_inode; ++ err = 0; ++ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink)) ++ err = -ENOENT; ++ return err; +} + -+static inline void au_hn_di_reinit(struct dentry *dentry) ++static inline int au_d_alive(struct dentry *d) +{ -+ dentry->d_fsdata = NULL; ++ int err; ++ struct inode *inode; ++ err = 0; ++ if (!IS_ROOT(d)) ++ err = au_d_hashed_positive(d); ++ else { ++ inode = d->d_inode; ++ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink)) ++ err = -ENOENT; ++ } ++ return err; ++} ++ ++static inline int au_alive_dir(struct dentry *d) ++{ ++ int err; ++ err = au_d_alive(d); ++ if (unlikely(err || IS_DEADDIR(d->d_inode))) ++ err = -ENOENT; ++ return err; +} -+#else -+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused) -+#endif /* CONFIG_AUFS_HNOTIFY */ + +#endif /* __KERNEL__ */ -+#endif /* __AUFS_DENTRY_H__ */ ++#endif /* __AUFS_DCSUB_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/dinfo.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,543 @@ ++++ linux-3.9/fs/aufs/debug.c 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,491 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -6850,533 +5384,726 @@ + */ + +/* -+ * dentry private data ++ * debug print functions + */ + ++#include +#include "aufs.h" + -+void au_di_init_once(void *_dinfo) ++int aufs_debug; ++MODULE_PARM_DESC(debug, "debug print"); ++module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP); ++ ++char *au_plevel = KERN_DEBUG; ++#define dpri(fmt, ...) do { \ ++ if ((au_plevel \ ++ && strcmp(au_plevel, KERN_DEBUG)) \ ++ || au_debug_test()) \ ++ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \ ++} while (0) ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_dpri_whlist(struct au_nhash *whlist) +{ -+ struct au_dinfo *dinfo = _dinfo; -+ static struct lock_class_key aufs_di; ++ unsigned long ul, n; ++ struct hlist_head *head; ++ struct au_vdir_wh *pos; + -+ au_rw_init(&dinfo->di_rwsem); -+ au_rw_class(&dinfo->di_rwsem, &aufs_di); ++ n = whlist->nh_num; ++ head = whlist->nh_head; ++ for (ul = 0; ul < n; ul++) { ++ hlist_for_each_entry(pos, head, wh_hash) ++ dpri("b%d, %.*s, %d\n", ++ pos->wh_bindex, ++ pos->wh_str.len, pos->wh_str.name, ++ pos->wh_str.len); ++ head++; ++ } +} + -+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc) ++void au_dpri_vdir(struct au_vdir *vdir) +{ -+ struct au_dinfo *dinfo; -+ int nbr, i; ++ unsigned long ul; ++ union au_vdir_deblk_p p; ++ unsigned char *o; ++ ++ if (!vdir || IS_ERR(vdir)) { ++ dpri("err %ld\n", PTR_ERR(vdir)); ++ return; ++ } ++ ++ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n", ++ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk, ++ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version); ++ for (ul = 0; ul < vdir->vd_nblk; ul++) { ++ p.deblk = vdir->vd_deblk[ul]; ++ o = p.deblk; ++ dpri("[%lu]: %p\n", ul, o); ++ } ++} ++ ++static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn, ++ struct dentry *wh) ++{ ++ char *n = NULL; ++ int l = 0; ++ ++ if (!inode || IS_ERR(inode)) { ++ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode)); ++ return -1; ++ } ++ ++ /* the type of i_blocks depends upon CONFIG_LSF */ ++ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long) ++ && sizeof(inode->i_blocks) != sizeof(u64)); ++ if (wh) { ++ n = (void *)wh->d_name.name; ++ l = wh->d_name.len; ++ } ++ ++ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu," ++ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n", ++ bindex, inode, ++ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??", ++ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode, ++ i_size_read(inode), (unsigned long long)inode->i_blocks, ++ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, ++ inode->i_mapping ? inode->i_mapping->nrpages : 0, ++ inode->i_state, inode->i_flags, inode->i_version, ++ inode->i_generation, ++ l ? ", wh " : "", l, n); ++ return 0; ++} ++ ++void au_dpri_inode(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex; ++ int err, hn; + -+ dinfo = au_cache_alloc_dinfo(); -+ if (unlikely(!dinfo)) -+ goto out; ++ err = do_pri_inode(-1, inode, -1, NULL); ++ if (err || !au_test_aufs(inode->i_sb)) ++ return; + -+ nbr = au_sbend(sb) + 1; -+ if (nbr <= 0) -+ nbr = 1; -+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS); -+ if (dinfo->di_hdentry) { -+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc); -+ dinfo->di_bstart = -1; -+ dinfo->di_bend = -1; -+ dinfo->di_bwh = -1; -+ dinfo->di_bdiropq = -1; -+ for (i = 0; i < nbr; i++) -+ dinfo->di_hdentry[i].hd_id = -1; -+ goto out; ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; ++ dpri("i-1: bstart %d, bend %d, gen %d\n", ++ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL)); ++ if (iinfo->ii_bstart < 0) ++ return; ++ hn = 0; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) { ++ hn = !!au_hn(iinfo->ii_hinode + bindex); ++ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn, ++ iinfo->ii_hinode[0 + bindex].hi_whdentry); + } ++} + -+ au_cache_free_dinfo(dinfo); -+ dinfo = NULL; ++void au_dpri_dalias(struct inode *inode) ++{ ++ struct dentry *d; + -+out: -+ return dinfo; ++ spin_lock(&inode->i_lock); ++ hlist_for_each_entry(d, &inode->i_dentry, d_alias) ++ au_dpri_dentry(d); ++ spin_unlock(&inode->i_lock); +} + -+void au_di_free(struct au_dinfo *dinfo) ++static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) +{ -+ struct au_hdentry *p; -+ aufs_bindex_t bend, bindex; ++ struct dentry *wh = NULL; ++ int hn; + -+ /* dentry may not be revalidated */ -+ bindex = dinfo->di_bstart; -+ if (bindex >= 0) { -+ bend = dinfo->di_bend; -+ p = dinfo->di_hdentry + bindex; -+ while (bindex++ <= bend) -+ au_hdput(p++); ++ if (!dentry || IS_ERR(dentry)) { ++ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry)); ++ return -1; + } -+ kfree(dinfo->di_hdentry); -+ au_cache_free_dinfo(dinfo); ++ /* do not call dget_parent() here */ ++ /* note: access d_xxx without d_lock */ ++ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n", ++ bindex, ++ AuDLNPair(dentry->d_parent), AuDLNPair(dentry), ++ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??", ++ dentry->d_count, dentry->d_flags); ++ hn = -1; ++ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) { ++ struct au_iinfo *iinfo = au_ii(dentry->d_inode); ++ if (iinfo) { ++ hn = !!au_hn(iinfo->ii_hinode + bindex); ++ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry; ++ } ++ } ++ do_pri_inode(bindex, dentry->d_inode, hn, wh); ++ return 0; +} + -+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b) ++void au_dpri_dentry(struct dentry *dentry) +{ -+ struct au_hdentry *p; -+ aufs_bindex_t bi; -+ -+ AuRwMustWriteLock(&a->di_rwsem); -+ AuRwMustWriteLock(&b->di_rwsem); -+ -+#define DiSwap(v, name) \ -+ do { \ -+ v = a->di_##name; \ -+ a->di_##name = b->di_##name; \ -+ b->di_##name = v; \ -+ } while (0) ++ struct au_dinfo *dinfo; ++ aufs_bindex_t bindex; ++ int err; ++ struct au_hdentry *hdp; + -+ DiSwap(p, hdentry); -+ DiSwap(bi, bstart); -+ DiSwap(bi, bend); -+ DiSwap(bi, bwh); -+ DiSwap(bi, bdiropq); -+ /* smp_mb(); */ ++ err = do_pri_dentry(-1, dentry); ++ if (err || !au_test_aufs(dentry->d_sb)) ++ return; + -+#undef DiSwap ++ dinfo = au_di(dentry); ++ if (!dinfo) ++ return; ++ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n", ++ dinfo->di_bstart, dinfo->di_bend, ++ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry)); ++ if (dinfo->di_bstart < 0) ++ return; ++ hdp = dinfo->di_hdentry; ++ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++) ++ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry); +} + -+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src) ++static int do_pri_file(aufs_bindex_t bindex, struct file *file) +{ -+ AuRwMustWriteLock(&dst->di_rwsem); -+ AuRwMustWriteLock(&src->di_rwsem); ++ char a[32]; + -+ dst->di_bstart = src->di_bstart; -+ dst->di_bend = src->di_bend; -+ dst->di_bwh = src->di_bwh; -+ dst->di_bdiropq = src->di_bdiropq; -+ /* smp_mb(); */ ++ if (!file || IS_ERR(file)) { ++ dpri("f%d: err %ld\n", bindex, PTR_ERR(file)); ++ return -1; ++ } ++ a[0] = 0; ++ if (bindex < 0 ++ && file->f_dentry ++ && au_test_aufs(file->f_dentry->d_sb) ++ && au_fi(file)) ++ snprintf(a, sizeof(a), ", gen %d, mmapped %d", ++ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped)); ++ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n", ++ bindex, file->f_mode, file->f_flags, (long)file_count(file), ++ file->f_version, file->f_pos, a); ++ if (file->f_dentry) ++ do_pri_dentry(bindex, file->f_dentry); ++ return 0; +} + -+int au_di_init(struct dentry *dentry) ++void au_dpri_file(struct file *file) +{ ++ struct au_finfo *finfo; ++ struct au_fidir *fidir; ++ struct au_hfile *hfile; ++ aufs_bindex_t bindex; + int err; -+ struct super_block *sb; -+ struct au_dinfo *dinfo; + -+ err = 0; -+ sb = dentry->d_sb; -+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD); -+ if (dinfo) { -+ atomic_set(&dinfo->di_generation, au_sigen(sb)); -+ /* smp_mb(); */ /* atomic_set */ -+ dentry->d_fsdata = dinfo; -+ } else -+ err = -ENOMEM; ++ err = do_pri_file(-1, file); ++ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb)) ++ return; + -+ return err; ++ finfo = au_fi(file); ++ if (!finfo) ++ return; ++ if (finfo->fi_btop < 0) ++ return; ++ fidir = finfo->fi_hdir; ++ if (!fidir) ++ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file); ++ else ++ for (bindex = finfo->fi_btop; ++ bindex >= 0 && bindex <= fidir->fd_bbot; ++ bindex++) { ++ hfile = fidir->fd_hfile + bindex; ++ do_pri_file(bindex, hfile ? hfile->hf_file : NULL); ++ } +} + -+void au_di_fin(struct dentry *dentry) ++static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br) +{ -+ struct au_dinfo *dinfo; ++ struct vfsmount *mnt; ++ struct super_block *sb; + -+ dinfo = au_di(dentry); -+ AuRwDestroy(&dinfo->di_rwsem); -+ au_di_free(dinfo); ++ if (!br || IS_ERR(br)) ++ goto out; ++ mnt = au_br_mnt(br); ++ if (!mnt || IS_ERR(mnt)) ++ goto out; ++ sb = mnt->mnt_sb; ++ if (!sb || IS_ERR(sb)) ++ goto out; ++ ++ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, " ++ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, " ++ "xino %d\n", ++ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count), ++ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev), ++ sb->s_flags, sb->s_count, ++ atomic_read(&sb->s_active), !!br->br_xino.xi_file); ++ return 0; ++ ++out: ++ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); ++ return -1; +} + -+int au_di_realloc(struct au_dinfo *dinfo, int nbr) ++void au_dpri_sb(struct super_block *sb) +{ -+ int err, sz; -+ struct au_hdentry *hdp; -+ -+ AuRwMustWriteLock(&dinfo->di_rwsem); ++ struct au_sbinfo *sbinfo; ++ aufs_bindex_t bindex; ++ int err; ++ /* to reuduce stack size */ ++ struct { ++ struct vfsmount mnt; ++ struct au_branch fake; ++ } *a; + -+ err = -ENOMEM; -+ sz = sizeof(*hdp) * (dinfo->di_bend + 1); -+ if (!sz) -+ sz = sizeof(*hdp); -+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS); -+ if (hdp) { -+ dinfo->di_hdentry = hdp; -+ err = 0; ++ /* this function can be called from magic sysrq */ ++ a = kzalloc(sizeof(*a), GFP_ATOMIC); ++ if (unlikely(!a)) { ++ dpri("no memory\n"); ++ return; + } + -+ return err; ++ a->mnt.mnt_sb = sb; ++ a->fake.br_perm = 0; ++ a->fake.br_path.mnt = &a->mnt; ++ a->fake.br_xino.xi_file = NULL; ++ atomic_set(&a->fake.br_count, 0); ++ smp_mb(); /* atomic_set */ ++ err = do_pri_br(-1, &a->fake); ++ kfree(a); ++ dpri("dev 0x%x\n", sb->s_dev); ++ if (err || !au_test_aufs(sb)) ++ return; ++ ++ sbinfo = au_sbi(sb); ++ if (!sbinfo) ++ return; ++ dpri("nw %d, gen %u, kobj %d\n", ++ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation, ++ atomic_read(&sbinfo->si_kobj.kref.refcount)); ++ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++) ++ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]); +} + +/* ---------------------------------------------------------------------- */ + -+static void do_ii_write_lock(struct inode *inode, unsigned int lsc) ++void au_dbg_sleep_jiffy(int jiffy) +{ -+ switch (lsc) { -+ case AuLsc_DI_CHILD: -+ ii_write_lock_child(inode); -+ break; -+ case AuLsc_DI_CHILD2: -+ ii_write_lock_child2(inode); -+ break; -+ case AuLsc_DI_CHILD3: -+ ii_write_lock_child3(inode); -+ break; -+ case AuLsc_DI_PARENT: -+ ii_write_lock_parent(inode); -+ break; -+ case AuLsc_DI_PARENT2: -+ ii_write_lock_parent2(inode); -+ break; -+ case AuLsc_DI_PARENT3: -+ ii_write_lock_parent3(inode); -+ break; -+ default: -+ BUG(); -+ } ++ while (jiffy) ++ jiffy = schedule_timeout_uninterruptible(jiffy); +} + -+static void do_ii_read_lock(struct inode *inode, unsigned int lsc) ++void au_dbg_iattr(struct iattr *ia) +{ -+ switch (lsc) { -+ case AuLsc_DI_CHILD: -+ ii_read_lock_child(inode); -+ break; -+ case AuLsc_DI_CHILD2: -+ ii_read_lock_child2(inode); -+ break; -+ case AuLsc_DI_CHILD3: -+ ii_read_lock_child3(inode); -+ break; -+ case AuLsc_DI_PARENT: -+ ii_read_lock_parent(inode); -+ break; -+ case AuLsc_DI_PARENT2: -+ ii_read_lock_parent2(inode); -+ break; -+ case AuLsc_DI_PARENT3: -+ ii_read_lock_parent3(inode); -+ break; -+ default: -+ BUG(); -+ } ++#define AuBit(name) \ ++ do { \ ++ if (ia->ia_valid & ATTR_ ## name) \ ++ dpri(#name "\n"); \ ++ } while (0) ++ AuBit(MODE); ++ AuBit(UID); ++ AuBit(GID); ++ AuBit(SIZE); ++ AuBit(ATIME); ++ AuBit(MTIME); ++ AuBit(CTIME); ++ AuBit(ATIME_SET); ++ AuBit(MTIME_SET); ++ AuBit(FORCE); ++ AuBit(ATTR_FLAG); ++ AuBit(KILL_SUID); ++ AuBit(KILL_SGID); ++ AuBit(FILE); ++ AuBit(KILL_PRIV); ++ AuBit(OPEN); ++ AuBit(TIMES_SET); ++#undef AuBit ++ dpri("ia_file %p\n", ia->ia_file); +} + -+void di_read_lock(struct dentry *d, int flags, unsigned int lsc) -+{ -+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); -+ if (d->d_inode) { -+ if (au_ftest_lock(flags, IW)) -+ do_ii_write_lock(d->d_inode, lsc); -+ else if (au_ftest_lock(flags, IR)) -+ do_ii_read_lock(d->d_inode, lsc); -+ } -+} ++/* ---------------------------------------------------------------------- */ + -+void di_read_unlock(struct dentry *d, int flags) ++void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line) +{ -+ if (d->d_inode) { -+ if (au_ftest_lock(flags, IW)) { -+ au_dbg_verify_dinode(d); -+ ii_write_unlock(d->d_inode); -+ } else if (au_ftest_lock(flags, IR)) { -+ au_dbg_verify_dinode(d); -+ ii_read_unlock(d->d_inode); ++ struct inode *h_inode, *inode = dentry->d_inode; ++ struct dentry *h_dentry; ++ aufs_bindex_t bindex, bend, bi; ++ ++ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */) ++ return; ++ ++ bend = au_dbend(dentry); ++ bi = au_ibend(inode); ++ if (bi < bend) ++ bend = bi; ++ bindex = au_dbstart(dentry); ++ bi = au_ibstart(inode); ++ if (bi > bindex) ++ bindex = bi; ++ ++ for (; bindex <= bend; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_inode = au_h_iptr(inode, bindex); ++ if (unlikely(h_inode != h_dentry->d_inode)) { ++ int old = au_debug_test(); ++ if (!old) ++ au_debug(1); ++ AuDbg("b%d, %s:%d\n", bindex, func, line); ++ AuDbgDentry(dentry); ++ AuDbgInode(inode); ++ if (!old) ++ au_debug(0); ++ BUG(); + } + } -+ au_rw_read_unlock(&au_di(d)->di_rwsem); +} + -+void di_downgrade_lock(struct dentry *d, int flags) ++void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen) +{ -+ if (d->d_inode && au_ftest_lock(flags, IR)) -+ ii_downgrade_lock(d->d_inode); -+ au_rw_dgrade_lock(&au_di(d)->di_rwsem); -+} ++ struct dentry *parent; + -+void di_write_lock(struct dentry *d, unsigned int lsc) -+{ -+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc); -+ if (d->d_inode) -+ do_ii_write_lock(d->d_inode, lsc); ++ parent = dget_parent(dentry); ++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); ++ AuDebugOn(IS_ROOT(dentry)); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ dput(parent); +} + -+void di_write_unlock(struct dentry *d) ++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen) +{ -+ au_dbg_verify_dinode(d); -+ if (d->d_inode) -+ ii_write_unlock(d->d_inode); -+ au_rw_write_unlock(&au_di(d)->di_rwsem); ++ struct dentry *parent; ++ struct inode *inode; ++ ++ parent = dget_parent(dentry); ++ inode = dentry->d_inode; ++ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode)); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ dput(parent); +} + -+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) ++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen) +{ -+ AuDebugOn(d1 == d2 -+ || d1->d_inode == d2->d_inode -+ || d1->d_sb != d2->d_sb); ++ int err, i, j; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; + -+ if (isdir && au_test_subdir(d1, d2)) { -+ di_write_lock_child(d1); -+ di_write_lock_child2(d2); -+ } else { -+ /* there should be no races */ -+ di_write_lock_child(d2); -+ di_write_lock_child2(d1); ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ AuDebugOn(err); ++ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1); ++ AuDebugOn(err); ++ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ for (j = dpage->ndentry - 1; !err && j >= 0; j--) ++ AuDebugOn(au_digen_test(dentries[j], sigen)); + } ++ au_dpages_free(&dpages); +} + -+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) ++void au_dbg_verify_kthread(void) +{ -+ AuDebugOn(d1 == d2 -+ || d1->d_inode == d2->d_inode -+ || d1->d_sb != d2->d_sb); -+ -+ if (isdir && au_test_subdir(d1, d2)) { -+ di_write_lock_parent(d1); -+ di_write_lock_parent2(d2); -+ } else { -+ /* there should be no races */ -+ di_write_lock_parent(d2); -+ di_write_lock_parent2(d1); ++ if (au_wkq_test()) { ++ au_dbg_blocked(); ++ /* ++ * It may be recursive, but udba=notify between two aufs mounts, ++ * where a single ro branch is shared, is not a problem. ++ */ ++ /* WARN_ON(1); */ + } +} + -+void di_write_unlock2(struct dentry *d1, struct dentry *d2) ++/* ---------------------------------------------------------------------- */ ++ ++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused) +{ -+ di_write_unlock(d1); -+ if (d1->d_inode == d2->d_inode) -+ au_rw_write_unlock(&au_di(d2)->di_rwsem); -+ else -+ di_write_unlock(d2); ++#ifdef AuForceNoPlink ++ au_opt_clr(sbinfo->si_mntflags, PLINK); ++#endif ++#ifdef AuForceNoXino ++ au_opt_clr(sbinfo->si_mntflags, XINO); ++#endif ++#ifdef AuForceNoRefrof ++ au_opt_clr(sbinfo->si_mntflags, REFROF); ++#endif ++#ifdef AuForceHnotify ++ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY); ++#endif ++#ifdef AuForceRd0 ++ sbinfo->si_rdblk = 0; ++ sbinfo->si_rdhash = 0; ++#endif +} + -+/* ---------------------------------------------------------------------- */ -+ -+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex) ++int __init au_debug_init(void) +{ -+ struct dentry *d; ++ aufs_bindex_t bindex; ++ struct au_vdir_destr destr; + -+ DiMustAnyLock(dentry); ++ bindex = -1; ++ AuDebugOn(bindex >= 0); + -+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) -+ return NULL; -+ AuDebugOn(bindex < 0); -+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry; -+ AuDebugOn(d && d->d_count <= 0); -+ return d; -+} ++ destr.len = -1; ++ AuDebugOn(destr.len < NAME_MAX); ++ ++#ifdef CONFIG_4KSTACKS ++ pr_warn("CONFIG_4KSTACKS is defined.\n"); ++#endif ++ ++#ifdef AuForceNoBrs ++ sysaufs_brs = 0; ++#endif + ++ return 0; ++} +--- /dev/null ++++ linux-3.9/fs/aufs/debug.h 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,242 @@ +/* -+ * extended version of au_h_dptr(). -+ * returns a hashed and positive h_dentry in bindex, NULL, or error. ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ -+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ struct dentry *h_dentry; -+ struct inode *inode, *h_inode; + -+ inode = dentry->d_inode; -+ AuDebugOn(!inode); ++/* ++ * debug print functions ++ */ + -+ h_dentry = NULL; -+ if (au_dbstart(dentry) <= bindex -+ && bindex <= au_dbend(dentry)) -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && !au_d_hashed_positive(h_dentry)) { -+ dget(h_dentry); -+ goto out; /* success */ -+ } ++#ifndef __AUFS_DEBUG_H__ ++#define __AUFS_DEBUG_H__ + -+ AuDebugOn(bindex < au_ibstart(inode)); -+ AuDebugOn(au_ibend(inode) < bindex); -+ h_inode = au_h_iptr(inode, bindex); -+ h_dentry = d_find_alias(h_inode); -+ if (h_dentry) { -+ if (!IS_ERR(h_dentry)) { -+ if (!au_d_hashed_positive(h_dentry)) -+ goto out; /* success */ -+ dput(h_dentry); -+ } else -+ goto out; -+ } ++#ifdef __KERNEL__ + -+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) { -+ h_dentry = au_plink_lkup(inode, bindex); -+ AuDebugOn(!h_dentry); -+ if (!IS_ERR(h_dentry)) { -+ if (!au_d_hashed_positive(h_dentry)) -+ goto out; /* success */ -+ dput(h_dentry); -+ h_dentry = NULL; -+ } -+ } ++#include ++#include ++#include + -+out: -+ AuDbgDentry(h_dentry); -+ return h_dentry; -+} ++#ifdef CONFIG_AUFS_DEBUG ++#define AuDebugOn(a) BUG_ON(a) + -+aufs_bindex_t au_dbtail(struct dentry *dentry) ++/* module parameter */ ++extern int aufs_debug; ++static inline void au_debug(int n) +{ -+ aufs_bindex_t bend, bwh; -+ -+ bend = au_dbend(dentry); -+ if (0 <= bend) { -+ bwh = au_dbwh(dentry); -+ if (!bwh) -+ return bwh; -+ if (0 < bwh && bwh < bend) -+ return bwh - 1; -+ } -+ return bend; ++ aufs_debug = n; ++ smp_mb(); +} + -+aufs_bindex_t au_dbtaildir(struct dentry *dentry) ++static inline int au_debug_test(void) +{ -+ aufs_bindex_t bend, bopq; -+ -+ bend = au_dbtail(dentry); -+ if (0 <= bend) { -+ bopq = au_dbdiropq(dentry); -+ if (0 <= bopq && bopq < bend) -+ bend = bopq; -+ } -+ return bend; ++ return aufs_debug; +} ++#else ++#define AuDebugOn(a) do {} while (0) ++AuStubVoid(au_debug, int n) ++AuStubInt0(au_debug_test, void) ++#endif /* CONFIG_AUFS_DEBUG */ + +/* ---------------------------------------------------------------------- */ + -+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_dentry) -+{ -+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex; -+ struct au_branch *br; ++/* debug print */ + -+ DiMustWriteLock(dentry); ++#define AuDbg(fmt, ...) do { \ ++ if (au_debug_test()) \ ++ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \ ++} while (0) ++#define AuLabel(l) AuDbg(#l "\n") ++#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__) ++#define AuWarn1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ pr_warn(fmt, ##__VA_ARGS__); \ ++} while (0) + -+ au_hdput(hd); -+ hd->hd_dentry = h_dentry; -+ if (h_dentry) { -+ br = au_sbr(dentry->d_sb, bindex); -+ hd->hd_id = br->br_id; -+ } -+} ++#define AuErr1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ pr_err(fmt, ##__VA_ARGS__); \ ++} while (0) + -+int au_dbrange_test(struct dentry *dentry) -+{ -+ int err; -+ aufs_bindex_t bstart, bend; ++#define AuIOErr1(fmt, ...) do { \ ++ static unsigned char _c; \ ++ if (!_c++) \ ++ AuIOErr(fmt, ##__VA_ARGS__); \ ++} while (0) + -+ err = 0; -+ bstart = au_dbstart(dentry); -+ bend = au_dbend(dentry); -+ if (bstart >= 0) -+ AuDebugOn(bend < 0 && bstart > bend); -+ else { -+ err = -EIO; -+ AuDebugOn(bend >= 0); -+ } ++#define AuUnsupportMsg "This operation is not supported." \ ++ " Please report this application to aufs-users ML." ++#define AuUnsupport(fmt, ...) do { \ ++ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \ ++ dump_stack(); \ ++} while (0) + -+ return err; -+} ++#define AuTraceErr(e) do { \ ++ if (unlikely((e) < 0)) \ ++ AuDbg("err %d\n", (int)(e)); \ ++} while (0) + -+int au_digen_test(struct dentry *dentry, unsigned int sigen) -+{ -+ int err; ++#define AuTraceErrPtr(p) do { \ ++ if (IS_ERR(p)) \ ++ AuDbg("err %ld\n", PTR_ERR(p)); \ ++} while (0) + -+ err = 0; -+ if (unlikely(au_digen(dentry) != sigen -+ || au_iigen_test(dentry->d_inode, sigen))) -+ err = -EIO; ++/* dirty macros for debug print, use with "%.*s" and caution */ ++#define AuLNPair(qstr) (qstr)->len, (qstr)->name ++#define AuDLNPair(d) AuLNPair(&(d)->d_name) + -+ return err; -+} ++/* ---------------------------------------------------------------------- */ + -+void au_update_digen(struct dentry *dentry) -+{ -+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb)); -+ /* smp_mb(); */ /* atomic_set */ -+} ++struct au_sbinfo; ++struct au_finfo; ++struct dentry; ++#ifdef CONFIG_AUFS_DEBUG ++extern char *au_plevel; ++struct au_nhash; ++void au_dpri_whlist(struct au_nhash *whlist); ++struct au_vdir; ++void au_dpri_vdir(struct au_vdir *vdir); ++struct inode; ++void au_dpri_inode(struct inode *inode); ++void au_dpri_dalias(struct inode *inode); ++void au_dpri_dentry(struct dentry *dentry); ++struct file; ++void au_dpri_file(struct file *filp); ++struct super_block; ++void au_dpri_sb(struct super_block *sb); + -+void au_update_dbrange(struct dentry *dentry, int do_put_zero) -+{ -+ struct au_dinfo *dinfo; -+ struct dentry *h_d; -+ struct au_hdentry *hdp; ++void au_dbg_sleep_jiffy(int jiffy); ++struct iattr; ++void au_dbg_iattr(struct iattr *ia); + -+ DiMustWriteLock(dentry); ++#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__) ++void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line); ++void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen); ++void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen); ++void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen); ++void au_dbg_verify_kthread(void); + -+ dinfo = au_di(dentry); -+ if (!dinfo || dinfo->di_bstart < 0) -+ return; ++int __init au_debug_init(void); ++void au_debug_sbinfo_init(struct au_sbinfo *sbinfo); ++#define AuDbgWhlist(w) do { \ ++ AuDbg(#w "\n"); \ ++ au_dpri_whlist(w); \ ++} while (0) ++ ++#define AuDbgVdir(v) do { \ ++ AuDbg(#v "\n"); \ ++ au_dpri_vdir(v); \ ++} while (0) ++ ++#define AuDbgInode(i) do { \ ++ AuDbg(#i "\n"); \ ++ au_dpri_inode(i); \ ++} while (0) ++ ++#define AuDbgDAlias(i) do { \ ++ AuDbg(#i "\n"); \ ++ au_dpri_dalias(i); \ ++} while (0) ++ ++#define AuDbgDentry(d) do { \ ++ AuDbg(#d "\n"); \ ++ au_dpri_dentry(d); \ ++} while (0) ++ ++#define AuDbgFile(f) do { \ ++ AuDbg(#f "\n"); \ ++ au_dpri_file(f); \ ++} while (0) ++ ++#define AuDbgSb(sb) do { \ ++ AuDbg(#sb "\n"); \ ++ au_dpri_sb(sb); \ ++} while (0) + -+ hdp = dinfo->di_hdentry; -+ if (do_put_zero) { -+ aufs_bindex_t bindex, bend; ++#define AuDbgSleep(sec) do { \ ++ AuDbg("sleep %d sec\n", sec); \ ++ ssleep(sec); \ ++} while (0) + -+ bend = dinfo->di_bend; -+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) { -+ h_d = hdp[0 + bindex].hd_dentry; -+ if (h_d && !h_d->d_inode) -+ au_set_h_dptr(dentry, bindex, NULL); -+ } -+ } ++#define AuDbgSleepJiffy(jiffy) do { \ ++ AuDbg("sleep %d jiffies\n", jiffy); \ ++ au_dbg_sleep_jiffy(jiffy); \ ++} while (0) + -+ dinfo->di_bstart = -1; -+ while (++dinfo->di_bstart <= dinfo->di_bend) -+ if (hdp[0 + dinfo->di_bstart].hd_dentry) -+ break; -+ if (dinfo->di_bstart > dinfo->di_bend) { -+ dinfo->di_bstart = -1; -+ dinfo->di_bend = -1; -+ return; -+ } ++#define AuDbgIAttr(ia) do { \ ++ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \ ++ au_dbg_iattr(ia); \ ++} while (0) + -+ dinfo->di_bend++; -+ while (0 <= --dinfo->di_bend) -+ if (hdp[0 + dinfo->di_bend].hd_dentry) -+ break; -+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0); -+} ++#define AuDbgSym(addr) do { \ ++ char sym[KSYM_SYMBOL_LEN]; \ ++ sprint_symbol(sym, (unsigned long)addr); \ ++ AuDbg("%s\n", sym); \ ++} while (0) + -+void au_update_dbstart(struct dentry *dentry) -+{ -+ aufs_bindex_t bindex, bend; -+ struct dentry *h_dentry; ++#define AuInfoSym(addr) do { \ ++ char sym[KSYM_SYMBOL_LEN]; \ ++ sprint_symbol(sym, (unsigned long)addr); \ ++ AuInfo("%s\n", sym); \ ++} while (0) ++#else ++AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry) ++AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen) ++AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry, ++ unsigned int sigen) ++AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen) ++AuStubVoid(au_dbg_verify_kthread, void) ++AuStubInt0(__init au_debug_init, void) ++AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo) + -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ if (h_dentry->d_inode) { -+ au_set_dbstart(dentry, bindex); -+ return; -+ } -+ au_set_h_dptr(dentry, bindex, NULL); -+ } -+} ++#define AuDbgWhlist(w) do {} while (0) ++#define AuDbgVdir(v) do {} while (0) ++#define AuDbgInode(i) do {} while (0) ++#define AuDbgDAlias(i) do {} while (0) ++#define AuDbgDentry(d) do {} while (0) ++#define AuDbgFile(f) do {} while (0) ++#define AuDbgSb(sb) do {} while (0) ++#define AuDbgSleep(sec) do {} while (0) ++#define AuDbgSleepJiffy(jiffy) do {} while (0) ++#define AuDbgIAttr(ia) do {} while (0) ++#define AuDbgSym(addr) do {} while (0) ++#define AuInfoSym(addr) do {} while (0) ++#endif /* CONFIG_AUFS_DEBUG */ + -+void au_update_dbend(struct dentry *dentry) -+{ -+ aufs_bindex_t bindex, bstart; -+ struct dentry *h_dentry; ++/* ---------------------------------------------------------------------- */ + -+ bstart = au_dbstart(dentry); -+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ if (h_dentry->d_inode) { -+ au_set_dbend(dentry, bindex); -+ return; -+ } -+ au_set_h_dptr(dentry, bindex, NULL); -+ } -+} ++#ifdef CONFIG_AUFS_MAGIC_SYSRQ ++int __init au_sysrq_init(void); ++void au_sysrq_fin(void); + -+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry) -+{ -+ aufs_bindex_t bindex, bend; ++#ifdef CONFIG_HW_CONSOLE ++#define au_dbg_blocked() do { \ ++ WARN_ON(1); \ ++ handle_sysrq('w'); \ ++} while (0) ++#else ++AuStubVoid(au_dbg_blocked, void) ++#endif + -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) -+ if (au_h_dptr(dentry, bindex) == h_dentry) -+ return bindex; -+ return -1; -+} ++#else ++AuStubInt0(__init au_sysrq_init, void) ++AuStubVoid(au_sysrq_fin, void) ++AuStubVoid(au_dbg_blocked, void) ++#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DEBUG_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/dir.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,633 @@ ++++ linux-3.9/fs/aufs/dentry.c 2013-05-13 17:13:17.210464023 +0200 +@@ -0,0 +1,1065 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -7396,765 +6123,1294 @@ + */ + +/* -+ * directory operations ++ * lookup and dentry operations + */ + -+#include ++#include +#include "aufs.h" + -+void au_add_nlink(struct inode *dir, struct inode *h_dir) -+{ -+ unsigned int nlink; -+ -+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++#define AuLkup_ALLOW_NEG 1 ++#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name) ++#define au_fset_lkup(flags, name) \ ++ do { (flags) |= AuLkup_##name; } while (0) ++#define au_fclr_lkup(flags, name) \ ++ do { (flags) &= ~AuLkup_##name; } while (0) + -+ nlink = dir->i_nlink; -+ nlink += h_dir->i_nlink - 2; -+ if (h_dir->i_nlink < 2) -+ nlink += 2; -+ /* 0 can happen in revaliding */ -+ set_nlink(dir, nlink); -+} ++struct au_do_lookup_args { ++ unsigned int flags; ++ mode_t type; ++}; + -+void au_sub_nlink(struct inode *dir, struct inode *h_dir) ++/* ++ * returns positive/negative dentry, NULL or an error. ++ * NULL means whiteout-ed or not-found. ++ */ ++static struct dentry* ++au_do_lookup(struct dentry *h_parent, struct dentry *dentry, ++ aufs_bindex_t bindex, struct qstr *wh_name, ++ struct au_do_lookup_args *args) +{ -+ unsigned int nlink; ++ struct dentry *h_dentry; ++ struct inode *h_inode, *inode; ++ struct au_branch *br; ++ int wh_found, opq; ++ unsigned char wh_able; ++ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG); + -+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ wh_found = 0; ++ br = au_sbr(dentry->d_sb, bindex); ++ wh_able = !!au_br_whable(br->br_perm); ++ if (wh_able) ++ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0); ++ h_dentry = ERR_PTR(wh_found); ++ if (!wh_found) ++ goto real_lookup; ++ if (unlikely(wh_found < 0)) ++ goto out; + -+ nlink = dir->i_nlink; -+ nlink -= h_dir->i_nlink - 2; -+ if (h_dir->i_nlink < 2) -+ nlink -= 2; -+ /* nlink == 0 means the branch-fs is broken */ -+ set_nlink(dir, nlink); -+} ++ /* We found a whiteout */ ++ /* au_set_dbend(dentry, bindex); */ ++ au_set_dbwh(dentry, bindex); ++ if (!allow_neg) ++ return NULL; /* success */ + -+loff_t au_dir_size(struct file *file, struct dentry *dentry) -+{ -+ loff_t sz; -+ aufs_bindex_t bindex, bend; -+ struct file *h_file; -+ struct dentry *h_dentry; ++real_lookup: ++ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent); ++ if (IS_ERR(h_dentry)) ++ goto out; + -+ sz = 0; -+ if (file) { -+ AuDebugOn(!file->f_dentry); -+ AuDebugOn(!file->f_dentry->d_inode); -+ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode)); ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) { ++ if (!allow_neg) ++ goto out_neg; ++ } else if (wh_found ++ || (args->type && args->type != (h_inode->i_mode & S_IFMT))) ++ goto out_neg; + -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); -+ bindex <= bend && sz < KMALLOC_MAX_SIZE; -+ bindex++) { -+ h_file = au_hf_dir(file, bindex); -+ if (h_file -+ && h_file->f_dentry -+ && h_file->f_dentry->d_inode) -+ sz += i_size_read(h_file->f_dentry->d_inode); -+ } -+ } else { -+ AuDebugOn(!dentry); -+ AuDebugOn(!dentry->d_inode); -+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); ++ if (au_dbend(dentry) <= bindex) ++ au_set_dbend(dentry, bindex); ++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) ++ au_set_dbstart(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, h_dentry); + -+ bend = au_dbtaildir(dentry); -+ for (bindex = au_dbstart(dentry); -+ bindex <= bend && sz < KMALLOC_MAX_SIZE; -+ bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && h_dentry->d_inode) -+ sz += i_size_read(h_dentry->d_inode); -+ } -+ } -+ if (sz < KMALLOC_MAX_SIZE) -+ sz = roundup_pow_of_two(sz); -+ if (sz > KMALLOC_MAX_SIZE) -+ sz = KMALLOC_MAX_SIZE; -+ else if (sz < NAME_MAX) { -+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX); -+ sz = AUFS_RDBLK_DEF; ++ inode = dentry->d_inode; ++ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able ++ || (inode && !S_ISDIR(inode->i_mode))) ++ goto out; /* success */ ++ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ opq = au_diropq_test(h_dentry, br); ++ mutex_unlock(&h_inode->i_mutex); ++ if (opq > 0) ++ au_set_dbdiropq(dentry, bindex); ++ else if (unlikely(opq < 0)) { ++ au_set_h_dptr(dentry, bindex, NULL); ++ h_dentry = ERR_PTR(opq); + } -+ return sz; ++ goto out; ++ ++out_neg: ++ dput(h_dentry); ++ h_dentry = NULL; ++out: ++ return h_dentry; +} + -+/* ---------------------------------------------------------------------- */ ++static int au_test_shwh(struct super_block *sb, const struct qstr *name) ++{ ++ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH) ++ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ return -EPERM; ++ return 0; ++} + -+static int reopen_dir(struct file *file) ++/* ++ * returns the number of lower positive dentries, ++ * otherwise an error. ++ * can be called at unlinking with @type is zero. ++ */ ++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type) +{ -+ int err; -+ unsigned int flags; -+ aufs_bindex_t bindex, btail, bstart; -+ struct dentry *dentry, *h_dentry; -+ struct file *h_file; ++ int npositive, err; ++ aufs_bindex_t bindex, btail, bdiropq; ++ unsigned char isdir; ++ struct qstr whname; ++ struct au_do_lookup_args args = { ++ .flags = 0, ++ .type = type ++ }; ++ const struct qstr *name = &dentry->d_name; ++ struct dentry *parent; ++ struct inode *inode; + -+ /* open all lower dirs */ -+ dentry = file->f_dentry; -+ bstart = au_dbstart(dentry); -+ for (bindex = au_fbstart(file); bindex < bstart; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbstart(file, bstart); ++ err = au_test_shwh(dentry->d_sb, name); ++ if (unlikely(err)) ++ goto out; + -+ btail = au_dbtaildir(dentry); -+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbend_dir(file, btail); ++ err = au_wh_name_alloc(&whname, name); ++ if (unlikely(err)) ++ goto out; + -+ flags = vfsub_file_flags(file); ++ inode = dentry->d_inode; ++ isdir = !!(inode && S_ISDIR(inode->i_mode)); ++ if (!type) ++ au_fset_lkup(args.flags, ALLOW_NEG); ++ ++ npositive = 0; ++ parent = dget_parent(dentry); ++ btail = au_dbtaildir(parent); + for (bindex = bstart; bindex <= btail; bindex++) { ++ struct dentry *h_parent, *h_dentry; ++ struct inode *h_inode, *h_dir; ++ + h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) { ++ if (h_dentry->d_inode) ++ npositive++; ++ if (type != S_IFDIR) ++ break; ++ continue; ++ } ++ h_parent = au_h_dptr(parent, bindex); ++ if (!h_parent) ++ continue; ++ h_dir = h_parent->d_inode; ++ if (!h_dir || !S_ISDIR(h_dir->i_mode)) ++ continue; ++ ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname, ++ &args); ++ mutex_unlock(&h_dir->i_mutex); ++ err = PTR_ERR(h_dentry); ++ if (IS_ERR(h_dentry)) ++ goto out_parent; ++ au_fclr_lkup(args.flags, ALLOW_NEG); ++ ++ if (au_dbwh(dentry) >= 0) ++ break; + if (!h_dentry) + continue; -+ h_file = au_hf_dir(file, bindex); -+ if (h_file) ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) + continue; ++ npositive++; ++ if (!args.type) ++ args.type = h_inode->i_mode & S_IFMT; ++ if (args.type != S_IFDIR) ++ break; ++ else if (isdir) { ++ /* the type of lower may be different */ ++ bdiropq = au_dbdiropq(dentry); ++ if (bdiropq >= 0 && bdiropq <= bindex) ++ break; ++ } ++ } + -+ h_file = au_h_open(dentry, bindex, flags, file); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; /* close all? */ -+ au_set_h_fptr(file, bindex, h_file); ++ if (npositive) { ++ AuLabel(positive); ++ au_update_dbstart(dentry); ++ } ++ err = npositive; ++ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) ++ && au_dbstart(dentry) < 0)) { ++ err = -EIO; ++ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n", ++ AuDLNPair(dentry), err); + } -+ au_update_figen(file); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ err = 0; + ++out_parent: ++ dput(parent); ++ kfree(whname.name); +out: + return err; +} + -+static int do_open_dir(struct file *file, int flags) ++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, ++ struct au_branch *br) +{ -+ int err; -+ aufs_bindex_t bindex, btail; -+ struct dentry *dentry, *h_dentry; -+ struct file *h_file; ++ struct dentry *dentry; ++ int wkq_err; + -+ FiMustWriteLock(file); ++ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC)) ++ dentry = vfsub_lkup_one(name, parent); ++ else { ++ struct vfsub_lkup_one_args args = { ++ .errp = &dentry, ++ .name = name, ++ .parent = parent ++ }; + -+ dentry = file->f_dentry; -+ err = au_alive_dir(dentry); -+ if (unlikely(err)) -+ goto out; ++ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args); ++ if (unlikely(wkq_err)) ++ dentry = ERR_PTR(wkq_err); ++ } + -+ file->f_version = dentry->d_inode->i_version; -+ bindex = au_dbstart(dentry); -+ au_set_fbstart(file, bindex); -+ btail = au_dbtaildir(dentry); -+ au_set_fbend_dir(file, btail); -+ for (; !err && bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; ++ return dentry; ++} + -+ h_file = au_h_open(dentry, bindex, flags, file); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ break; -+ } -+ au_set_h_fptr(file, bindex, h_file); ++/* ++ * lookup @dentry on @bindex which should be negative. ++ */ ++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh) ++{ ++ int err; ++ struct dentry *parent, *h_parent, *h_dentry; ++ struct au_branch *br; ++ ++ parent = dget_parent(dentry); ++ h_parent = au_h_dptr(parent, bindex); ++ br = au_sbr(dentry->d_sb, bindex); ++ if (wh) ++ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); ++ else ++ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent, br); ++ err = PTR_ERR(h_dentry); ++ if (IS_ERR(h_dentry)) ++ goto out; ++ if (unlikely(h_dentry->d_inode)) { ++ err = -EIO; ++ AuIOErr("%.*s should be negative on b%d.\n", ++ AuDLNPair(h_dentry), bindex); ++ dput(h_dentry); ++ goto out; + } -+ au_update_figen(file); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ if (!err) -+ return 0; /* success */ + -+ /* close all */ -+ for (bindex = au_fbstart(file); bindex <= btail; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbstart(file, -1); -+ au_set_fbend_dir(file, -1); ++ err = 0; ++ if (bindex < au_dbstart(dentry)) ++ au_set_dbstart(dentry, bindex); ++ if (au_dbend(dentry) < bindex) ++ au_set_dbend(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, h_dentry); + +out: ++ dput(parent); + return err; +} + -+static int aufs_open_dir(struct inode *inode __maybe_unused, -+ struct file *file) ++/* ---------------------------------------------------------------------- */ ++ ++/* subset of struct inode */ ++struct au_iattr { ++ unsigned long i_ino; ++ /* unsigned int i_nlink; */ ++ kuid_t i_uid; ++ kgid_t i_gid; ++ u64 i_version; ++/* ++ loff_t i_size; ++ blkcnt_t i_blocks; ++*/ ++ umode_t i_mode; ++}; ++ ++static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode) ++{ ++ ia->i_ino = h_inode->i_ino; ++ /* ia->i_nlink = h_inode->i_nlink; */ ++ ia->i_uid = h_inode->i_uid; ++ ia->i_gid = h_inode->i_gid; ++ ia->i_version = h_inode->i_version; ++/* ++ ia->i_size = h_inode->i_size; ++ ia->i_blocks = h_inode->i_blocks; ++*/ ++ ia->i_mode = (h_inode->i_mode & S_IFMT); ++} ++ ++static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode) ++{ ++ return ia->i_ino != h_inode->i_ino ++ /* || ia->i_nlink != h_inode->i_nlink */ ++ || !uid_eq(ia->i_uid, h_inode->i_uid) ++ || !gid_eq(ia->i_gid, h_inode->i_gid) ++ || ia->i_version != h_inode->i_version ++/* ++ || ia->i_size != h_inode->i_size ++ || ia->i_blocks != h_inode->i_blocks ++*/ ++ || ia->i_mode != (h_inode->i_mode & S_IFMT); ++} ++ ++static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent, ++ struct au_branch *br) +{ + int err; -+ struct super_block *sb; -+ struct au_fidir *fidir; ++ struct au_iattr ia; ++ struct inode *h_inode; ++ struct dentry *h_d; ++ struct super_block *h_sb; + -+ err = -ENOMEM; -+ sb = file->f_dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ fidir = au_fidir_alloc(sb); -+ if (fidir) { -+ err = au_do_open(file, do_open_dir, fidir); -+ if (unlikely(err)) -+ kfree(fidir); -+ } -+ si_read_unlock(sb); -+ return err; -+} ++ err = 0; ++ memset(&ia, -1, sizeof(ia)); ++ h_sb = h_dentry->d_sb; ++ h_inode = h_dentry->d_inode; ++ if (h_inode) ++ au_iattr_save(&ia, h_inode); ++ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb)) ++ /* nfs d_revalidate may return 0 for negative dentry */ ++ /* fuse d_revalidate always return 0 for negative dentry */ ++ goto out; + -+static int aufs_release_dir(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ struct au_vdir *vdir_cache; -+ struct au_finfo *finfo; -+ struct au_fidir *fidir; -+ aufs_bindex_t bindex, bend; ++ /* main purpose is namei.c:cached_lookup() and d_revalidate */ ++ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out; + -+ finfo = au_fi(file); -+ fidir = finfo->fi_hdir; -+ if (fidir) { -+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */ -+ if (vdir_cache) -+ au_vdir_free(vdir_cache); ++ err = 0; ++ if (unlikely(h_d != h_dentry ++ || h_d->d_inode != h_inode ++ || (h_inode && au_iattr_test(&ia, h_inode)))) ++ err = au_busy_or_stale(); ++ dput(h_d); + -+ bindex = finfo->fi_btop; -+ if (bindex >= 0) { -+ /* -+ * calls fput() instead of filp_close(), -+ * since no dnotify or lock for the lower file. -+ */ -+ bend = fidir->fd_bbot; -+ for (; bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ } -+ kfree(fidir); -+ finfo->fi_hdir = NULL; -+ } -+ au_finfo_fin(file); -+ return 0; ++out: ++ AuTraceErr(err); ++ return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_flush_dir(struct file *file, fl_owner_t id) ++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, ++ struct dentry *h_parent, struct au_branch *br) +{ + int err; -+ aufs_bindex_t bindex, bend; -+ struct file *h_file; + + err = 0; -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { -+ h_file = au_hf_dir(file, bindex); -+ if (h_file) -+ err = vfsub_flush(h_file, id); -+ } -+ return err; -+} ++ if (udba == AuOpt_UDBA_REVAL ++ && !au_test_fs_remote(h_dentry->d_sb)) { ++ IMustLock(h_dir); ++ err = (h_dentry->d_parent->d_inode != h_dir); ++ } else if (udba != AuOpt_UDBA_NONE) ++ err = au_h_verify_dentry(h_dentry, h_parent, br); + -+static int aufs_flush_dir(struct file *file, fl_owner_t id) -+{ -+ return au_do_flush(file, id, au_do_flush_dir); ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync) ++static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent) +{ + int err; -+ aufs_bindex_t bend, bindex; -+ struct inode *inode; ++ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq; ++ struct au_hdentry tmp, *p, *q; ++ struct au_dinfo *dinfo; + struct super_block *sb; + -+ err = 0; ++ DiMustWriteLock(dentry); ++ + sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) { -+ struct path h_path; ++ dinfo = au_di(dentry); ++ bend = dinfo->di_bend; ++ bwh = dinfo->di_bwh; ++ bdiropq = dinfo->di_bdiropq; ++ p = dinfo->di_hdentry + dinfo->di_bstart; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) { ++ if (!p->hd_dentry) ++ continue; + -+ if (au_test_ro(sb, bindex, inode)) ++ new_bindex = au_br_index(sb, p->hd_id); ++ if (new_bindex == bindex) + continue; -+ h_path.dentry = au_h_dptr(dentry, bindex); -+ if (!h_path.dentry) ++ ++ if (dinfo->di_bwh == bindex) ++ bwh = new_bindex; ++ if (dinfo->di_bdiropq == bindex) ++ bdiropq = new_bindex; ++ if (new_bindex < 0) { ++ au_hdput(p); ++ p->hd_dentry = NULL; + continue; ++ } + -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_fsync(NULL, &h_path, datasync); ++ /* swap two lower dentries, and loop again */ ++ q = dinfo->di_hdentry + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hd_dentry) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ dinfo->di_bwh = -1; ++ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh)) ++ dinfo->di_bwh = bwh; ++ ++ dinfo->di_bdiropq = -1; ++ if (bdiropq >= 0 ++ && bdiropq <= au_sbend(sb) ++ && au_sbr_whable(sb, bdiropq)) ++ dinfo->di_bdiropq = bdiropq; ++ ++ err = -EIO; ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ bend = au_dbend(parent); ++ p = dinfo->di_hdentry; ++ for (bindex = 0; bindex <= bend; bindex++, p++) ++ if (p->hd_dentry) { ++ dinfo->di_bstart = bindex; ++ break; ++ } ++ ++ if (dinfo->di_bstart >= 0) { ++ p = dinfo->di_hdentry + bend; ++ for (bindex = bend; bindex >= 0; bindex--, p--) ++ if (p->hd_dentry) { ++ dinfo->di_bend = bindex; ++ err = 0; ++ break; ++ } + } + + return err; +} + -+static int au_do_fsync_dir(struct file *file, int datasync) ++static void au_do_hide(struct dentry *dentry) +{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct file *h_file; -+ struct super_block *sb; + struct inode *inode; + -+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ inode = dentry->d_inode; ++ if (inode) { ++ if (!S_ISDIR(inode->i_mode)) { ++ if (inode->i_nlink && !d_unhashed(dentry)) ++ drop_nlink(inode); ++ } else { ++ clear_nlink(inode); ++ /* stop next lookup */ ++ inode->i_flags |= S_DEAD; ++ } ++ smp_mb(); /* necessary? */ ++ } ++ d_drop(dentry); ++} ++ ++static int au_hide_children(struct dentry *parent) ++{ ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry *dentry; ++ ++ err = au_dpages_init(&dpages, GFP_NOFS); + if (unlikely(err)) + goto out; ++ err = au_dcsub_pages(&dpages, parent, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; + -+ sb = file->f_dentry->d_sb; -+ inode = file->f_dentry->d_inode; -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { -+ h_file = au_hf_dir(file, bindex); -+ if (!h_file || au_test_ro(sb, bindex, inode)) -+ continue; -+ -+ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ /* in reverse order */ ++ for (i = dpages.ndpage - 1; i >= 0; i--) { ++ dpage = dpages.dpages + i; ++ ndentry = dpage->ndentry; ++ for (j = ndentry - 1; j >= 0; j--) { ++ dentry = dpage->dentries[j]; ++ if (dentry != parent) ++ au_do_hide(dentry); ++ } + } + ++out_dpages: ++ au_dpages_free(&dpages); +out: + return err; +} + ++static void au_hide(struct dentry *dentry) ++{ ++ int err; ++ struct inode *inode; ++ ++ AuDbgDentry(dentry); ++ inode = dentry->d_inode; ++ if (inode && S_ISDIR(inode->i_mode)) { ++ /* shrink_dcache_parent(dentry); */ ++ err = au_hide_children(dentry); ++ if (unlikely(err)) ++ AuIOErr("%.*s, failed hiding children, ignored %d\n", ++ AuDLNPair(dentry), err); ++ } ++ au_do_hide(dentry); ++} ++ +/* -+ * @file may be NULL ++ * By adding a dirty branch, a cached dentry may be affected in various ways. ++ * ++ * a dirty branch is added ++ * - on the top of layers ++ * - in the middle of layers ++ * - to the bottom of layers ++ * ++ * on the added branch there exists ++ * - a whiteout ++ * - a diropq ++ * - a same named entry ++ * + exist ++ * * negative --> positive ++ * * positive --> positive ++ * - type is unchanged ++ * - type is changed ++ * + doesn't exist ++ * * negative --> negative ++ * * positive --> negative (rejected by au_br_del() for non-dir case) ++ * - none + */ -+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end, -+ int datasync) ++static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo, ++ struct au_dinfo *tmp) +{ + int err; -+ struct dentry *dentry; -+ struct super_block *sb; -+ struct mutex *mtx; ++ aufs_bindex_t bindex, bend; ++ struct { ++ struct dentry *dentry; ++ struct inode *inode; ++ mode_t mode; ++ } orig_h, tmp_h; ++ struct au_hdentry *hd; ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; + + err = 0; -+ dentry = file->f_dentry; -+ mtx = &dentry->d_inode->i_mutex; -+ mutex_lock(mtx); -+ sb = dentry->d_sb; -+ si_noflush_read_lock(sb); -+ if (file) -+ err = au_do_fsync_dir(file, datasync); -+ else { -+ di_write_lock_child(dentry); -+ err = au_do_fsync_dir_no_file(dentry, datasync); ++ AuDebugOn(dinfo->di_bstart < 0); ++ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry; ++ orig_h.inode = orig_h.dentry->d_inode; ++ orig_h.mode = 0; ++ if (orig_h.inode) ++ orig_h.mode = orig_h.inode->i_mode & S_IFMT; ++ memset(&tmp_h, 0, sizeof(tmp_h)); ++ if (tmp->di_bstart >= 0) { ++ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry; ++ tmp_h.inode = tmp_h.dentry->d_inode; ++ if (tmp_h.inode) ++ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT; + } -+ au_cpup_attr_timesizes(dentry->d_inode); -+ di_write_unlock(dentry); -+ if (file) -+ fi_write_unlock(file); + -+ si_read_unlock(sb); -+ mutex_unlock(mtx); ++ inode = dentry->d_inode; ++ if (!orig_h.inode) { ++ AuDbg("nagative originally\n"); ++ if (inode) { ++ au_hide(dentry); ++ goto out; ++ } ++ AuDebugOn(inode); ++ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); ++ AuDebugOn(dinfo->di_bdiropq != -1); ++ ++ if (!tmp_h.inode) { ++ AuDbg("negative --> negative\n"); ++ /* should have only one negative lower */ ++ if (tmp->di_bstart >= 0 ++ && tmp->di_bstart < dinfo->di_bstart) { ++ AuDebugOn(tmp->di_bstart != tmp->di_bend); ++ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); ++ au_set_h_dptr(dentry, dinfo->di_bstart, NULL); ++ au_di_cp(dinfo, tmp); ++ hd = tmp->di_hdentry + tmp->di_bstart; ++ au_set_h_dptr(dentry, tmp->di_bstart, ++ dget(hd->hd_dentry)); ++ } ++ au_dbg_verify_dinode(dentry); ++ } else { ++ AuDbg("negative --> positive\n"); ++ /* ++ * similar to the behaviour of creating with bypassing ++ * aufs. ++ * unhash it in order to force an error in the ++ * succeeding create operation. ++ * we should not set S_DEAD here. ++ */ ++ d_drop(dentry); ++ /* au_di_swap(tmp, dinfo); */ ++ au_dbg_verify_dinode(dentry); ++ } ++ } else { ++ AuDbg("positive originally\n"); ++ /* inode may be NULL */ ++ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode); ++ if (!tmp_h.inode) { ++ AuDbg("positive --> negative\n"); ++ /* or bypassing aufs */ ++ au_hide(dentry); ++ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart) ++ dinfo->di_bwh = tmp->di_bwh; ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } else if (orig_h.mode == tmp_h.mode) { ++ AuDbg("positive --> positive, same type\n"); ++ if (!S_ISDIR(orig_h.mode) ++ && dinfo->di_bstart > tmp->di_bstart) { ++ /* ++ * similar to the behaviour of removing and ++ * creating. ++ */ ++ au_hide(dentry); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } else { ++ /* fill empty slots */ ++ if (dinfo->di_bstart > tmp->di_bstart) ++ dinfo->di_bstart = tmp->di_bstart; ++ if (dinfo->di_bend < tmp->di_bend) ++ dinfo->di_bend = tmp->di_bend; ++ dinfo->di_bwh = tmp->di_bwh; ++ dinfo->di_bdiropq = tmp->di_bdiropq; ++ hd = tmp->di_hdentry; ++ bend = dinfo->di_bend; ++ for (bindex = tmp->di_bstart; bindex <= bend; ++ bindex++) { ++ if (au_h_dptr(dentry, bindex)) ++ continue; ++ h_dentry = hd[bindex].hd_dentry; ++ if (!h_dentry) ++ continue; ++ h_inode = h_dentry->d_inode; ++ AuDebugOn(!h_inode); ++ AuDebugOn(orig_h.mode ++ != (h_inode->i_mode ++ & S_IFMT)); ++ au_set_h_dptr(dentry, bindex, ++ dget(h_dentry)); ++ } ++ err = au_refresh_hinode(inode, dentry); ++ au_dbg_verify_dinode(dentry); ++ } ++ } else { ++ AuDbg("positive --> positive, different type\n"); ++ /* similar to the behaviour of removing and creating */ ++ au_hide(dentry); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ } ++ } ++ ++out: + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir) ++int au_refresh_dentry(struct dentry *dentry, struct dentry *parent) +{ -+ int err; -+ struct dentry *dentry; -+ struct inode *inode, *h_inode; ++ int err, ebrange; ++ unsigned int sigen; ++ struct au_dinfo *dinfo, *tmp; + struct super_block *sb; ++ struct inode *inode; + -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ IMustLock(inode); ++ DiMustWriteLock(dentry); ++ AuDebugOn(IS_ROOT(dentry)); ++ AuDebugOn(!parent->d_inode); + + sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ inode = dentry->d_inode; ++ sigen = au_sigen(sb); ++ err = au_digen_test(parent, sigen); + if (unlikely(err)) + goto out; -+ err = au_alive_dir(dentry); -+ if (!err) -+ err = au_vdir_init(file); -+ di_downgrade_lock(dentry, AuLock_IR); ++ ++ dinfo = au_di(dentry); ++ err = au_di_realloc(dinfo, au_sbend(sb) + 1); + if (unlikely(err)) -+ goto out_unlock; ++ goto out; ++ ebrange = au_dbrange_test(dentry); ++ if (!ebrange) ++ ebrange = au_do_refresh_hdentry(dentry, parent); + -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ if (!au_test_nfsd()) { -+ err = au_vdir_fill_de(file, dirent, filldir); -+ fsstack_copy_attr_atime(inode, h_inode); -+ } else { -+ /* -+ * nfsd filldir may call lookup_one_len(), vfs_getattr(), -+ * encode_fh() and others. -+ */ -+ atomic_inc(&h_inode->i_count); -+ di_read_unlock(dentry, AuLock_IR); -+ si_read_unlock(sb); -+ err = au_vdir_fill_de(file, dirent, filldir); -+ fsstack_copy_attr_atime(inode, h_inode); -+ fi_write_unlock(file); -+ iput(h_inode); ++ if (d_unhashed(dentry) || ebrange) { ++ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0); ++ if (inode) ++ err = au_refresh_hinode_self(inode); ++ au_dbg_verify_dinode(dentry); ++ if (!err) ++ goto out_dgen; /* success */ ++ goto out; ++ } + ++ /* temporary dinfo */ ++ AuDbgDentry(dentry); ++ err = -ENOMEM; ++ tmp = au_di_alloc(sb, AuLsc_DI_TMP); ++ if (unlikely(!tmp)) ++ goto out; ++ au_di_swap(tmp, dinfo); ++ /* returns the number of positive dentries */ ++ /* ++ * if current working dir is removed, it returns an error. ++ * but the dentry is legal. ++ */ ++ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0); ++ AuDbgDentry(dentry); ++ au_di_swap(tmp, dinfo); ++ if (err == -ENOENT) ++ err = 0; ++ if (err >= 0) { ++ /* compare/refresh by dinfo */ ++ AuDbgDentry(dentry); ++ err = au_refresh_by_dinfo(dentry, dinfo, tmp); ++ au_dbg_verify_dinode(dentry); + AuTraceErr(err); -+ return err; + } ++ au_rw_write_unlock(&tmp->di_rwsem); ++ au_di_free(tmp); ++ if (unlikely(err)) ++ goto out; + -+out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); ++out_dgen: ++ au_update_digen(dentry); +out: -+ si_read_unlock(sb); ++ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { ++ AuIOErr("failed refreshing %.*s, %d\n", ++ AuDLNPair(dentry), err); ++ AuDbgDentry(dentry); ++ } ++ AuTraceErr(err); + return err; +} + -+/* ---------------------------------------------------------------------- */ ++static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags, ++ struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err, valid; + -+#define AuTestEmpty_WHONLY 1 -+#define AuTestEmpty_CALLED (1 << 1) -+#define AuTestEmpty_SHWH (1 << 2) -+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name) -+#define au_fset_testempty(flags, name) \ -+ do { (flags) |= AuTestEmpty_##name; } while (0) -+#define au_fclr_testempty(flags, name) \ -+ do { (flags) &= ~AuTestEmpty_##name; } while (0) ++ err = 0; ++ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE)) ++ goto out; + -+#ifndef CONFIG_AUFS_SHWH -+#undef AuTestEmpty_SHWH -+#define AuTestEmpty_SHWH 0 -+#endif ++ AuDbg("b%d\n", bindex); ++ /* ++ * gave up supporting LOOKUP_CREATE/OPEN for lower fs, ++ * due to whiteout and branch permission. ++ */ ++ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE ++ | LOOKUP_FOLLOW | LOOKUP_EXCL); ++ /* it may return tri-state */ ++ valid = h_dentry->d_op->d_revalidate(h_dentry, flags); + -+struct test_empty_arg { -+ struct au_nhash *whlist; -+ unsigned int flags; -+ int err; -+ aufs_bindex_t bindex; -+}; ++ if (unlikely(valid < 0)) ++ err = valid; ++ else if (!valid) ++ err = -EINVAL; + -+static int test_empty_cb(void *__arg, const char *__name, int namelen, -+ loff_t offset __maybe_unused, u64 ino, -+ unsigned int d_type) ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* todo: remove this */ ++static int h_d_revalidate(struct dentry *dentry, struct inode *inode, ++ unsigned int flags, int do_udba) +{ -+ struct test_empty_arg *arg = __arg; -+ char *name = (void *)__name; ++ int err; ++ umode_t mode, h_mode; ++ aufs_bindex_t bindex, btail, bstart, ibs, ibe; ++ unsigned char plus, unhashed, is_root, h_plus; ++ struct inode *h_inode, *h_cached_inode; ++ struct dentry *h_dentry; ++ struct qstr *name, *h_name; + -+ arg->err = 0; -+ au_fset_testempty(arg->flags, CALLED); -+ /* smp_mb(); */ -+ if (name[0] == '.' -+ && (namelen == 1 || (name[1] == '.' && namelen == 2))) -+ goto out; /* success */ ++ err = 0; ++ plus = 0; ++ mode = 0; ++ ibs = -1; ++ ibe = -1; ++ unhashed = !!d_unhashed(dentry); ++ is_root = !!IS_ROOT(dentry); ++ name = &dentry->d_name; + -+ if (namelen <= AUFS_WH_PFX_LEN -+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { -+ if (au_ftest_testempty(arg->flags, WHONLY) -+ && !au_nhash_test_known_wh(arg->whlist, name, namelen)) -+ arg->err = -ENOTEMPTY; -+ goto out; ++ /* ++ * Theoretically, REVAL test should be unnecessary in case of ++ * {FS,I}NOTIFY. ++ * But {fs,i}notify doesn't fire some necessary events, ++ * IN_ATTRIB for atime/nlink/pageio ++ * IN_DELETE for NFS dentry ++ * Let's do REVAL test too. ++ */ ++ if (do_udba && inode) { ++ mode = (inode->i_mode & S_IFMT); ++ plus = (inode->i_nlink > 0); ++ ibs = au_ibstart(inode); ++ ibe = au_ibend(inode); + } + -+ name += AUFS_WH_PFX_LEN; -+ namelen -= AUFS_WH_PFX_LEN; -+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen)) -+ arg->err = au_nhash_append_wh -+ (arg->whlist, name, namelen, ino, d_type, arg->bindex, -+ au_ftest_testempty(arg->flags, SHWH)); ++ bstart = au_dbstart(dentry); ++ btail = bstart; ++ if (inode && S_ISDIR(inode->i_mode)) ++ btail = au_dbtaildir(dentry); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; + -+out: -+ /* smp_mb(); */ -+ AuTraceErr(arg->err); -+ return arg->err; -+} ++ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry)); ++ spin_lock(&h_dentry->d_lock); ++ h_name = &h_dentry->d_name; ++ if (unlikely(do_udba ++ && !is_root ++ && (unhashed != !!d_unhashed(h_dentry) ++ || name->len != h_name->len ++ || memcmp(name->name, h_name->name, name->len)) ++ )) { ++ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n", ++ unhashed, d_unhashed(h_dentry), ++ AuDLNPair(dentry), AuDLNPair(h_dentry)); ++ spin_unlock(&h_dentry->d_lock); ++ goto err; ++ } ++ spin_unlock(&h_dentry->d_lock); + -+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) -+{ -+ int err; -+ struct file *h_file; ++ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex); ++ if (unlikely(err)) ++ /* do not goto err, to keep the errno */ ++ break; + -+ h_file = au_h_open(dentry, arg->bindex, -+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE, -+ /*file*/NULL); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; ++ /* todo: plink too? */ ++ if (!do_udba) ++ continue; + -+ err = 0; -+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) -+ && !h_file->f_dentry->d_inode->i_nlink) -+ goto out_put; ++ /* UDBA tests */ ++ h_inode = h_dentry->d_inode; ++ if (unlikely(!!inode != !!h_inode)) ++ goto err; + -+ do { -+ arg->err = 0; -+ au_fclr_testempty(arg->flags, CALLED); -+ /* smp_mb(); */ -+ err = vfsub_readdir(h_file, test_empty_cb, arg); -+ if (err >= 0) -+ err = arg->err; -+ } while (!err && au_ftest_testempty(arg->flags, CALLED)); ++ h_plus = plus; ++ h_mode = mode; ++ h_cached_inode = h_inode; ++ if (h_inode) { ++ h_mode = (h_inode->i_mode & S_IFMT); ++ h_plus = (h_inode->i_nlink > 0); ++ } ++ if (inode && ibs <= bindex && bindex <= ibe) ++ h_cached_inode = au_h_iptr(inode, bindex); ++ ++ if (unlikely(plus != h_plus ++ || mode != h_mode ++ || h_cached_inode != h_inode)) ++ goto err; ++ continue; ++ ++ err: ++ err = -EINVAL; ++ break; ++ } + -+out_put: -+ fput(h_file); -+ au_sbr_put(dentry->d_sb, arg->bindex); -+out: + return err; +} + -+struct do_test_empty_args { -+ int *errp; -+ struct dentry *dentry; -+ struct test_empty_arg *arg; -+}; -+ -+static void call_do_test_empty(void *args) ++/* todo: consolidate with do_refresh() and au_reval_for_attr() */ ++static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen) +{ -+ struct do_test_empty_args *a = args; -+ *a->errp = do_test_empty(a->dentry, a->arg); ++ int err; ++ struct dentry *parent; ++ ++ if (!au_digen_test(dentry, sigen)) ++ return 0; ++ ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ AuDebugOn(au_digen_test(parent, sigen)); ++ au_dbg_verify_gen(parent, sigen); ++ err = au_refresh_dentry(dentry, parent); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ AuTraceErr(err); ++ return err; +} + -+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++int au_reval_dpath(struct dentry *dentry, unsigned int sigen) +{ -+ int err, wkq_err; -+ struct dentry *h_dentry; -+ struct inode *h_inode; ++ int err; ++ struct dentry *d, *parent; ++ struct inode *inode; + -+ h_dentry = au_h_dptr(dentry, arg->bindex); -+ h_inode = h_dentry->d_inode; -+ /* todo: i_mode changes anytime? */ -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ); -+ mutex_unlock(&h_inode->i_mutex); -+ if (!err) -+ err = do_test_empty(dentry, arg); -+ else { -+ struct do_test_empty_args args = { -+ .errp = &err, -+ .dentry = dentry, -+ .arg = arg -+ }; -+ unsigned int flags = arg->flags; ++ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR)) ++ return simple_reval_dpath(dentry, sigen); ++ ++ /* slow loop, keep it simple and stupid */ ++ /* cf: au_cpup_dirs() */ ++ err = 0; ++ parent = NULL; ++ while (au_digen_test(dentry, sigen)) { ++ d = dentry; ++ while (1) { ++ dput(parent); ++ parent = dget_parent(d); ++ if (!au_digen_test(parent, sigen)) ++ break; ++ d = parent; ++ } ++ ++ inode = d->d_inode; ++ if (d != dentry) ++ di_write_lock_child2(d); ++ ++ /* someone might update our dentry while we were sleeping */ ++ if (au_digen_test(d, sigen)) { ++ /* ++ * todo: consolidate with simple_reval_dpath(), ++ * do_refresh() and au_reval_for_attr(). ++ */ ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(d, parent); ++ di_read_unlock(parent, AuLock_IR); ++ } + -+ wkq_err = au_wkq_wait(call_do_test_empty, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ arg->flags = flags; ++ if (d != dentry) ++ di_write_unlock(d); ++ dput(parent); ++ if (unlikely(err)) ++ break; + } + + return err; +} + -+int au_test_empty_lower(struct dentry *dentry) ++/* ++ * if valid returns 1, otherwise 0. ++ */ ++static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ -+ int err; -+ unsigned int rdhash; -+ aufs_bindex_t bindex, bstart, btail; -+ struct au_nhash whlist; -+ struct test_empty_arg arg; ++ int valid, err; ++ unsigned int sigen; ++ unsigned char do_udba; ++ struct super_block *sb; ++ struct inode *inode; + -+ SiMustAnyLock(dentry->d_sb); ++ /* todo: support rcu-walk? */ ++ if (flags & LOOKUP_RCU) ++ return -ECHILD; + -+ rdhash = au_sbi(dentry->d_sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); -+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) ++ valid = 0; ++ if (unlikely(!au_di(dentry))) + goto out; + -+ arg.flags = 0; -+ arg.whlist = &whlist; -+ bstart = au_dbstart(dentry); -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) -+ au_fset_testempty(arg.flags, SHWH); -+ arg.bindex = bstart; -+ err = do_test_empty(dentry, &arg); -+ if (unlikely(err)) -+ goto out_whlist; ++ inode = dentry->d_inode; ++ if (inode && is_bad_inode(inode)) ++ goto out; + -+ au_fset_testempty(arg.flags, WHONLY); -+ btail = au_dbtaildir(dentry); -+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) { -+ struct dentry *h_dentry; ++ valid = 1; ++ sb = dentry->d_sb; ++ /* ++ * todo: very ugly ++ * i_mutex of parent dir may be held, ++ * but we should not return 'invalid' due to busy. ++ */ ++ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM); ++ if (unlikely(err)) { ++ valid = err; ++ AuTraceErr(err); ++ goto out; ++ } ++ if (unlikely(au_dbrange_test(dentry))) { ++ err = -EINVAL; ++ AuTraceErr(err); ++ goto out_dgrade; ++ } + -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && h_dentry->d_inode) { -+ arg.bindex = bindex; -+ err = do_test_empty(dentry, &arg); ++ sigen = au_sigen(sb); ++ if (au_digen_test(dentry, sigen)) { ++ AuDebugOn(IS_ROOT(dentry)); ++ err = au_reval_dpath(dentry, sigen); ++ if (unlikely(err)) { ++ AuTraceErr(err); ++ goto out_dgrade; + } + } ++ di_downgrade_lock(dentry, AuLock_IR); + -+out_whlist: -+ au_nhash_wh_free(&whlist); ++ err = -EINVAL; ++ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink)) ++ goto out_inval; ++ ++ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE); ++ if (do_udba && inode) { ++ aufs_bindex_t bstart = au_ibstart(inode); ++ struct inode *h_inode; ++ ++ if (bstart >= 0) { ++ h_inode = au_h_iptr(inode, bstart); ++ if (h_inode && au_test_higen(inode, h_inode)) ++ goto out_inval; ++ } ++ } ++ ++ err = h_d_revalidate(dentry, inode, flags, do_udba); ++ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) { ++ err = -EIO; ++ AuDbg("both of real entry and whiteout found, %.*s, err %d\n", ++ AuDLNPair(dentry), err); ++ } ++ goto out_inval; ++ ++out_dgrade: ++ di_downgrade_lock(dentry, AuLock_IR); ++out_inval: ++ aufs_read_unlock(dentry, AuLock_IR); ++ AuTraceErr(err); ++ valid = !err; +out: -+ return err; ++ if (!valid) { ++ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid); ++ d_drop(dentry); ++ } ++ return valid; +} + -+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist) ++static void aufs_d_release(struct dentry *dentry) +{ -+ int err; -+ struct test_empty_arg arg; -+ aufs_bindex_t bindex, btail; ++ if (au_di(dentry)) { ++ au_di_fin(dentry); ++ au_hn_di_reinit(dentry); ++ } ++} + -+ err = 0; -+ arg.whlist = whlist; -+ arg.flags = AuTestEmpty_WHONLY; -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) -+ au_fset_testempty(arg.flags, SHWH); -+ btail = au_dbtaildir(dentry); -+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) { -+ struct dentry *h_dentry; ++const struct dentry_operations aufs_dop = { ++ .d_revalidate = aufs_d_revalidate, ++ .d_weak_revalidate = aufs_d_revalidate, ++ .d_release = aufs_d_release ++}; +--- /dev/null ++++ linux-3.9/fs/aufs/dentry.h 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,234 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && h_dentry->d_inode) { -+ arg.bindex = bindex; -+ err = sio_test_empty(dentry, &arg); -+ } -+ } ++/* ++ * lookup and dentry operations ++ */ + -+ return err; -+} ++#ifndef __AUFS_DENTRY_H__ ++#define __AUFS_DENTRY_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++ ++struct au_hdentry { ++ struct dentry *hd_dentry; ++ aufs_bindex_t hd_id; ++}; ++ ++struct au_dinfo { ++ atomic_t di_generation; ++ ++ struct au_rwsem di_rwsem; ++ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; ++ struct au_hdentry *di_hdentry; ++} ____cacheline_aligned_in_smp; + +/* ---------------------------------------------------------------------- */ + -+const struct file_operations aufs_dir_fop = { -+ .owner = THIS_MODULE, -+ .llseek = default_llseek, -+ .read = generic_read_dir, -+ .readdir = aufs_readdir, -+ .unlocked_ioctl = aufs_ioctl_dir, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = aufs_compat_ioctl_dir, -+#endif -+ .open = aufs_open_dir, -+ .release = aufs_release_dir, -+ .flush = aufs_flush_dir, -+ .fsync = aufs_fsync_dir ++/* dentry.c */ ++extern const struct dentry_operations aufs_dop; ++struct au_branch; ++struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent, ++ struct au_branch *br); ++int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, ++ struct dentry *h_parent, struct au_branch *br); ++ ++int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type); ++int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh); ++int au_refresh_dentry(struct dentry *dentry, struct dentry *parent); ++int au_reval_dpath(struct dentry *dentry, unsigned int sigen); ++ ++/* dinfo.c */ ++void au_di_init_once(void *_di); ++struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc); ++void au_di_free(struct au_dinfo *dinfo); ++void au_di_swap(struct au_dinfo *a, struct au_dinfo *b); ++void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src); ++int au_di_init(struct dentry *dentry); ++void au_di_fin(struct dentry *dentry); ++int au_di_realloc(struct au_dinfo *dinfo, int nbr); ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc); ++void di_read_unlock(struct dentry *d, int flags); ++void di_downgrade_lock(struct dentry *d, int flags); ++void di_write_lock(struct dentry *d, unsigned int lsc); ++void di_write_unlock(struct dentry *d); ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex); ++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex); ++aufs_bindex_t au_dbtail(struct dentry *dentry); ++aufs_bindex_t au_dbtaildir(struct dentry *dentry); ++ ++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++int au_digen_test(struct dentry *dentry, unsigned int sigen); ++int au_dbrange_test(struct dentry *dentry); ++void au_update_digen(struct dentry *dentry); ++void au_update_dbrange(struct dentry *dentry, int do_put_zero); ++void au_update_dbstart(struct dentry *dentry); ++void au_update_dbend(struct dentry *dentry); ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_dinfo *au_di(struct dentry *dentry) ++{ ++ return dentry->d_fsdata; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for dinfo */ ++enum { ++ AuLsc_DI_CHILD, /* child first */ ++ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */ ++ AuLsc_DI_CHILD3, /* copyup dirs */ ++ AuLsc_DI_PARENT, ++ AuLsc_DI_PARENT2, ++ AuLsc_DI_PARENT3, ++ AuLsc_DI_TMP /* temp for replacing dinfo */ +}; ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/dir.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,137 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ + +/* -+ * directory operations ++ * di_read_lock_child, di_write_lock_child, ++ * di_read_lock_child2, di_write_lock_child2, ++ * di_read_lock_child3, di_write_lock_child3, ++ * di_read_lock_parent, di_write_lock_parent, ++ * di_read_lock_parent2, di_write_lock_parent2, ++ * di_read_lock_parent3, di_write_lock_parent3, + */ ++#define AuReadLockFunc(name, lsc) \ ++static inline void di_read_lock_##name(struct dentry *d, int flags) \ ++{ di_read_lock(d, flags, AuLsc_DI_##lsc); } + -+#ifndef __AUFS_DIR_H__ -+#define __AUFS_DIR_H__ ++#define AuWriteLockFunc(name, lsc) \ ++static inline void di_write_lock_##name(struct dentry *d) \ ++{ di_write_lock(d, AuLsc_DI_##lsc); } + -+#ifdef __KERNEL__ ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) + -+#include ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); + -+/* ---------------------------------------------------------------------- */ ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs + -+/* need to be faster and smaller */ ++#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) ++#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem) ++#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem) + -+struct au_nhash { -+ unsigned int nh_num; -+ struct hlist_head *nh_head; -+}; ++/* ---------------------------------------------------------------------- */ + -+struct au_vdir_destr { -+ unsigned char len; -+ unsigned char name[0]; -+} __packed; ++/* todo: memory barrier? */ ++static inline unsigned int au_digen(struct dentry *d) ++{ ++ return atomic_read(&au_di(d)->di_generation); ++} + -+struct au_vdir_dehstr { -+ struct hlist_node hash; -+ struct au_vdir_destr *str; -+} ____cacheline_aligned_in_smp; ++static inline void au_h_dentry_init(struct au_hdentry *hdentry) ++{ ++ hdentry->hd_dentry = NULL; ++} + -+struct au_vdir_de { -+ ino_t de_ino; -+ unsigned char de_type; -+ /* caution: packed */ -+ struct au_vdir_destr de_str; -+} __packed; ++static inline void au_hdput(struct au_hdentry *hd) ++{ ++ if (hd) ++ dput(hd->hd_dentry); ++} + -+struct au_vdir_wh { -+ struct hlist_node wh_hash; -+#ifdef CONFIG_AUFS_SHWH -+ ino_t wh_ino; -+ aufs_bindex_t wh_bindex; -+ unsigned char wh_type; -+#else -+ aufs_bindex_t wh_bindex; -+#endif -+ /* caution: packed */ -+ struct au_vdir_destr wh_str; -+} __packed; ++static inline aufs_bindex_t au_dbstart(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bstart; ++} + -+union au_vdir_deblk_p { -+ unsigned char *deblk; -+ struct au_vdir_de *de; -+}; ++static inline aufs_bindex_t au_dbend(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bend; ++} + -+struct au_vdir { -+ unsigned char **vd_deblk; -+ unsigned long vd_nblk; -+ struct { -+ unsigned long ul; -+ union au_vdir_deblk_p p; -+ } vd_last; ++static inline aufs_bindex_t au_dbwh(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bwh; ++} + -+ unsigned long vd_version; -+ unsigned int vd_deblk_sz; -+ unsigned long vd_jiffy; -+} ____cacheline_aligned_in_smp; ++static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return au_di(dentry)->di_bdiropq; ++} + -+/* ---------------------------------------------------------------------- */ ++/* todo: hard/soft set? */ ++static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bstart = bindex; ++} + -+/* dir.c */ -+extern const struct file_operations aufs_dir_fop; -+void au_add_nlink(struct inode *dir, struct inode *h_dir); -+void au_sub_nlink(struct inode *dir, struct inode *h_dir); -+loff_t au_dir_size(struct file *file, struct dentry *dentry); -+int au_test_empty_lower(struct dentry *dentry); -+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist); ++static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bend = bindex; ++} + -+/* vdir.c */ -+unsigned int au_rdhash_est(loff_t sz); -+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp); -+void au_nhash_wh_free(struct au_nhash *whlist); -+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, -+ int limit); -+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen); -+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, -+ unsigned int d_type, aufs_bindex_t bindex, -+ unsigned char shwh); -+void au_vdir_free(struct au_vdir *vdir); -+int au_vdir_init(struct file *file); -+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir); ++static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ /* dbwh can be outside of bstart - bend range */ ++ au_di(dentry)->di_bwh = bindex; ++} + -+/* ioctl.c */ -+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg); ++static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ au_di(dentry)->di_bdiropq = bindex; ++} + -+#ifdef CONFIG_AUFS_RDU -+/* rdu.c */ -+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -+#ifdef CONFIG_COMPAT -+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg); -+#endif -+#else -+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_HNOTIFY ++static inline void au_digen_dec(struct dentry *d) +{ -+ return -EINVAL; ++ atomic_dec(&au_di(d)->di_generation); +} -+#ifdef CONFIG_COMPAT -+static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) ++ ++static inline void au_hn_di_reinit(struct dentry *dentry) +{ -+ return -EINVAL; ++ dentry->d_fsdata = NULL; +} -+#endif -+#endif ++#else ++AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ + +#endif /* __KERNEL__ */ -+#endif /* __AUFS_DIR_H__ */ ++#endif /* __AUFS_DENTRY_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/dynop.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,379 @@ ++++ linux-3.9/fs/aufs/dinfo.c 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,543 @@ +/* -+ * Copyright (C) 2010-2013 Junjiro R. Okajima ++ * Copyright (C) 2005-2013 Junjiro R. Okajima + * + * This program, aufs is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by @@ -8172,1263 +7428,1166 @@ + */ + +/* -+ * dynamically customizable operations for regular files ++ * dentry private data + */ + +#include "aufs.h" + -+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop) ++void au_di_init_once(void *_dinfo) ++{ ++ struct au_dinfo *dinfo = _dinfo; ++ static struct lock_class_key aufs_di; + -+/* -+ * How large will these lists be? -+ * Usually just a few elements, 20-30 at most for each, I guess. -+ */ -+static struct au_splhead dynop[AuDyLast]; ++ au_rw_init(&dinfo->di_rwsem); ++ au_rw_class(&dinfo->di_rwsem, &aufs_di); ++} + -+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op) ++struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc) +{ -+ struct au_dykey *key, *tmp; -+ struct list_head *head; ++ struct au_dinfo *dinfo; ++ int nbr, i; ++ ++ dinfo = au_cache_alloc_dinfo(); ++ if (unlikely(!dinfo)) ++ goto out; + -+ key = NULL; -+ head = &spl->head; -+ rcu_read_lock(); -+ list_for_each_entry_rcu(tmp, head, dk_list) -+ if (tmp->dk_op.dy_hop == h_op) { -+ key = tmp; -+ kref_get(&key->dk_kref); -+ break; -+ } -+ rcu_read_unlock(); ++ nbr = au_sbend(sb) + 1; ++ if (nbr <= 0) ++ nbr = 1; ++ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS); ++ if (dinfo->di_hdentry) { ++ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc); ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ dinfo->di_bwh = -1; ++ dinfo->di_bdiropq = -1; ++ for (i = 0; i < nbr; i++) ++ dinfo->di_hdentry[i].hd_id = -1; ++ goto out; ++ } + -+ return key; ++ au_cache_free_dinfo(dinfo); ++ dinfo = NULL; ++ ++out: ++ return dinfo; +} + -+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key) ++void au_di_free(struct au_dinfo *dinfo) +{ -+ struct au_dykey **k, *found; -+ const void *h_op = key->dk_op.dy_hop; -+ int i; ++ struct au_hdentry *p; ++ aufs_bindex_t bend, bindex; + -+ found = NULL; -+ k = br->br_dykey; -+ for (i = 0; i < AuBrDynOp; i++) -+ if (k[i]) { -+ if (k[i]->dk_op.dy_hop == h_op) { -+ found = k[i]; -+ break; -+ } -+ } else -+ break; -+ if (!found) { -+ spin_lock(&br->br_dykey_lock); -+ for (; i < AuBrDynOp; i++) -+ if (k[i]) { -+ if (k[i]->dk_op.dy_hop == h_op) { -+ found = k[i]; -+ break; -+ } -+ } else { -+ k[i] = key; -+ break; -+ } -+ spin_unlock(&br->br_dykey_lock); -+ BUG_ON(i == AuBrDynOp); /* expand the array */ ++ /* dentry may not be revalidated */ ++ bindex = dinfo->di_bstart; ++ if (bindex >= 0) { ++ bend = dinfo->di_bend; ++ p = dinfo->di_hdentry + bindex; ++ while (bindex++ <= bend) ++ au_hdput(p++); + } -+ -+ return found; ++ kfree(dinfo->di_hdentry); ++ au_cache_free_dinfo(dinfo); +} + -+/* kref_get() if @key is already added */ -+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key) ++void au_di_swap(struct au_dinfo *a, struct au_dinfo *b) +{ -+ struct au_dykey *tmp, *found; -+ struct list_head *head; -+ const void *h_op = key->dk_op.dy_hop; ++ struct au_hdentry *p; ++ aufs_bindex_t bi; + -+ found = NULL; -+ head = &spl->head; -+ spin_lock(&spl->spin); -+ list_for_each_entry(tmp, head, dk_list) -+ if (tmp->dk_op.dy_hop == h_op) { -+ kref_get(&tmp->dk_kref); -+ found = tmp; -+ break; -+ } -+ if (!found) -+ list_add_rcu(&key->dk_list, head); -+ spin_unlock(&spl->spin); ++ AuRwMustWriteLock(&a->di_rwsem); ++ AuRwMustWriteLock(&b->di_rwsem); + -+ if (!found) -+ DyPrSym(key); -+ return found; ++#define DiSwap(v, name) \ ++ do { \ ++ v = a->di_##name; \ ++ a->di_##name = b->di_##name; \ ++ b->di_##name = v; \ ++ } while (0) ++ ++ DiSwap(p, hdentry); ++ DiSwap(bi, bstart); ++ DiSwap(bi, bend); ++ DiSwap(bi, bwh); ++ DiSwap(bi, bdiropq); ++ /* smp_mb(); */ ++ ++#undef DiSwap +} + -+static void dy_free_rcu(struct rcu_head *rcu) ++void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src) +{ -+ struct au_dykey *key; ++ AuRwMustWriteLock(&dst->di_rwsem); ++ AuRwMustWriteLock(&src->di_rwsem); + -+ key = container_of(rcu, struct au_dykey, dk_rcu); -+ DyPrSym(key); -+ kfree(key); ++ dst->di_bstart = src->di_bstart; ++ dst->di_bend = src->di_bend; ++ dst->di_bwh = src->di_bwh; ++ dst->di_bdiropq = src->di_bdiropq; ++ /* smp_mb(); */ +} + -+static void dy_free(struct kref *kref) ++int au_di_init(struct dentry *dentry) +{ -+ struct au_dykey *key; -+ struct au_splhead *spl; ++ int err; ++ struct super_block *sb; ++ struct au_dinfo *dinfo; + -+ key = container_of(kref, struct au_dykey, dk_kref); -+ spl = dynop + key->dk_op.dy_type; -+ au_spl_del_rcu(&key->dk_list, spl); -+ call_rcu(&key->dk_rcu, dy_free_rcu); ++ err = 0; ++ sb = dentry->d_sb; ++ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD); ++ if (dinfo) { ++ atomic_set(&dinfo->di_generation, au_sigen(sb)); ++ /* smp_mb(); */ /* atomic_set */ ++ dentry->d_fsdata = dinfo; ++ } else ++ err = -ENOMEM; ++ ++ return err; +} + -+void au_dy_put(struct au_dykey *key) ++void au_di_fin(struct dentry *dentry) +{ -+ kref_put(&key->dk_kref, dy_free); -+} ++ struct au_dinfo *dinfo; + -+/* ---------------------------------------------------------------------- */ ++ dinfo = au_di(dentry); ++ AuRwDestroy(&dinfo->di_rwsem); ++ au_di_free(dinfo); ++} + -+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *)) ++int au_di_realloc(struct au_dinfo *dinfo, int nbr) ++{ ++ int err, sz; ++ struct au_hdentry *hdp; + -+#ifdef CONFIG_AUFS_DEBUG -+#define DyDbgDeclare(cnt) unsigned int cnt = 0 -+#define DyDbgInc(cnt) do { cnt++; } while (0) -+#else -+#define DyDbgDeclare(cnt) do {} while (0) -+#define DyDbgInc(cnt) do {} while (0) -+#endif ++ AuRwMustWriteLock(&dinfo->di_rwsem); + -+#define DySet(func, dst, src, h_op, h_sb) do { \ -+ DyDbgInc(cnt); \ -+ if (h_op->func) { \ -+ if (src.func) \ -+ dst.func = src.func; \ -+ else \ -+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \ -+ } \ -+} while (0) ++ err = -ENOMEM; ++ sz = sizeof(*hdp) * (dinfo->di_bend + 1); ++ if (!sz) ++ sz = sizeof(*hdp); ++ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS); ++ if (hdp) { ++ dinfo->di_hdentry = hdp; ++ err = 0; ++ } + -+#define DySetForce(func, dst, src) do { \ -+ AuDebugOn(!src.func); \ -+ DyDbgInc(cnt); \ -+ dst.func = src.func; \ -+} while (0) ++ return err; ++} + -+#define DySetAop(func) \ -+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb) -+#define DySetAopForce(func) \ -+ DySetForce(func, dyaop->da_op, aufs_aop) ++/* ---------------------------------------------------------------------- */ + -+static void dy_aop(struct au_dykey *key, const void *h_op, -+ struct super_block *h_sb __maybe_unused) ++static void do_ii_write_lock(struct inode *inode, unsigned int lsc) +{ -+ struct au_dyaop *dyaop = (void *)key; -+ const struct address_space_operations *h_aop = h_op; -+ DyDbgDeclare(cnt); -+ -+ AuDbg("%s\n", au_sbtype(h_sb)); ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_write_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ ii_write_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_write_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_write_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_write_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_write_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} + -+ DySetAop(writepage); -+ DySetAopForce(readpage); /* force */ -+ DySetAop(writepages); -+ DySetAop(set_page_dirty); -+ DySetAop(readpages); -+ DySetAop(write_begin); -+ DySetAop(write_end); -+ DySetAop(bmap); -+ DySetAop(invalidatepage); -+ DySetAop(releasepage); -+ DySetAop(freepage); -+ /* these two will be changed according to an aufs mount option */ -+ DySetAop(direct_IO); -+ DySetAop(get_xip_mem); -+ DySetAop(migratepage); -+ DySetAop(launder_page); -+ DySetAop(is_partially_uptodate); -+ DySetAop(error_remove_page); -+ DySetAop(swap_activate); -+ DySetAop(swap_deactivate); ++static void do_ii_read_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_read_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ ii_read_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_read_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_read_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_read_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_read_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} + -+ DyDbgSize(cnt, *h_aop); -+ dyaop->da_get_xip_mem = h_aop->get_xip_mem; ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc) ++{ ++ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); ++ if (d->d_inode) { ++ if (au_ftest_lock(flags, IW)) ++ do_ii_write_lock(d->d_inode, lsc); ++ else if (au_ftest_lock(flags, IR)) ++ do_ii_read_lock(d->d_inode, lsc); ++ } +} + -+/* ---------------------------------------------------------------------- */ ++void di_read_unlock(struct dentry *d, int flags) ++{ ++ if (d->d_inode) { ++ if (au_ftest_lock(flags, IW)) { ++ au_dbg_verify_dinode(d); ++ ii_write_unlock(d->d_inode); ++ } else if (au_ftest_lock(flags, IR)) { ++ au_dbg_verify_dinode(d); ++ ii_read_unlock(d->d_inode); ++ } ++ } ++ au_rw_read_unlock(&au_di(d)->di_rwsem); ++} + -+static void dy_bug(struct kref *kref) ++void di_downgrade_lock(struct dentry *d, int flags) +{ -+ BUG(); ++ if (d->d_inode && au_ftest_lock(flags, IR)) ++ ii_downgrade_lock(d->d_inode); ++ au_rw_dgrade_lock(&au_di(d)->di_rwsem); +} + -+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br) ++void di_write_lock(struct dentry *d, unsigned int lsc) +{ -+ struct au_dykey *key, *old; -+ struct au_splhead *spl; -+ struct op { -+ unsigned int sz; -+ void (*set)(struct au_dykey *key, const void *h_op, -+ struct super_block *h_sb __maybe_unused); -+ }; -+ static const struct op a[] = { -+ [AuDy_AOP] = { -+ .sz = sizeof(struct au_dyaop), -+ .set = dy_aop -+ } -+ }; -+ const struct op *p; ++ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc); ++ if (d->d_inode) ++ do_ii_write_lock(d->d_inode, lsc); ++} + -+ spl = dynop + op->dy_type; -+ key = dy_gfind_get(spl, op->dy_hop); -+ if (key) -+ goto out_add; /* success */ ++void di_write_unlock(struct dentry *d) ++{ ++ au_dbg_verify_dinode(d); ++ if (d->d_inode) ++ ii_write_unlock(d->d_inode); ++ au_rw_write_unlock(&au_di(d)->di_rwsem); ++} + -+ p = a + op->dy_type; -+ key = kzalloc(p->sz, GFP_NOFS); -+ if (unlikely(!key)) { -+ key = ERR_PTR(-ENOMEM); -+ goto out; -+ } ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ AuDebugOn(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); + -+ key->dk_op.dy_hop = op->dy_hop; -+ kref_init(&key->dk_kref); -+ p->set(key, op->dy_hop, br->br_mnt->mnt_sb); -+ old = dy_gadd(spl, key); -+ if (old) { -+ kfree(key); -+ key = old; ++ if (isdir && au_test_subdir(d1, d2)) { ++ di_write_lock_child(d1); ++ di_write_lock_child2(d2); ++ } else { ++ /* there should be no races */ ++ di_write_lock_child(d2); ++ di_write_lock_child2(d1); + } -+ -+out_add: -+ old = dy_bradd(br, key); -+ if (old) -+ /* its ref-count should never be zero here */ -+ kref_put(&key->dk_kref, dy_bug); -+out: -+ return key; +} + -+/* ---------------------------------------------------------------------- */ -+/* -+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it. -+ * This behaviour is neccessary to return an error from open(O_DIRECT) instead -+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes -+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error. -+ * See the aufs manual in detail. -+ * -+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the -+ * performance of fadvise() and madvise() may be affected. -+ */ -+static void dy_adx(struct au_dyaop *dyaop, int do_dx) ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) +{ -+ if (!do_dx) { -+ dyaop->da_op.direct_IO = NULL; -+ dyaop->da_op.get_xip_mem = NULL; ++ AuDebugOn(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir && au_test_subdir(d1, d2)) { ++ di_write_lock_parent(d1); ++ di_write_lock_parent2(d2); + } else { -+ dyaop->da_op.direct_IO = aufs_aop.direct_IO; -+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem; -+ if (!dyaop->da_get_xip_mem) -+ dyaop->da_op.get_xip_mem = NULL; ++ /* there should be no races */ ++ di_write_lock_parent(d2); ++ di_write_lock_parent2(d1); + } +} + -+static struct au_dyaop *dy_aget(struct au_branch *br, -+ const struct address_space_operations *h_aop, -+ int do_dx) ++void di_write_unlock2(struct dentry *d1, struct dentry *d2) +{ -+ struct au_dyaop *dyaop; -+ struct au_dynop op; ++ di_write_unlock(d1); ++ if (d1->d_inode == d2->d_inode) ++ au_rw_write_unlock(&au_di(d2)->di_rwsem); ++ else ++ di_write_unlock(d2); ++} + -+ op.dy_type = AuDy_AOP; -+ op.dy_haop = h_aop; -+ dyaop = (void *)dy_get(&op, br); -+ if (IS_ERR(dyaop)) -+ goto out; -+ dy_adx(dyaop, do_dx); ++/* ---------------------------------------------------------------------- */ + -+out: -+ return dyaop; ++struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct dentry *d; ++ ++ DiMustAnyLock(dentry); ++ ++ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) ++ return NULL; ++ AuDebugOn(bindex < 0); ++ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry; ++ AuDebugOn(d && d->d_count <= 0); ++ return d; +} + -+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode) ++/* ++ * extended version of au_h_dptr(). ++ * returns a hashed and positive h_dentry in bindex, NULL, or error. ++ */ ++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex) +{ -+ int err, do_dx; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct au_dyaop *dyaop; ++ struct dentry *h_dentry; ++ struct inode *inode, *h_inode; + -+ AuDebugOn(!S_ISREG(h_inode->i_mode)); -+ IiMustWriteLock(inode); ++ inode = dentry->d_inode; ++ AuDebugOn(!inode); + -+ sb = inode->i_sb; -+ br = au_sbr(sb, bindex); -+ do_dx = !!au_opt_test(au_mntflags(sb), DIO); -+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx); -+ err = PTR_ERR(dyaop); -+ if (IS_ERR(dyaop)) -+ /* unnecessary to call dy_fput() */ -+ goto out; ++ h_dentry = NULL; ++ if (au_dbstart(dentry) <= bindex ++ && bindex <= au_dbend(dentry)) ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && !au_d_hashed_positive(h_dentry)) { ++ dget(h_dentry); ++ goto out; /* success */ ++ } + -+ err = 0; -+ inode->i_mapping->a_ops = &dyaop->da_op; ++ AuDebugOn(bindex < au_ibstart(inode)); ++ AuDebugOn(au_ibend(inode) < bindex); ++ h_inode = au_h_iptr(inode, bindex); ++ h_dentry = d_find_alias(h_inode); ++ if (h_dentry) { ++ if (!IS_ERR(h_dentry)) { ++ if (!au_d_hashed_positive(h_dentry)) ++ goto out; /* success */ ++ dput(h_dentry); ++ } else ++ goto out; ++ } ++ ++ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) { ++ h_dentry = au_plink_lkup(inode, bindex); ++ AuDebugOn(!h_dentry); ++ if (!IS_ERR(h_dentry)) { ++ if (!au_d_hashed_positive(h_dentry)) ++ goto out; /* success */ ++ dput(h_dentry); ++ h_dentry = NULL; ++ } ++ } + +out: -+ return err; ++ AuDbgDentry(h_dentry); ++ return h_dentry; +} + -+/* -+ * Is it safe to replace a_ops during the inode/file is in operation? -+ * Yes, I hope so. -+ */ -+int au_dy_irefresh(struct inode *inode) ++aufs_bindex_t au_dbtail(struct dentry *dentry) +{ -+ int err; -+ aufs_bindex_t bstart; -+ struct inode *h_inode; ++ aufs_bindex_t bend, bwh; + -+ err = 0; -+ if (S_ISREG(inode->i_mode)) { -+ bstart = au_ibstart(inode); -+ h_inode = au_h_iptr(inode, bstart); -+ err = au_dy_iaop(inode, bstart, h_inode); ++ bend = au_dbend(dentry); ++ if (0 <= bend) { ++ bwh = au_dbwh(dentry); ++ if (!bwh) ++ return bwh; ++ if (0 < bwh && bwh < bend) ++ return bwh - 1; + } -+ return err; ++ return bend; +} + -+void au_dy_arefresh(int do_dx) ++aufs_bindex_t au_dbtaildir(struct dentry *dentry) +{ -+ struct au_splhead *spl; -+ struct list_head *head; -+ struct au_dykey *key; ++ aufs_bindex_t bend, bopq; + -+ spl = dynop + AuDy_AOP; -+ head = &spl->head; -+ spin_lock(&spl->spin); -+ list_for_each_entry(key, head, dk_list) -+ dy_adx((void *)key, do_dx); -+ spin_unlock(&spl->spin); ++ bend = au_dbtail(dentry); ++ if (0 <= bend) { ++ bopq = au_dbdiropq(dentry); ++ if (0 <= bopq && bopq < bend) ++ bend = bopq; ++ } ++ return bend; +} + +/* ---------------------------------------------------------------------- */ + -+void __init au_dy_init(void) ++void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry) +{ -+ int i; ++ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex; ++ struct au_branch *br; + -+ /* make sure that 'struct au_dykey *' can be any type */ -+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key)); ++ DiMustWriteLock(dentry); + -+ for (i = 0; i < AuDyLast; i++) -+ au_spl_init(dynop + i); ++ au_hdput(hd); ++ hd->hd_dentry = h_dentry; ++ if (h_dentry) { ++ br = au_sbr(dentry->d_sb, bindex); ++ hd->hd_id = br->br_id; ++ } +} + -+void au_dy_fin(void) ++int au_dbrange_test(struct dentry *dentry) +{ -+ int i; -+ -+ for (i = 0; i < AuDyLast; i++) -+ WARN_ON(!list_empty(&dynop[i].head)); -+} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/dynop.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,76 @@ -+/* -+ * Copyright (C) 2010-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * dynamically customizable operations (for regular files only) -+ */ -+ -+#ifndef __AUFS_DYNOP_H__ -+#define __AUFS_DYNOP_H__ -+ -+#ifdef __KERNEL__ -+ -+#include "inode.h" -+ -+enum {AuDy_AOP, AuDyLast}; -+ -+struct au_dynop { -+ int dy_type; -+ union { -+ const void *dy_hop; -+ const struct address_space_operations *dy_haop; -+ }; -+}; -+ -+struct au_dykey { -+ union { -+ struct list_head dk_list; -+ struct rcu_head dk_rcu; -+ }; -+ struct au_dynop dk_op; -+ -+ /* -+ * during I am in the branch local array, kref is gotten. when the -+ * branch is removed, kref is put. -+ */ -+ struct kref dk_kref; -+}; -+ -+/* stop unioning since their sizes are very different from each other */ -+struct au_dyaop { -+ struct au_dykey da_key; -+ struct address_space_operations da_op; /* not const */ -+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int, -+ void **, unsigned long *); -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* dynop.c */ -+struct au_branch; -+void au_dy_put(struct au_dykey *key); -+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode); -+int au_dy_irefresh(struct inode *inode); -+void au_dy_arefresh(int do_dio); ++ int err; ++ aufs_bindex_t bstart, bend; + -+void __init au_dy_init(void); -+void au_dy_fin(void); ++ err = 0; ++ bstart = au_dbstart(dentry); ++ bend = au_dbend(dentry); ++ if (bstart >= 0) ++ AuDebugOn(bend < 0 && bstart > bend); ++ else { ++ err = -EIO; ++ AuDebugOn(bend >= 0); ++ } + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DYNOP_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/export.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,812 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ ++ return err; ++} + -+/* -+ * export via nfs -+ */ ++int au_digen_test(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; + -+#include -+#include -+#include -+#include -+#include -+#include -+#include "../fs/mount.h" -+#include "aufs.h" ++ err = 0; ++ if (unlikely(au_digen(dentry) != sigen ++ || au_iigen_test(dentry->d_inode, sigen))) ++ err = -EIO; + -+union conv { -+#ifdef CONFIG_AUFS_INO_T_64 -+ __u32 a[2]; -+#else -+ __u32 a[1]; -+#endif -+ ino_t ino; -+}; ++ return err; ++} + -+static ino_t decode_ino(__u32 *a) ++void au_update_digen(struct dentry *dentry) +{ -+ union conv u; -+ -+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); -+ u.a[0] = a[0]; -+#ifdef CONFIG_AUFS_INO_T_64 -+ u.a[1] = a[1]; -+#endif -+ return u.ino; ++ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb)); ++ /* smp_mb(); */ /* atomic_set */ +} + -+static void encode_ino(__u32 *a, ino_t ino) ++void au_update_dbrange(struct dentry *dentry, int do_put_zero) +{ -+ union conv u; ++ struct au_dinfo *dinfo; ++ struct dentry *h_d; ++ struct au_hdentry *hdp; + -+ u.ino = ino; -+ a[0] = u.a[0]; -+#ifdef CONFIG_AUFS_INO_T_64 -+ a[1] = u.a[1]; -+#endif -+} ++ DiMustWriteLock(dentry); + -+/* NFS file handle */ -+enum { -+ Fh_br_id, -+ Fh_sigen, -+#ifdef CONFIG_AUFS_INO_T_64 -+ /* support 64bit inode number */ -+ Fh_ino1, -+ Fh_ino2, -+ Fh_dir_ino1, -+ Fh_dir_ino2, -+#else -+ Fh_ino1, -+ Fh_dir_ino1, -+#endif -+ Fh_igen, -+ Fh_h_type, -+ Fh_tail, ++ dinfo = au_di(dentry); ++ if (!dinfo || dinfo->di_bstart < 0) ++ return; + -+ Fh_ino = Fh_ino1, -+ Fh_dir_ino = Fh_dir_ino1 -+}; ++ hdp = dinfo->di_hdentry; ++ if (do_put_zero) { ++ aufs_bindex_t bindex, bend; + -+static int au_test_anon(struct dentry *dentry) -+{ -+ /* note: read d_flags without d_lock */ -+ return !!(dentry->d_flags & DCACHE_DISCONNECTED); -+} ++ bend = dinfo->di_bend; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) { ++ h_d = hdp[0 + bindex].hd_dentry; ++ if (h_d && !h_d->d_inode) ++ au_set_h_dptr(dentry, bindex, NULL); ++ } ++ } ++ ++ dinfo->di_bstart = -1; ++ while (++dinfo->di_bstart <= dinfo->di_bend) ++ if (hdp[0 + dinfo->di_bstart].hd_dentry) ++ break; ++ if (dinfo->di_bstart > dinfo->di_bend) { ++ dinfo->di_bstart = -1; ++ dinfo->di_bend = -1; ++ return; ++ } + -+/* ---------------------------------------------------------------------- */ -+/* inode generation external table */ ++ dinfo->di_bend++; ++ while (0 <= --dinfo->di_bend) ++ if (hdp[0 + dinfo->di_bend].hd_dentry) ++ break; ++ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0); ++} + -+void au_xigen_inc(struct inode *inode) ++void au_update_dbstart(struct dentry *dentry) +{ -+ loff_t pos; -+ ssize_t sz; -+ __u32 igen; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ -+ sb = inode->i_sb; -+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO)); -+ -+ sbinfo = au_sbi(sb); -+ pos = inode->i_ino; -+ pos *= sizeof(igen); -+ igen = inode->i_generation + 1; -+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen, -+ sizeof(igen), &pos); -+ if (sz == sizeof(igen)) -+ return; /* success */ ++ aufs_bindex_t bindex, bend; ++ struct dentry *h_dentry; + -+ if (unlikely(sz >= 0)) -+ AuIOErr("xigen error (%zd)\n", sz); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ if (h_dentry->d_inode) { ++ au_set_dbstart(dentry, bindex); ++ return; ++ } ++ au_set_h_dptr(dentry, bindex, NULL); ++ } +} + -+int au_xigen_new(struct inode *inode) ++void au_update_dbend(struct dentry *dentry) +{ -+ int err; -+ loff_t pos; -+ ssize_t sz; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ struct file *file; -+ -+ err = 0; -+ /* todo: dirty, at mount time */ -+ if (inode->i_ino == AUFS_ROOT_INO) -+ goto out; -+ sb = inode->i_sb; -+ SiMustAnyLock(sb); -+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) -+ goto out; ++ aufs_bindex_t bindex, bstart; ++ struct dentry *h_dentry; + -+ err = -EFBIG; -+ pos = inode->i_ino; -+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) { -+ AuIOErr1("too large i%lld\n", pos); -+ goto out; ++ bstart = au_dbstart(dentry); ++ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ if (h_dentry->d_inode) { ++ au_set_dbend(dentry, bindex); ++ return; ++ } ++ au_set_h_dptr(dentry, bindex, NULL); + } -+ pos *= sizeof(inode->i_generation); ++} + -+ err = 0; -+ sbinfo = au_sbi(sb); -+ file = sbinfo->si_xigen; -+ BUG_ON(!file); ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry) ++{ ++ aufs_bindex_t bindex, bend; + -+ if (i_size_read(file->f_dentry->d_inode) -+ < pos + sizeof(inode->i_generation)) { -+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next); -+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation, -+ sizeof(inode->i_generation), &pos); -+ } else -+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation, -+ sizeof(inode->i_generation), &pos); -+ if (sz == sizeof(inode->i_generation)) -+ goto out; /* success */ ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) ++ if (au_h_dptr(dentry, bindex) == h_dentry) ++ return bindex; ++ return -1; ++} +--- /dev/null ++++ linux-3.9/fs/aufs/dir.c 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,630 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ err = sz; -+ if (unlikely(sz >= 0)) { -+ err = -EIO; -+ AuIOErr("xigen error (%zd)\n", sz); -+ } ++/* ++ * directory operations ++ */ + -+out: -+ return err; -+} ++#include ++#include "aufs.h" + -+int au_xigen_set(struct super_block *sb, struct file *base) ++void au_add_nlink(struct inode *dir, struct inode *h_dir) +{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ struct file *file; -+ -+ SiMustWriteLock(sb); ++ unsigned int nlink; + -+ sbinfo = au_sbi(sb); -+ file = au_xino_create2(base, sbinfo->si_xigen); -+ err = PTR_ERR(file); -+ if (IS_ERR(file)) -+ goto out; -+ err = 0; -+ if (sbinfo->si_xigen) -+ fput(sbinfo->si_xigen); -+ sbinfo->si_xigen = file; ++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); + -+out: -+ return err; ++ nlink = dir->i_nlink; ++ nlink += h_dir->i_nlink - 2; ++ if (h_dir->i_nlink < 2) ++ nlink += 2; ++ /* 0 can happen in revaliding */ ++ set_nlink(dir, nlink); +} + -+void au_xigen_clr(struct super_block *sb) ++void au_sub_nlink(struct inode *dir, struct inode *h_dir) +{ -+ struct au_sbinfo *sbinfo; ++ unsigned int nlink; + -+ SiMustWriteLock(sb); ++ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); + -+ sbinfo = au_sbi(sb); -+ if (sbinfo->si_xigen) { -+ fput(sbinfo->si_xigen); -+ sbinfo->si_xigen = NULL; -+ } ++ nlink = dir->i_nlink; ++ nlink -= h_dir->i_nlink - 2; ++ if (h_dir->i_nlink < 2) ++ nlink -= 2; ++ /* nlink == 0 means the branch-fs is broken */ ++ set_nlink(dir, nlink); +} + -+/* ---------------------------------------------------------------------- */ -+ -+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino, -+ ino_t dir_ino) ++loff_t au_dir_size(struct file *file, struct dentry *dentry) +{ -+ struct dentry *dentry, *d; -+ struct inode *inode; -+ unsigned int sigen; -+ struct hlist_node *p; ++ loff_t sz; ++ aufs_bindex_t bindex, bend; ++ struct file *h_file; ++ struct dentry *h_dentry; + -+ dentry = NULL; -+ inode = ilookup(sb, ino); -+ if (!inode) -+ goto out; ++ sz = 0; ++ if (file) { ++ AuDebugOn(!file_inode(file)); ++ AuDebugOn(!S_ISDIR(file_inode(file)->i_mode)); + -+ dentry = ERR_PTR(-ESTALE); -+ sigen = au_sigen(sb); -+ if (unlikely(is_bad_inode(inode) -+ || IS_DEADDIR(inode) -+ || sigen != au_iigen(inode, NULL))) -+ goto out_iput; ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); ++ bindex <= bend && sz < KMALLOC_MAX_SIZE; ++ bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (h_file && file_inode(h_file)) ++ sz += vfsub_f_size_read(h_file); ++ } ++ } else { ++ AuDebugOn(!dentry); ++ AuDebugOn(!dentry->d_inode); ++ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); + -+ dentry = NULL; -+ if (!dir_ino || S_ISDIR(inode->i_mode)) -+ dentry = d_find_alias(inode); -+ else { -+ spin_lock(&inode->i_lock); -+ hlist_for_each_entry(d, p, &inode->i_dentry, d_alias) { -+ spin_lock(&d->d_lock); -+ if (!au_test_anon(d) -+ && d->d_parent->d_inode->i_ino == dir_ino) { -+ dentry = dget_dlock(d); -+ spin_unlock(&d->d_lock); -+ break; -+ } -+ spin_unlock(&d->d_lock); ++ bend = au_dbtaildir(dentry); ++ for (bindex = au_dbstart(dentry); ++ bindex <= bend && sz < KMALLOC_MAX_SIZE; ++ bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) ++ sz += i_size_read(h_dentry->d_inode); + } -+ spin_unlock(&inode->i_lock); + } -+ if (unlikely(dentry && au_digen_test(dentry, sigen))) { -+ /* need to refresh */ -+ dput(dentry); -+ dentry = NULL; ++ if (sz < KMALLOC_MAX_SIZE) ++ sz = roundup_pow_of_two(sz); ++ if (sz > KMALLOC_MAX_SIZE) ++ sz = KMALLOC_MAX_SIZE; ++ else if (sz < NAME_MAX) { ++ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX); ++ sz = AUFS_RDBLK_DEF; + } -+ -+out_iput: -+ iput(inode); -+out: -+ AuTraceErrPtr(dentry); -+ return dentry; ++ return sz; +} + +/* ---------------------------------------------------------------------- */ + -+/* todo: dirty? */ -+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */ ++static int reopen_dir(struct file *file) ++{ ++ int err; ++ unsigned int flags; ++ aufs_bindex_t bindex, btail, bstart; ++ struct dentry *dentry, *h_dentry; ++ struct file *h_file; ++ ++ /* open all lower dirs */ ++ dentry = file->f_dentry; ++ bstart = au_dbstart(dentry); ++ for (bindex = au_fbstart(file); bindex < bstart; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbstart(file, bstart); + -+struct au_compare_mnt_args { -+ /* input */ -+ struct super_block *sb; ++ btail = au_dbtaildir(dentry); ++ for (bindex = au_fbend_dir(file); btail < bindex; bindex--) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbend_dir(file, btail); + -+ /* output */ -+ struct vfsmount *mnt; -+}; ++ flags = vfsub_file_flags(file); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_file = au_hf_dir(file, bindex); ++ if (h_file) ++ continue; + -+static int au_compare_mnt(struct vfsmount *mnt, void *arg) -+{ -+ struct au_compare_mnt_args *a = arg; ++ h_file = au_h_open(dentry, bindex, flags, file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; /* close all? */ ++ au_set_h_fptr(file, bindex, h_file); ++ } ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ err = 0; + -+ if (mnt->mnt_sb != a->sb) -+ return 0; -+ a->mnt = mntget(mnt); -+ return 1; ++out: ++ return err; +} + -+static struct vfsmount *au_mnt_get(struct super_block *sb) ++static int do_open_dir(struct file *file, int flags) +{ + int err; -+ struct path root; -+ struct au_compare_mnt_args args = { -+ .sb = sb -+ }; -+ -+ get_fs_root(current->fs, &root); -+ br_read_lock(&vfsmount_lock); -+ err = iterate_mounts(au_compare_mnt, &args, root.mnt); -+ br_read_unlock(&vfsmount_lock); -+ path_put(&root); -+ AuDebugOn(!err); -+ AuDebugOn(!args.mnt); -+ return args.mnt; -+} ++ aufs_bindex_t bindex, btail; ++ struct dentry *dentry, *h_dentry; ++ struct file *h_file; + -+struct au_nfsd_si_lock { -+ unsigned int sigen; -+ aufs_bindex_t bindex, br_id; -+ unsigned char force_lock; -+}; ++ FiMustWriteLock(file); + -+static int si_nfsd_read_lock(struct super_block *sb, -+ struct au_nfsd_si_lock *nsi_lock) -+{ -+ int err; -+ aufs_bindex_t bindex; ++ dentry = file->f_dentry; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out; + -+ si_read_lock(sb, AuLock_FLUSH); ++ file->f_version = dentry->d_inode->i_version; ++ bindex = au_dbstart(dentry); ++ au_set_fbstart(file, bindex); ++ btail = au_dbtaildir(dentry); ++ au_set_fbend_dir(file, btail); ++ for (; !err && bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (!h_dentry) ++ continue; + -+ /* branch id may be wrapped around */ -+ err = 0; -+ bindex = au_br_index(sb, nsi_lock->br_id); -+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb)) -+ goto out; /* success */ ++ h_file = au_h_open(dentry, bindex, flags, file); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ break; ++ } ++ au_set_h_fptr(file, bindex, h_file); ++ } ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ if (!err) ++ return 0; /* success */ + -+ err = -ESTALE; -+ bindex = -1; -+ if (!nsi_lock->force_lock) -+ si_read_unlock(sb); ++ /* close all */ ++ for (bindex = au_fbstart(file); bindex <= btail; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbstart(file, -1); ++ au_set_fbend_dir(file, -1); + +out: -+ nsi_lock->bindex = bindex; + return err; +} + -+struct find_name_by_ino { -+ int called, found; -+ ino_t ino; -+ char *name; -+ int namelen; -+}; -+ -+static int -+find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset, -+ u64 ino, unsigned int d_type) ++static int aufs_open_dir(struct inode *inode __maybe_unused, ++ struct file *file) +{ -+ struct find_name_by_ino *a = arg; -+ -+ a->called++; -+ if (a->ino != ino) -+ return 0; ++ int err; ++ struct super_block *sb; ++ struct au_fidir *fidir; + -+ memcpy(a->name, name, namelen); -+ a->namelen = namelen; -+ a->found = 1; -+ return 1; ++ err = -ENOMEM; ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ fidir = au_fidir_alloc(sb); ++ if (fidir) { ++ err = au_do_open(file, do_open_dir, fidir); ++ if (unlikely(err)) ++ kfree(fidir); ++ } ++ si_read_unlock(sb); ++ return err; +} + -+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino, -+ struct au_nfsd_si_lock *nsi_lock) ++static int aufs_release_dir(struct inode *inode __maybe_unused, ++ struct file *file) +{ -+ struct dentry *dentry, *parent; -+ struct file *file; -+ struct inode *dir; -+ struct find_name_by_ino arg; -+ int err; -+ -+ parent = path->dentry; -+ if (nsi_lock) -+ si_read_unlock(parent->d_sb); -+ file = vfsub_dentry_open(path, au_dir_roflags); -+ dentry = (void *)file; -+ if (IS_ERR(file)) -+ goto out; -+ -+ dentry = ERR_PTR(-ENOMEM); -+ arg.name = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!arg.name)) -+ goto out_file; -+ arg.ino = ino; -+ arg.found = 0; -+ do { -+ arg.called = 0; -+ /* smp_mb(); */ -+ err = vfsub_readdir(file, find_name_by_ino, &arg); -+ } while (!err && !arg.found && arg.called); -+ dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_name; -+ /* instead of ENOENT */ -+ dentry = ERR_PTR(-ESTALE); -+ if (!arg.found) -+ goto out_name; ++ struct au_vdir *vdir_cache; ++ struct au_finfo *finfo; ++ struct au_fidir *fidir; ++ aufs_bindex_t bindex, bend; + -+ /* do not call vfsub_lkup_one() */ -+ dir = parent->d_inode; -+ mutex_lock(&dir->i_mutex); -+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen); -+ mutex_unlock(&dir->i_mutex); -+ AuTraceErrPtr(dentry); -+ if (IS_ERR(dentry)) -+ goto out_name; -+ AuDebugOn(au_test_anon(dentry)); -+ if (unlikely(!dentry->d_inode)) { -+ dput(dentry); -+ dentry = ERR_PTR(-ENOENT); -+ } ++ finfo = au_fi(file); ++ fidir = finfo->fi_hdir; ++ if (fidir) { ++ vdir_cache = fidir->fd_vdir_cache; /* lock-free */ ++ if (vdir_cache) ++ au_vdir_free(vdir_cache); + -+out_name: -+ free_page((unsigned long)arg.name); -+out_file: -+ fput(file); -+out: -+ if (unlikely(nsi_lock -+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0)) -+ if (!IS_ERR(dentry)) { -+ dput(dentry); -+ dentry = ERR_PTR(-ESTALE); ++ bindex = finfo->fi_btop; ++ if (bindex >= 0) { ++ /* ++ * calls fput() instead of filp_close(), ++ * since no dnotify or lock for the lower file. ++ */ ++ bend = fidir->fd_bbot; ++ for (; bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); + } -+ AuTraceErrPtr(dentry); -+ return dentry; ++ kfree(fidir); ++ finfo->fi_hdir = NULL; ++ } ++ au_finfo_fin(file); ++ return 0; +} + -+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino, -+ ino_t dir_ino, -+ struct au_nfsd_si_lock *nsi_lock) -+{ -+ struct dentry *dentry; -+ struct path path; ++/* ---------------------------------------------------------------------- */ + -+ if (dir_ino != AUFS_ROOT_INO) { -+ path.dentry = decode_by_ino(sb, dir_ino, 0); -+ dentry = path.dentry; -+ if (!path.dentry || IS_ERR(path.dentry)) -+ goto out; -+ AuDebugOn(au_test_anon(path.dentry)); -+ } else -+ path.dentry = dget(sb->s_root); ++static int au_do_flush_dir(struct file *file, fl_owner_t id) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct file *h_file; + -+ path.mnt = au_mnt_get(sb); -+ dentry = au_lkup_by_ino(&path, ino, nsi_lock); -+ path_put(&path); ++ err = 0; ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (h_file) ++ err = vfsub_flush(h_file, id); ++ } ++ return err; ++} + -+out: -+ AuTraceErrPtr(dentry); -+ return dentry; ++static int aufs_flush_dir(struct file *file, fl_owner_t id) ++{ ++ return au_do_flush(file, id, au_do_flush_dir); +} + +/* ---------------------------------------------------------------------- */ + -+static int h_acceptable(void *expv, struct dentry *dentry) ++static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync) +{ -+ return 1; ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) { ++ struct path h_path; ++ ++ if (au_test_ro(sb, bindex, inode)) ++ continue; ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ if (!h_path.dentry) ++ continue; ++ ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_fsync(NULL, &h_path, datasync); ++ } ++ ++ return err; +} + -+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath, -+ char *buf, int len, struct super_block *sb) ++static int au_do_fsync_dir(struct file *file, int datasync) +{ -+ char *p; -+ int n; -+ struct path path; ++ int err; ++ aufs_bindex_t bend, bindex; ++ struct file *h_file; ++ struct super_block *sb; ++ struct inode *inode; + -+ p = d_path(h_rootpath, buf, len); -+ if (IS_ERR(p)) ++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); ++ if (unlikely(err)) + goto out; -+ n = strlen(p); + -+ path.mnt = h_rootpath->mnt; -+ path.dentry = h_parent; -+ p = d_path(&path, buf, len); -+ if (IS_ERR(p)) -+ goto out; -+ if (n != 1) -+ p += n; ++ sb = file->f_dentry->d_sb; ++ inode = file_inode(file); ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_hf_dir(file, bindex); ++ if (!h_file || au_test_ro(sb, bindex, inode)) ++ continue; + -+ path.mnt = au_mnt_get(sb); -+ path.dentry = sb->s_root; -+ p = d_path(&path, buf, len - strlen(p)); -+ mntput(path.mnt); -+ if (IS_ERR(p)) -+ goto out; -+ if (n != 1) -+ p[strlen(p)] = '/'; ++ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ } + +out: -+ AuTraceErrPtr(p); -+ return p; ++ return err; +} + -+static -+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh, -+ int fh_len, struct au_nfsd_si_lock *nsi_lock) ++/* ++ * @file may be NULL ++ */ ++static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end, ++ int datasync) +{ -+ struct dentry *dentry, *h_parent, *root; -+ struct super_block *h_sb; -+ char *pathname, *p; -+ struct vfsmount *h_mnt; -+ struct au_branch *br; + int err; -+ struct path path; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct mutex *mtx; + -+ br = au_sbr(sb, nsi_lock->bindex); -+ h_mnt = br->br_mnt; -+ h_sb = h_mnt->mnt_sb; -+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */ -+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail), -+ fh_len - Fh_tail, fh[Fh_h_type], -+ h_acceptable, /*context*/NULL); -+ dentry = h_parent; -+ if (unlikely(!h_parent || IS_ERR(h_parent))) { -+ AuWarn1("%s decode_fh failed, %ld\n", -+ au_sbtype(h_sb), PTR_ERR(h_parent)); -+ goto out; -+ } -+ dentry = NULL; -+ if (unlikely(au_test_anon(h_parent))) { -+ AuWarn1("%s decode_fh returned a disconnected dentry\n", -+ au_sbtype(h_sb)); -+ goto out_h_parent; ++ err = 0; ++ dentry = file->f_dentry; ++ mtx = &dentry->d_inode->i_mutex; ++ mutex_lock(mtx); ++ sb = dentry->d_sb; ++ si_noflush_read_lock(sb); ++ if (file) ++ err = au_do_fsync_dir(file, datasync); ++ else { ++ di_write_lock_child(dentry); ++ err = au_do_fsync_dir_no_file(dentry, datasync); + } ++ au_cpup_attr_timesizes(dentry->d_inode); ++ di_write_unlock(dentry); ++ if (file) ++ fi_write_unlock(file); + -+ dentry = ERR_PTR(-ENOMEM); -+ pathname = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!pathname)) -+ goto out_h_parent; ++ si_read_unlock(sb); ++ mutex_unlock(mtx); ++ return err; ++} + -+ root = sb->s_root; -+ path.mnt = h_mnt; -+ di_read_lock_parent(root, !AuLock_IR); -+ path.dentry = au_h_dptr(root, nsi_lock->bindex); -+ di_read_unlock(root, !AuLock_IR); -+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb); -+ dentry = (void *)p; -+ if (IS_ERR(p)) -+ goto out_pathname; ++/* ---------------------------------------------------------------------- */ + -+ si_read_unlock(sb); -+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); -+ dentry = ERR_PTR(err); ++static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err; ++ struct dentry *dentry; ++ struct inode *inode, *h_inode; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); + if (unlikely(err)) -+ goto out_relock; ++ goto out; ++ err = au_alive_dir(dentry); ++ if (!err) ++ err = au_vdir_init(file); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; + -+ dentry = ERR_PTR(-ENOENT); -+ AuDebugOn(au_test_anon(path.dentry)); -+ if (unlikely(!path.dentry->d_inode)) -+ goto out_path; ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ if (!au_test_nfsd()) { ++ err = au_vdir_fill_de(file, dirent, filldir); ++ fsstack_copy_attr_atime(inode, h_inode); ++ } else { ++ /* ++ * nfsd filldir may call lookup_one_len(), vfs_getattr(), ++ * encode_fh() and others. ++ */ ++ atomic_inc(&h_inode->i_count); ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++ err = au_vdir_fill_de(file, dirent, filldir); ++ fsstack_copy_attr_atime(inode, h_inode); ++ fi_write_unlock(file); ++ iput(h_inode); + -+ if (ino != path.dentry->d_inode->i_ino) -+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL); -+ else -+ dentry = dget(path.dentry); ++ AuTraceErr(err); ++ return err; ++ } + -+out_path: -+ path_put(&path); -+out_relock: -+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0)) -+ if (!IS_ERR(dentry)) { -+ dput(dentry); -+ dentry = ERR_PTR(-ESTALE); -+ } -+out_pathname: -+ free_page((unsigned long)pathname); -+out_h_parent: -+ dput(h_parent); ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); +out: -+ AuTraceErrPtr(dentry); -+ return dentry; ++ si_read_unlock(sb); ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+static struct dentry * -+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, -+ int fh_type) -+{ -+ struct dentry *dentry; -+ __u32 *fh = fid->raw; -+ struct au_branch *br; -+ ino_t ino, dir_ino; -+ struct au_nfsd_si_lock nsi_lock = { -+ .force_lock = 0 -+ }; -+ -+ dentry = ERR_PTR(-ESTALE); -+ /* it should never happen, but the file handle is unreliable */ -+ if (unlikely(fh_len < Fh_tail)) -+ goto out; -+ nsi_lock.sigen = fh[Fh_sigen]; -+ nsi_lock.br_id = fh[Fh_br_id]; ++#define AuTestEmpty_WHONLY 1 ++#define AuTestEmpty_CALLED (1 << 1) ++#define AuTestEmpty_SHWH (1 << 2) ++#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name) ++#define au_fset_testempty(flags, name) \ ++ do { (flags) |= AuTestEmpty_##name; } while (0) ++#define au_fclr_testempty(flags, name) \ ++ do { (flags) &= ~AuTestEmpty_##name; } while (0) + -+ /* branch id may be wrapped around */ -+ br = NULL; -+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock))) -+ goto out; -+ nsi_lock.force_lock = 1; ++#ifndef CONFIG_AUFS_SHWH ++#undef AuTestEmpty_SHWH ++#define AuTestEmpty_SHWH 0 ++#endif + -+ /* is this inode still cached? */ -+ ino = decode_ino(fh + Fh_ino); -+ /* it should never happen */ -+ if (unlikely(ino == AUFS_ROOT_INO)) -+ goto out; ++struct test_empty_arg { ++ struct au_nhash *whlist; ++ unsigned int flags; ++ int err; ++ aufs_bindex_t bindex; ++}; + -+ dir_ino = decode_ino(fh + Fh_dir_ino); -+ dentry = decode_by_ino(sb, ino, dir_ino); -+ if (IS_ERR(dentry)) -+ goto out_unlock; -+ if (dentry) -+ goto accept; ++static int test_empty_cb(void *__arg, const char *__name, int namelen, ++ loff_t offset __maybe_unused, u64 ino, ++ unsigned int d_type) ++{ ++ struct test_empty_arg *arg = __arg; ++ char *name = (void *)__name; + -+ /* is the parent dir cached? */ -+ br = au_sbr(sb, nsi_lock.bindex); -+ atomic_inc(&br->br_count); -+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock); -+ if (IS_ERR(dentry)) -+ goto out_unlock; -+ if (dentry) -+ goto accept; ++ arg->err = 0; ++ au_fset_testempty(arg->flags, CALLED); ++ /* smp_mb(); */ ++ if (name[0] == '.' ++ && (namelen == 1 || (name[1] == '.' && namelen == 2))) ++ goto out; /* success */ + -+ /* lookup path */ -+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock); -+ if (IS_ERR(dentry)) -+ goto out_unlock; -+ if (unlikely(!dentry)) -+ /* todo?: make it ESTALE */ -+ goto out_unlock; ++ if (namelen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ if (au_ftest_testempty(arg->flags, WHONLY) ++ && !au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ arg->err = -ENOTEMPTY; ++ goto out; ++ } + -+accept: -+ if (!au_digen_test(dentry, au_sigen(sb)) -+ && dentry->d_inode->i_generation == fh[Fh_igen]) -+ goto out_unlock; /* success */ ++ name += AUFS_WH_PFX_LEN; ++ namelen -= AUFS_WH_PFX_LEN; ++ if (!au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ arg->err = au_nhash_append_wh ++ (arg->whlist, name, namelen, ino, d_type, arg->bindex, ++ au_ftest_testempty(arg->flags, SHWH)); + -+ dput(dentry); -+ dentry = ERR_PTR(-ESTALE); -+out_unlock: -+ if (br) -+ atomic_dec(&br->br_count); -+ si_read_unlock(sb); +out: -+ AuTraceErrPtr(dentry); -+ return dentry; ++ /* smp_mb(); */ ++ AuTraceErr(arg->err); ++ return arg->err; +} + -+#if 0 /* reserved for future use */ -+/* support subtreecheck option */ -+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) ++static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) +{ -+ struct dentry *parent; -+ __u32 *fh = fid->raw; -+ ino_t dir_ino; ++ int err; ++ struct file *h_file; + -+ dir_ino = decode_ino(fh + Fh_dir_ino); -+ parent = decode_by_ino(sb, dir_ino, 0); -+ if (IS_ERR(parent)) ++ h_file = au_h_open(dentry, arg->bindex, ++ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE, ++ /*file*/NULL); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) + goto out; -+ if (!parent) -+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]), -+ dir_ino, fh, fh_len); + ++ err = 0; ++ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) ++ && !file_inode(h_file)->i_nlink) ++ goto out_put; ++ ++ do { ++ arg->err = 0; ++ au_fclr_testempty(arg->flags, CALLED); ++ /* smp_mb(); */ ++ err = vfsub_readdir(h_file, test_empty_cb, arg); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err && au_ftest_testempty(arg->flags, CALLED)); ++ ++out_put: ++ fput(h_file); ++ au_sbr_put(dentry->d_sb, arg->bindex); +out: -+ AuTraceErrPtr(parent); -+ return parent; ++ return err; +} -+#endif + -+/* ---------------------------------------------------------------------- */ ++struct do_test_empty_args { ++ int *errp; ++ struct dentry *dentry; ++ struct test_empty_arg *arg; ++}; + -+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, -+ struct inode *dir) ++static void call_do_test_empty(void *args) +{ -+ int err; -+ aufs_bindex_t bindex; -+ struct super_block *sb, *h_sb; -+ struct dentry *dentry, *parent, *h_parent; -+ struct inode *h_dir; -+ struct au_branch *br; ++ struct do_test_empty_args *a = args; ++ *a->errp = do_test_empty(a->dentry, a->arg); ++} + -+ err = -ENOSPC; -+ if (unlikely(*max_len <= Fh_tail)) { -+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len); -+ goto out; -+ } ++static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err, wkq_err; ++ struct dentry *h_dentry; ++ struct inode *h_inode; + -+ err = FILEID_ROOT; -+ if (inode->i_ino == AUFS_ROOT_INO) { -+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO); -+ goto out; ++ h_dentry = au_h_dptr(dentry, arg->bindex); ++ h_inode = h_dentry->d_inode; ++ /* todo: i_mode changes anytime? */ ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ); ++ mutex_unlock(&h_inode->i_mutex); ++ if (!err) ++ err = do_test_empty(dentry, arg); ++ else { ++ struct do_test_empty_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .arg = arg ++ }; ++ unsigned int flags = arg->flags; ++ ++ wkq_err = au_wkq_wait(call_do_test_empty, &args); ++ if (unlikely(wkq_err)) ++ err = wkq_err; ++ arg->flags = flags; + } + -+ h_parent = NULL; -+ sb = inode->i_sb; -+ err = si_read_lock(sb, AuLock_FLUSH); -+ if (unlikely(err)) -+ goto out; ++ return err; ++} + -+#ifdef CONFIG_AUFS_DEBUG -+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) -+ AuWarn1("NFS-exporting requires xino\n"); -+#endif -+ err = -EIO; -+ parent = NULL; -+ ii_read_lock_child(inode); -+ bindex = au_ibstart(inode); -+ if (!dir) { -+ dentry = d_find_alias(inode); -+ if (unlikely(!dentry)) -+ goto out_unlock; -+ AuDebugOn(au_test_anon(dentry)); -+ parent = dget_parent(dentry); -+ dput(dentry); -+ if (unlikely(!parent)) -+ goto out_unlock; -+ dir = parent->d_inode; -+ } ++int au_test_empty_lower(struct dentry *dentry) ++{ ++ int err; ++ unsigned int rdhash; ++ aufs_bindex_t bindex, bstart, btail; ++ struct au_nhash whlist; ++ struct test_empty_arg arg; + -+ ii_read_lock_parent(dir); -+ h_dir = au_h_iptr(dir, bindex); -+ ii_read_unlock(dir); -+ if (unlikely(!h_dir)) -+ goto out_parent; -+ h_parent = d_find_alias(h_dir); -+ if (unlikely(!h_parent)) -+ goto out_hparent; ++ SiMustAnyLock(dentry->d_sb); + -+ err = -EPERM; -+ br = au_sbr(sb, bindex); -+ h_sb = br->br_mnt->mnt_sb; -+ if (unlikely(!h_sb->s_export_op)) { -+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb)); -+ goto out_hparent; -+ } ++ rdhash = au_sbi(dentry->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); ++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; + -+ fh[Fh_br_id] = br->br_id; -+ fh[Fh_sigen] = au_sigen(sb); -+ encode_ino(fh + Fh_ino, inode->i_ino); -+ encode_ino(fh + Fh_dir_ino, dir->i_ino); -+ fh[Fh_igen] = inode->i_generation; ++ arg.flags = 0; ++ arg.whlist = &whlist; ++ bstart = au_dbstart(dentry); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); ++ arg.bindex = bstart; ++ err = do_test_empty(dentry, &arg); ++ if (unlikely(err)) ++ goto out_whlist; + -+ *max_len -= Fh_tail; -+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail), -+ max_len, -+ /*connectable or subtreecheck*/0); -+ err = fh[Fh_h_type]; -+ *max_len += Fh_tail; -+ /* todo: macros? */ -+ if (err != FILEID_INVALID) -+ err = 99; -+ else -+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb)); ++ au_fset_testempty(arg.flags, WHONLY); ++ btail = au_dbtaildir(dentry); ++ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) { ++ struct dentry *h_dentry; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) { ++ arg.bindex = bindex; ++ err = do_test_empty(dentry, &arg); ++ } ++ } + -+out_hparent: -+ dput(h_parent); -+out_parent: -+ dput(parent); -+out_unlock: -+ ii_read_unlock(inode); -+ si_read_unlock(sb); ++out_whlist: ++ au_nhash_wh_free(&whlist); +out: -+ if (unlikely(err < 0)) -+ err = FILEID_INVALID; + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_commit_metadata(struct inode *inode) ++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist) +{ + int err; -+ aufs_bindex_t bindex; -+ struct super_block *sb; -+ struct inode *h_inode; -+ int (*f)(struct inode *inode); -+ -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ ii_write_lock_child(inode); -+ bindex = au_ibstart(inode); -+ AuDebugOn(bindex < 0); -+ h_inode = au_h_iptr(inode, bindex); ++ struct test_empty_arg arg; ++ aufs_bindex_t bindex, btail; + -+ f = h_inode->i_sb->s_export_op->commit_metadata; -+ if (f) -+ err = f(h_inode); -+ else { -+ struct writeback_control wbc = { -+ .sync_mode = WB_SYNC_ALL, -+ .nr_to_write = 0 /* metadata only */ -+ }; ++ err = 0; ++ arg.whlist = whlist; ++ arg.flags = AuTestEmpty_WHONLY; ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); ++ btail = au_dbtaildir(dentry); ++ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) { ++ struct dentry *h_dentry; + -+ err = sync_inode(h_inode, &wbc); ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry && h_dentry->d_inode) { ++ arg.bindex = bindex; ++ err = sio_test_empty(dentry, &arg); ++ } + } + -+ au_cpup_attr_timesizes(inode); -+ ii_write_unlock(inode); -+ si_read_unlock(sb); + return err; +} + +/* ---------------------------------------------------------------------- */ + -+static struct export_operations aufs_export_op = { -+ .fh_to_dentry = aufs_fh_to_dentry, -+ /* .fh_to_parent = aufs_fh_to_parent, */ -+ .encode_fh = aufs_encode_fh, -+ .commit_metadata = aufs_commit_metadata ++const struct file_operations aufs_dir_fop = { ++ .owner = THIS_MODULE, ++ .llseek = default_llseek, ++ .read = generic_read_dir, ++ .readdir = aufs_readdir, ++ .unlocked_ioctl = aufs_ioctl_dir, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = aufs_compat_ioctl_dir, ++#endif ++ .open = aufs_open_dir, ++ .release = aufs_release_dir, ++ .flush = aufs_flush_dir, ++ .fsync = aufs_fsync_dir +}; -+ -+void au_export_init(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ __u32 u; -+ -+ sb->s_export_op = &aufs_export_op; -+ sbinfo = au_sbi(sb); -+ sbinfo->si_xigen = NULL; -+ get_random_bytes(&u, sizeof(u)); -+ BUILD_BUG_ON(sizeof(u) != sizeof(int)); -+ atomic_set(&sbinfo->si_xigen_next, u); -+} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/file.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,683 @@ ++++ linux-3.9/fs/aufs/dir.h 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,137 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -9448,673 +8607,588 @@ + */ + +/* -+ * handling file/dir, and address_space operation ++ * directory operations + */ + -+#ifdef CONFIG_AUFS_DEBUG -+#include ++#ifndef __AUFS_DIR_H__ ++#define __AUFS_DIR_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* need to be faster and smaller */ ++ ++struct au_nhash { ++ unsigned int nh_num; ++ struct hlist_head *nh_head; ++}; ++ ++struct au_vdir_destr { ++ unsigned char len; ++ unsigned char name[0]; ++} __packed; ++ ++struct au_vdir_dehstr { ++ struct hlist_node hash; ++ struct au_vdir_destr *str; ++} ____cacheline_aligned_in_smp; ++ ++struct au_vdir_de { ++ ino_t de_ino; ++ unsigned char de_type; ++ /* caution: packed */ ++ struct au_vdir_destr de_str; ++} __packed; ++ ++struct au_vdir_wh { ++ struct hlist_node wh_hash; ++#ifdef CONFIG_AUFS_SHWH ++ ino_t wh_ino; ++ aufs_bindex_t wh_bindex; ++ unsigned char wh_type; ++#else ++ aufs_bindex_t wh_bindex; +#endif -+#include -+#include "aufs.h" ++ /* caution: packed */ ++ struct au_vdir_destr wh_str; ++} __packed; + -+/* drop flags for writing */ -+unsigned int au_file_roflags(unsigned int flags) ++union au_vdir_deblk_p { ++ unsigned char *deblk; ++ struct au_vdir_de *de; ++}; ++ ++struct au_vdir { ++ unsigned char **vd_deblk; ++ unsigned long vd_nblk; ++ struct { ++ unsigned long ul; ++ union au_vdir_deblk_p p; ++ } vd_last; ++ ++ unsigned long vd_version; ++ unsigned int vd_deblk_sz; ++ unsigned long vd_jiffy; ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dir.c */ ++extern const struct file_operations aufs_dir_fop; ++void au_add_nlink(struct inode *dir, struct inode *h_dir); ++void au_sub_nlink(struct inode *dir, struct inode *h_dir); ++loff_t au_dir_size(struct file *file, struct dentry *dentry); ++int au_test_empty_lower(struct dentry *dentry); ++int au_test_empty(struct dentry *dentry, struct au_nhash *whlist); ++ ++/* vdir.c */ ++unsigned int au_rdhash_est(loff_t sz); ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp); ++void au_nhash_wh_free(struct au_nhash *whlist); ++int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, ++ int limit); ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen); ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh); ++void au_vdir_free(struct au_vdir *vdir); ++int au_vdir_init(struct file *file); ++int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir); ++ ++/* ioctl.c */ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg); ++ ++#ifdef CONFIG_AUFS_RDU ++/* rdu.c */ ++long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg); ++#endif ++#else ++static inline long au_rdu_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) +{ -+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); -+ flags |= O_RDONLY | O_NOATIME; -+ return flags; ++ return -EINVAL; +} -+ -+/* common functions to regular file and dir */ -+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, -+ struct file *file) ++#ifdef CONFIG_COMPAT ++static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) +{ -+ struct file *h_file; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct path h_path; -+ int err, exec_flag; ++ return -EINVAL; ++} ++#endif ++#endif + -+ /* a race condition can happen between open and unlink/rmdir */ -+ h_file = ERR_PTR(-ENOENT); -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (au_test_nfsd() && !h_dentry) -+ goto out; -+ h_inode = h_dentry->d_inode; -+ if (au_test_nfsd() && !h_inode) -+ goto out; -+ spin_lock(&h_dentry->d_lock); -+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry)) -+ || !h_inode -+ /* || !dentry->d_inode->i_nlink */ -+ ; -+ spin_unlock(&h_dentry->d_lock); -+ if (unlikely(err)) -+ goto out; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DIR_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/dynop.c 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,379 @@ ++/* ++ * Copyright (C) 2010-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * dynamically customizable operations for regular files ++ */ + -+ sb = dentry->d_sb; -+ br = au_sbr(sb, bindex); -+ h_file = ERR_PTR(-EACCES); -+ exec_flag = flags & __FMODE_EXEC; -+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC)) -+ goto out; ++#include "aufs.h" + -+ /* drop flags for writing */ -+ if (au_test_ro(sb, bindex, dentry->d_inode)) -+ flags = au_file_roflags(flags); -+ flags &= ~O_CREAT; -+ atomic_inc(&br->br_count); -+ h_path.dentry = h_dentry; -+ h_path.mnt = br->br_mnt; -+ if (!au_special_file(h_inode->i_mode)) -+ h_file = vfsub_dentry_open(&h_path, flags); -+ else { -+ /* this block depends upon the configuration */ -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ si_read_unlock(sb); -+ h_file = vfsub_dentry_open(&h_path, flags); -+ si_noflush_read_lock(sb); -+ fi_write_lock(file); -+ di_read_lock_child(dentry, AuLock_IR); -+ } -+ if (IS_ERR(h_file)) -+ goto out_br; ++#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop) + -+ if (exec_flag) { -+ err = deny_write_access(h_file); -+ if (unlikely(err)) { -+ fput(h_file); -+ h_file = ERR_PTR(err); -+ goto out_br; ++/* ++ * How large will these lists be? ++ * Usually just a few elements, 20-30 at most for each, I guess. ++ */ ++static struct au_splhead dynop[AuDyLast]; ++ ++static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op) ++{ ++ struct au_dykey *key, *tmp; ++ struct list_head *head; ++ ++ key = NULL; ++ head = &spl->head; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(tmp, head, dk_list) ++ if (tmp->dk_op.dy_hop == h_op) { ++ key = tmp; ++ kref_get(&key->dk_kref); ++ break; + } -+ } -+ fsnotify_open(h_file); -+ goto out; /* success */ ++ rcu_read_unlock(); + -+out_br: -+ atomic_dec(&br->br_count); -+out: -+ return h_file; ++ return key; +} + -+int au_do_open(struct file *file, int (*open)(struct file *file, int flags), -+ struct au_fidir *fidir) ++static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key) +{ -+ int err; -+ struct dentry *dentry; -+ -+ err = au_finfo_init(file, fidir); -+ if (unlikely(err)) -+ goto out; -+ -+ dentry = file->f_dentry; -+ di_read_lock_child(dentry, AuLock_IR); -+ err = open(file, vfsub_file_flags(file)); -+ di_read_unlock(dentry, AuLock_IR); ++ struct au_dykey **k, *found; ++ const void *h_op = key->dk_op.dy_hop; ++ int i; + -+ fi_write_unlock(file); -+ if (unlikely(err)) { -+ au_fi(file)->fi_hdir = NULL; -+ au_finfo_fin(file); ++ found = NULL; ++ k = br->br_dykey; ++ for (i = 0; i < AuBrDynOp; i++) ++ if (k[i]) { ++ if (k[i]->dk_op.dy_hop == h_op) { ++ found = k[i]; ++ break; ++ } ++ } else ++ break; ++ if (!found) { ++ spin_lock(&br->br_dykey_lock); ++ for (; i < AuBrDynOp; i++) ++ if (k[i]) { ++ if (k[i]->dk_op.dy_hop == h_op) { ++ found = k[i]; ++ break; ++ } ++ } else { ++ k[i] = key; ++ break; ++ } ++ spin_unlock(&br->br_dykey_lock); ++ BUG_ON(i == AuBrDynOp); /* expand the array */ + } + -+out: -+ return err; ++ return found; +} + -+int au_reopen_nondir(struct file *file) ++/* kref_get() if @key is already added */ ++static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key) +{ -+ int err; -+ aufs_bindex_t bstart; -+ struct dentry *dentry; -+ struct file *h_file, *h_file_tmp; -+ -+ dentry = file->f_dentry; -+ AuDebugOn(au_special_file(dentry->d_inode->i_mode)); -+ bstart = au_dbstart(dentry); -+ h_file_tmp = NULL; -+ if (au_fbstart(file) == bstart) { -+ h_file = au_hf_top(file); -+ if (file->f_mode == h_file->f_mode) -+ return 0; /* success */ -+ h_file_tmp = h_file; -+ get_file(h_file_tmp); -+ au_set_h_fptr(file, bstart, NULL); -+ } -+ AuDebugOn(au_fi(file)->fi_hdir); -+ AuDebugOn(au_fbstart(file) < bstart); -+ -+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC, -+ file); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; /* todo: close all? */ ++ struct au_dykey *tmp, *found; ++ struct list_head *head; ++ const void *h_op = key->dk_op.dy_hop; + -+ err = 0; -+ au_set_fbstart(file, bstart); -+ au_set_h_fptr(file, bstart, h_file); -+ au_update_figen(file); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ ++ found = NULL; ++ head = &spl->head; ++ spin_lock(&spl->spin); ++ list_for_each_entry(tmp, head, dk_list) ++ if (tmp->dk_op.dy_hop == h_op) { ++ kref_get(&tmp->dk_kref); ++ found = tmp; ++ break; ++ } ++ if (!found) ++ list_add_rcu(&key->dk_list, head); ++ spin_unlock(&spl->spin); + -+out: -+ if (h_file_tmp) -+ fput(h_file_tmp); -+ return err; ++ if (!found) ++ DyPrSym(key); ++ return found; +} + -+/* ---------------------------------------------------------------------- */ -+ -+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, -+ struct dentry *hi_wh) ++static void dy_free_rcu(struct rcu_head *rcu) +{ -+ int err; -+ aufs_bindex_t bstart; -+ struct au_dinfo *dinfo; -+ struct dentry *h_dentry; -+ struct au_hdentry *hdp; ++ struct au_dykey *key; + -+ dinfo = au_di(file->f_dentry); -+ AuRwMustWriteLock(&dinfo->di_rwsem); ++ key = container_of(rcu, struct au_dykey, dk_rcu); ++ DyPrSym(key); ++ kfree(key); ++} + -+ bstart = dinfo->di_bstart; -+ dinfo->di_bstart = btgt; -+ hdp = dinfo->di_hdentry; -+ h_dentry = hdp[0 + btgt].hd_dentry; -+ hdp[0 + btgt].hd_dentry = hi_wh; -+ err = au_reopen_nondir(file); -+ hdp[0 + btgt].hd_dentry = h_dentry; -+ dinfo->di_bstart = bstart; ++static void dy_free(struct kref *kref) ++{ ++ struct au_dykey *key; ++ struct au_splhead *spl; + -+ return err; ++ key = container_of(kref, struct au_dykey, dk_kref); ++ spl = dynop + key->dk_op.dy_type; ++ au_spl_del_rcu(&key->dk_list, spl); ++ call_rcu(&key->dk_rcu, dy_free_rcu); +} + -+static int au_ready_to_write_wh(struct file *file, loff_t len, -+ aufs_bindex_t bcpup) ++void au_dy_put(struct au_dykey *key) +{ -+ int err; -+ struct inode *inode, *h_inode; -+ struct dentry *dentry, *h_dentry, *hi_wh; ++ kref_put(&key->dk_kref, dy_free); ++} + -+ dentry = file->f_dentry; -+ au_update_dbstart(dentry); -+ inode = dentry->d_inode; -+ h_inode = NULL; -+ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) { -+ h_dentry = au_h_dptr(dentry, bcpup); -+ if (h_dentry) -+ h_inode = h_dentry->d_inode; -+ } -+ hi_wh = au_hi_wh(inode, bcpup); -+ if (!hi_wh && !h_inode) -+ err = au_sio_cpup_wh(dentry, bcpup, len, file); -+ else -+ /* already copied-up after unlink */ -+ err = au_reopen_wh(file, bcpup, hi_wh); ++/* ---------------------------------------------------------------------- */ + -+ if (!err -+ && inode->i_nlink > 1 -+ && au_opt_test(au_mntflags(dentry->d_sb), PLINK)) -+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup)); ++#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *)) + -+ return err; -+} ++#ifdef CONFIG_AUFS_DEBUG ++#define DyDbgDeclare(cnt) unsigned int cnt = 0 ++#define DyDbgInc(cnt) do { cnt++; } while (0) ++#else ++#define DyDbgDeclare(cnt) do {} while (0) ++#define DyDbgInc(cnt) do {} while (0) ++#endif ++ ++#define DySet(func, dst, src, h_op, h_sb) do { \ ++ DyDbgInc(cnt); \ ++ if (h_op->func) { \ ++ if (src.func) \ ++ dst.func = src.func; \ ++ else \ ++ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \ ++ } \ ++} while (0) ++ ++#define DySetForce(func, dst, src) do { \ ++ AuDebugOn(!src.func); \ ++ DyDbgInc(cnt); \ ++ dst.func = src.func; \ ++} while (0) ++ ++#define DySetAop(func) \ ++ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb) ++#define DySetAopForce(func) \ ++ DySetForce(func, dyaop->da_op, aufs_aop) + -+/* -+ * prepare the @file for writing. -+ */ -+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) ++static void dy_aop(struct au_dykey *key, const void *h_op, ++ struct super_block *h_sb __maybe_unused) +{ -+ int err; -+ aufs_bindex_t bstart, bcpup, dbstart; -+ struct dentry *dentry, *parent, *h_dentry; -+ struct inode *h_inode, *inode; -+ struct super_block *sb; -+ struct file *h_file; ++ struct au_dyaop *dyaop = (void *)key; ++ const struct address_space_operations *h_aop = h_op; ++ DyDbgDeclare(cnt); + -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ AuDebugOn(au_special_file(inode->i_mode)); -+ bstart = au_fbstart(file); -+ err = au_test_ro(sb, bstart, inode); -+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { -+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0); -+ goto out; -+ } ++ AuDbg("%s\n", au_sbtype(h_sb)); + -+ /* need to cpup or reopen */ -+ parent = dget_parent(dentry); -+ di_write_lock_parent(parent); -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ bcpup = err; -+ if (unlikely(err < 0)) -+ goto out_dgrade; -+ err = 0; ++ DySetAop(writepage); ++ DySetAopForce(readpage); /* force */ ++ DySetAop(writepages); ++ DySetAop(set_page_dirty); ++ DySetAop(readpages); ++ DySetAop(write_begin); ++ DySetAop(write_end); ++ DySetAop(bmap); ++ DySetAop(invalidatepage); ++ DySetAop(releasepage); ++ DySetAop(freepage); ++ /* these two will be changed according to an aufs mount option */ ++ DySetAop(direct_IO); ++ DySetAop(get_xip_mem); ++ DySetAop(migratepage); ++ DySetAop(launder_page); ++ DySetAop(is_partially_uptodate); ++ DySetAop(error_remove_page); ++ DySetAop(swap_activate); ++ DySetAop(swap_deactivate); + -+ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) { -+ err = au_cpup_dirs(dentry, bcpup); -+ if (unlikely(err)) -+ goto out_dgrade; -+ } ++ DyDbgSize(cnt, *h_aop); ++ dyaop->da_get_xip_mem = h_aop->get_xip_mem; ++} + -+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_dgrade; ++/* ---------------------------------------------------------------------- */ + -+ h_dentry = au_hf_top(file)->f_dentry; -+ h_inode = h_dentry->d_inode; -+ dbstart = au_dbstart(dentry); -+ if (dbstart <= bcpup) { -+ h_dentry = au_h_dptr(dentry, bcpup); -+ AuDebugOn(!h_dentry); -+ h_inode = h_dentry->d_inode; -+ AuDebugOn(!h_inode); -+ bstart = bcpup; -+ } ++static void dy_bug(struct kref *kref) ++{ ++ BUG(); ++} + -+ if (dbstart <= bcpup /* just reopen */ -+ || !d_unhashed(dentry) /* copyup and reopen */ -+ ) { -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ h_file = au_h_open_pre(dentry, bstart); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ h_file = NULL; -+ } else { -+ di_downgrade_lock(parent, AuLock_IR); -+ if (dbstart > bcpup) -+ err = au_sio_cpup_simple(dentry, bcpup, len, -+ AuCpup_DTIME); -+ if (!err) -+ err = au_reopen_nondir(file); ++static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br) ++{ ++ struct au_dykey *key, *old; ++ struct au_splhead *spl; ++ struct op { ++ unsigned int sz; ++ void (*set)(struct au_dykey *key, const void *h_op, ++ struct super_block *h_sb __maybe_unused); ++ }; ++ static const struct op a[] = { ++ [AuDy_AOP] = { ++ .sz = sizeof(struct au_dyaop), ++ .set = dy_aop + } -+ mutex_unlock(&h_inode->i_mutex); -+ au_h_open_post(dentry, bstart, h_file); -+ } else { /* copyup as wh and reopen */ -+ /* -+ * since writable hfsplus branch is not supported, -+ * h_open_pre/post() are unnecessary. -+ */ -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ err = au_ready_to_write_wh(file, len, bcpup); -+ di_downgrade_lock(parent, AuLock_IR); -+ mutex_unlock(&h_inode->i_mutex); ++ }; ++ const struct op *p; ++ ++ spl = dynop + op->dy_type; ++ key = dy_gfind_get(spl, op->dy_hop); ++ if (key) ++ goto out_add; /* success */ ++ ++ p = a + op->dy_type; ++ key = kzalloc(p->sz, GFP_NOFS); ++ if (unlikely(!key)) { ++ key = ERR_PTR(-ENOMEM); ++ goto out; + } + -+ if (!err) { -+ au_pin_set_parent_lflag(pin, /*lflag*/0); -+ goto out_dput; /* success */ ++ key->dk_op.dy_hop = op->dy_hop; ++ kref_init(&key->dk_kref); ++ p->set(key, op->dy_hop, au_br_sb(br)); ++ old = dy_gadd(spl, key); ++ if (old) { ++ kfree(key); ++ key = old; + } -+ au_unpin(pin); -+ goto out_unlock; + -+out_dgrade: -+ di_downgrade_lock(parent, AuLock_IR); -+out_unlock: -+ di_read_unlock(parent, AuLock_IR); -+out_dput: -+ dput(parent); ++out_add: ++ old = dy_bradd(br, key); ++ if (old) ++ /* its ref-count should never be zero here */ ++ kref_put(&key->dk_kref, dy_bug); +out: -+ return err; ++ return key; +} + +/* ---------------------------------------------------------------------- */ -+ -+int au_do_flush(struct file *file, fl_owner_t id, -+ int (*flush)(struct file *file, fl_owner_t id)) ++/* ++ * Aufs prohibits O_DIRECT by defaut even if the branch supports it. ++ * This behaviour is neccessary to return an error from open(O_DIRECT) instead ++ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes ++ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error. ++ * See the aufs manual in detail. ++ * ++ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the ++ * performance of fadvise() and madvise() may be affected. ++ */ ++static void dy_adx(struct au_dyaop *dyaop, int do_dx) +{ -+ int err; -+ struct dentry *dentry; -+ struct super_block *sb; -+ struct inode *inode; ++ if (!do_dx) { ++ dyaop->da_op.direct_IO = NULL; ++ dyaop->da_op.get_xip_mem = NULL; ++ } else { ++ dyaop->da_op.direct_IO = aufs_aop.direct_IO; ++ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem; ++ if (!dyaop->da_get_xip_mem) ++ dyaop->da_op.get_xip_mem = NULL; ++ } ++} + -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ si_noflush_read_lock(sb); -+ fi_read_lock(file); -+ ii_read_lock_child(inode); ++static struct au_dyaop *dy_aget(struct au_branch *br, ++ const struct address_space_operations *h_aop, ++ int do_dx) ++{ ++ struct au_dyaop *dyaop; ++ struct au_dynop op; + -+ err = flush(file, id); -+ au_cpup_attr_timesizes(inode); ++ op.dy_type = AuDy_AOP; ++ op.dy_haop = h_aop; ++ dyaop = (void *)dy_get(&op, br); ++ if (IS_ERR(dyaop)) ++ goto out; ++ dy_adx(dyaop, do_dx); + -+ ii_read_unlock(inode); -+ fi_read_unlock(file); -+ si_read_unlock(sb); -+ return err; ++out: ++ return dyaop; +} + -+/* ---------------------------------------------------------------------- */ -+ -+static int au_file_refresh_by_inode(struct file *file, int *need_reopen) ++int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode) +{ -+ int err; -+ aufs_bindex_t bstart; -+ struct au_pin pin; -+ struct au_finfo *finfo; -+ struct dentry *dentry, *parent, *hi_wh; -+ struct inode *inode; ++ int err, do_dx; + struct super_block *sb; ++ struct au_branch *br; ++ struct au_dyaop *dyaop; + -+ FiMustWriteLock(file); -+ -+ err = 0; -+ finfo = au_fi(file); -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ bstart = au_ibstart(inode); -+ if (bstart == finfo->fi_btop || IS_ROOT(dentry)) -+ goto out; -+ -+ parent = dget_parent(dentry); -+ if (au_test_ro(sb, bstart, inode)) { -+ di_read_lock_parent(parent, !AuLock_IR); -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ bstart = err; -+ di_read_unlock(parent, !AuLock_IR); -+ if (unlikely(err < 0)) -+ goto out_parent; -+ err = 0; -+ } ++ AuDebugOn(!S_ISREG(h_inode->i_mode)); ++ IiMustWriteLock(inode); + -+ di_read_lock_parent(parent, AuLock_IR); -+ hi_wh = au_hi_wh(inode, bstart); -+ if (!S_ISDIR(inode->i_mode) -+ && au_opt_test(au_mntflags(sb), PLINK) -+ && au_plink_test(inode) -+ && !d_unhashed(dentry)) { -+ err = au_test_and_cpup_dirs(dentry, bstart); -+ if (unlikely(err)) -+ goto out_unlock; ++ sb = inode->i_sb; ++ br = au_sbr(sb, bindex); ++ do_dx = !!au_opt_test(au_mntflags(sb), DIO); ++ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx); ++ err = PTR_ERR(dyaop); ++ if (IS_ERR(dyaop)) ++ /* unnecessary to call dy_fput() */ ++ goto out; + -+ /* always superio. */ -+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (!err) -+ err = au_sio_cpup_simple(dentry, bstart, -1, -+ AuCpup_DTIME); -+ au_unpin(&pin); -+ } else if (hi_wh) { -+ /* already copied-up after unlink */ -+ err = au_reopen_wh(file, bstart, hi_wh); -+ *need_reopen = 0; -+ } ++ err = 0; ++ inode->i_mapping->a_ops = &dyaop->da_op; + -+out_unlock: -+ di_read_unlock(parent, AuLock_IR); -+out_parent: -+ dput(parent); +out: + return err; +} + -+static void au_do_refresh_dir(struct file *file) ++/* ++ * Is it safe to replace a_ops during the inode/file is in operation? ++ * Yes, I hope so. ++ */ ++int au_dy_irefresh(struct inode *inode) +{ -+ aufs_bindex_t bindex, bend, new_bindex, brid; -+ struct au_hfile *p, tmp, *q; -+ struct au_finfo *finfo; -+ struct super_block *sb; -+ struct au_fidir *fidir; ++ int err; ++ aufs_bindex_t bstart; ++ struct inode *h_inode; + -+ FiMustWriteLock(file); ++ err = 0; ++ if (S_ISREG(inode->i_mode)) { ++ bstart = au_ibstart(inode); ++ h_inode = au_h_iptr(inode, bstart); ++ err = au_dy_iaop(inode, bstart, h_inode); ++ } ++ return err; ++} + -+ sb = file->f_dentry->d_sb; -+ finfo = au_fi(file); -+ fidir = finfo->fi_hdir; -+ AuDebugOn(!fidir); -+ p = fidir->fd_hfile + finfo->fi_btop; -+ brid = p->hf_br->br_id; -+ bend = fidir->fd_bbot; -+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) { -+ if (!p->hf_file) -+ continue; ++void au_dy_arefresh(int do_dx) ++{ ++ struct au_splhead *spl; ++ struct list_head *head; ++ struct au_dykey *key; + -+ new_bindex = au_br_index(sb, p->hf_br->br_id); -+ if (new_bindex == bindex) -+ continue; -+ if (new_bindex < 0) { -+ au_set_h_fptr(file, bindex, NULL); -+ continue; -+ } ++ spl = dynop + AuDy_AOP; ++ head = &spl->head; ++ spin_lock(&spl->spin); ++ list_for_each_entry(key, head, dk_list) ++ dy_adx((void *)key, do_dx); ++ spin_unlock(&spl->spin); ++} + -+ /* swap two lower inode, and loop again */ -+ q = fidir->fd_hfile + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hf_file) { -+ bindex--; -+ p--; -+ } -+ } ++/* ---------------------------------------------------------------------- */ + -+ p = fidir->fd_hfile; -+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) { -+ bend = au_sbend(sb); -+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend; -+ finfo->fi_btop++, p++) -+ if (p->hf_file) { -+ if (p->hf_file->f_dentry -+ && p->hf_file->f_dentry->d_inode) -+ break; -+ else -+ au_hfput(p, file); -+ } -+ } else { -+ bend = au_br_index(sb, brid); -+ for (finfo->fi_btop = 0; finfo->fi_btop < bend; -+ finfo->fi_btop++, p++) -+ if (p->hf_file) -+ au_hfput(p, file); -+ bend = au_sbend(sb); -+ } ++void __init au_dy_init(void) ++{ ++ int i; + -+ p = fidir->fd_hfile + bend; -+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop; -+ fidir->fd_bbot--, p--) -+ if (p->hf_file) { -+ if (p->hf_file->f_dentry -+ && p->hf_file->f_dentry->d_inode) -+ break; -+ else -+ au_hfput(p, file); -+ } -+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop); ++ /* make sure that 'struct au_dykey *' can be any type */ ++ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key)); ++ ++ for (i = 0; i < AuDyLast; i++) ++ au_spl_init(dynop + i); +} + ++void au_dy_fin(void) ++{ ++ int i; ++ ++ for (i = 0; i < AuDyLast; i++) ++ WARN_ON(!list_empty(&dynop[i].head)); ++} +--- /dev/null ++++ linux-3.9/fs/aufs/dynop.h 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,76 @@ +/* -+ * after branch manipulating, refresh the file. ++ * Copyright (C) 2010-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ -+static int refresh_file(struct file *file, int (*reopen)(struct file *file)) -+{ -+ int err, need_reopen; -+ aufs_bindex_t bend, bindex; -+ struct dentry *dentry; -+ struct au_finfo *finfo; -+ struct au_hfile *hfile; + -+ dentry = file->f_dentry; -+ finfo = au_fi(file); -+ if (!finfo->fi_hdir) { -+ hfile = &finfo->fi_htop; -+ AuDebugOn(!hfile->hf_file); -+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id); -+ AuDebugOn(bindex < 0); -+ if (bindex != finfo->fi_btop) -+ au_set_fbstart(file, bindex); -+ } else { -+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1); -+ if (unlikely(err)) -+ goto out; -+ au_do_refresh_dir(file); -+ } ++/* ++ * dynamically customizable operations (for regular files only) ++ */ + -+ err = 0; -+ need_reopen = 1; -+ if (!au_test_mmapped(file)) -+ err = au_file_refresh_by_inode(file, &need_reopen); -+ if (!err && need_reopen && !d_unlinked(dentry)) -+ err = reopen(file); -+ if (!err) { -+ au_update_figen(file); -+ goto out; /* success */ -+ } ++#ifndef __AUFS_DYNOP_H__ ++#define __AUFS_DYNOP_H__ + -+ /* error, close all lower files */ -+ if (finfo->fi_hdir) { -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ } ++#ifdef __KERNEL__ + -+out: -+ return err; -+} ++#include "inode.h" + -+/* common function to regular file and dir */ -+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), -+ int wlock) -+{ -+ int err; -+ unsigned int sigen, figen; -+ aufs_bindex_t bstart; -+ unsigned char pseudo_link; -+ struct dentry *dentry; -+ struct inode *inode; ++enum {AuDy_AOP, AuDyLast}; + -+ err = 0; -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ AuDebugOn(au_special_file(inode->i_mode)); -+ sigen = au_sigen(dentry->d_sb); -+ fi_write_lock(file); -+ figen = au_figen(file); -+ di_write_lock_child(dentry); -+ bstart = au_dbstart(dentry); -+ pseudo_link = (bstart != au_ibstart(inode)); -+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { -+ if (!wlock) { -+ di_downgrade_lock(dentry, AuLock_IR); -+ fi_downgrade_lock(file); -+ } -+ goto out; /* success */ -+ } ++struct au_dynop { ++ int dy_type; ++ union { ++ const void *dy_hop; ++ const struct address_space_operations *dy_haop; ++ }; ++}; + -+ AuDbg("sigen %d, figen %d\n", sigen, figen); -+ if (au_digen_test(dentry, sigen)) { -+ err = au_reval_dpath(dentry, sigen); -+ AuDebugOn(!err && au_digen_test(dentry, sigen)); -+ } ++struct au_dykey { ++ union { ++ struct list_head dk_list; ++ struct rcu_head dk_rcu; ++ }; ++ struct au_dynop dk_op; + -+ if (!err) -+ err = refresh_file(file, reopen); -+ if (!err) { -+ if (!wlock) { -+ di_downgrade_lock(dentry, AuLock_IR); -+ fi_downgrade_lock(file); -+ } -+ } else { -+ di_write_unlock(dentry); -+ fi_write_unlock(file); -+ } ++ /* ++ * during I am in the branch local array, kref is gotten. when the ++ * branch is removed, kref is put. ++ */ ++ struct kref dk_kref; ++}; + -+out: -+ return err; -+} ++/* stop unioning since their sizes are very different from each other */ ++struct au_dyaop { ++ struct au_dykey da_key; ++ struct address_space_operations da_op; /* not const */ ++ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int, ++ void **, unsigned long *); ++}; + +/* ---------------------------------------------------------------------- */ + -+/* cf. aufs_nopage() */ -+/* for madvise(2) */ -+static int aufs_readpage(struct file *file __maybe_unused, struct page *page) -+{ -+ unlock_page(page); -+ return 0; -+} -+ -+/* it will never be called, but necessary to support O_DIRECT */ -+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, -+ const struct iovec *iov, loff_t offset, -+ unsigned long nr_segs) -+{ BUG(); return 0; } -+ -+/* -+ * it will never be called, but madvise and fadvise behaves differently -+ * when get_xip_mem is defined -+ */ -+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, -+ int create, void **kmem, unsigned long *pfn) -+{ BUG(); return 0; } -+ -+/* they will never be called. */ -+#ifdef CONFIG_AUFS_DEBUG -+static int aufs_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ AuUnsupport(); return 0; } -+static int aufs_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ AuUnsupport(); return 0; } -+static int aufs_writepage(struct page *page, struct writeback_control *wbc) -+{ AuUnsupport(); return 0; } ++/* dynop.c */ ++struct au_branch; ++void au_dy_put(struct au_dykey *key); ++int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode); ++int au_dy_irefresh(struct inode *inode); ++void au_dy_arefresh(int do_dio); + -+static int aufs_set_page_dirty(struct page *page) -+{ AuUnsupport(); return 0; } -+static void aufs_invalidatepage(struct page *page, unsigned long offset) -+{ AuUnsupport(); } -+static int aufs_releasepage(struct page *page, gfp_t gfp) -+{ AuUnsupport(); return 0; } -+static int aufs_migratepage(struct address_space *mapping, struct page *newpage, -+ struct page *page, enum migrate_mode mode) -+{ AuUnsupport(); return 0; } -+static int aufs_launder_page(struct page *page) -+{ AuUnsupport(); return 0; } -+static int aufs_is_partially_uptodate(struct page *page, -+ read_descriptor_t *desc, -+ unsigned long from) -+{ AuUnsupport(); return 0; } -+static int aufs_error_remove_page(struct address_space *mapping, -+ struct page *page) -+{ AuUnsupport(); return 0; } -+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file, -+ sector_t *span) -+{ AuUnsupport(); return 0; } -+static void aufs_swap_deactivate(struct file *file) -+{ AuUnsupport(); } -+#endif /* CONFIG_AUFS_DEBUG */ ++void __init au_dy_init(void); ++void au_dy_fin(void); + -+const struct address_space_operations aufs_aop = { -+ .readpage = aufs_readpage, -+ .direct_IO = aufs_direct_IO, -+ .get_xip_mem = aufs_get_xip_mem, -+#ifdef CONFIG_AUFS_DEBUG -+ .writepage = aufs_writepage, -+ /* no writepages, because of writepage */ -+ .set_page_dirty = aufs_set_page_dirty, -+ /* no readpages, because of readpage */ -+ .write_begin = aufs_write_begin, -+ .write_end = aufs_write_end, -+ /* no bmap, no block device */ -+ .invalidatepage = aufs_invalidatepage, -+ .releasepage = aufs_releasepage, -+ .migratepage = aufs_migratepage, -+ .launder_page = aufs_launder_page, -+ .is_partially_uptodate = aufs_is_partially_uptodate, -+ .error_remove_page = aufs_error_remove_page, -+ .swap_activate = aufs_swap_activate, -+ .swap_deactivate = aufs_swap_deactivate -+#endif /* CONFIG_AUFS_DEBUG */ -+}; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DYNOP_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/file.h 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,298 @@ ++++ linux-3.9/fs/aufs/export.c 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,826 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -10134,448 +9208,816 @@ + */ + +/* -+ * file operations ++ * export via nfs + */ + -+#ifndef __AUFS_FILE_H__ -+#define __AUFS_FILE_H__ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../fs/mount.h" ++#include "aufs.h" ++ ++union conv { ++#ifdef CONFIG_AUFS_INO_T_64 ++ __u32 a[2]; ++#else ++ __u32 a[1]; ++#endif ++ ino_t ino; ++}; ++ ++static ino_t decode_ino(__u32 *a) ++{ ++ union conv u; ++ ++ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); ++ u.a[0] = a[0]; ++#ifdef CONFIG_AUFS_INO_T_64 ++ u.a[1] = a[1]; ++#endif ++ return u.ino; ++} ++ ++static void encode_ino(__u32 *a, ino_t ino) ++{ ++ union conv u; ++ ++ u.ino = ino; ++ a[0] = u.a[0]; ++#ifdef CONFIG_AUFS_INO_T_64 ++ a[1] = u.a[1]; ++#endif ++} ++ ++/* NFS file handle */ ++enum { ++ Fh_br_id, ++ Fh_sigen, ++#ifdef CONFIG_AUFS_INO_T_64 ++ /* support 64bit inode number */ ++ Fh_ino1, ++ Fh_ino2, ++ Fh_dir_ino1, ++ Fh_dir_ino2, ++#else ++ Fh_ino1, ++ Fh_dir_ino1, ++#endif ++ Fh_igen, ++ Fh_h_type, ++ Fh_tail, ++ ++ Fh_ino = Fh_ino1, ++ Fh_dir_ino = Fh_dir_ino1 ++}; ++ ++static int au_test_anon(struct dentry *dentry) ++{ ++ /* note: read d_flags without d_lock */ ++ return !!(dentry->d_flags & DCACHE_DISCONNECTED); ++} ++ ++int au_test_nfsd(void) ++{ ++ int ret; ++ struct task_struct *tsk = current; ++ char comm[sizeof(tsk->comm)]; ++ ++ ret = 0; ++ if (tsk->flags & PF_KTHREAD) { ++ get_task_comm(comm, tsk); ++ ret = !strcmp(comm, "nfsd"); ++ } ++ ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* inode generation external table */ ++ ++void au_xigen_inc(struct inode *inode) ++{ ++ loff_t pos; ++ ssize_t sz; ++ __u32 igen; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ sb = inode->i_sb; ++ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO)); ++ ++ sbinfo = au_sbi(sb); ++ pos = inode->i_ino; ++ pos *= sizeof(igen); ++ igen = inode->i_generation + 1; ++ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen, ++ sizeof(igen), &pos); ++ if (sz == sizeof(igen)) ++ return; /* success */ ++ ++ if (unlikely(sz >= 0)) ++ AuIOErr("xigen error (%zd)\n", sz); ++} ++ ++int au_xigen_new(struct inode *inode) ++{ ++ int err; ++ loff_t pos; ++ ssize_t sz; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ struct file *file; ++ ++ err = 0; ++ /* todo: dirty, at mount time */ ++ if (inode->i_ino == AUFS_ROOT_INO) ++ goto out; ++ sb = inode->i_sb; ++ SiMustAnyLock(sb); ++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ goto out; ++ ++ err = -EFBIG; ++ pos = inode->i_ino; ++ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) { ++ AuIOErr1("too large i%lld\n", pos); ++ goto out; ++ } ++ pos *= sizeof(inode->i_generation); ++ ++ err = 0; ++ sbinfo = au_sbi(sb); ++ file = sbinfo->si_xigen; ++ BUG_ON(!file); ++ ++ if (vfsub_f_size_read(file) ++ < pos + sizeof(inode->i_generation)) { ++ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next); ++ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation, ++ sizeof(inode->i_generation), &pos); ++ } else ++ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation, ++ sizeof(inode->i_generation), &pos); ++ if (sz == sizeof(inode->i_generation)) ++ goto out; /* success */ ++ ++ err = sz; ++ if (unlikely(sz >= 0)) { ++ err = -EIO; ++ AuIOErr("xigen error (%zd)\n", sz); ++ } ++ ++out: ++ return err; ++} + -+#ifdef __KERNEL__ ++int au_xigen_set(struct super_block *sb, struct file *base) ++{ ++ int err; ++ struct au_sbinfo *sbinfo; ++ struct file *file; + -+#include -+#include -+#include -+#include "rwsem.h" ++ SiMustWriteLock(sb); + -+struct au_branch; -+struct au_hfile { -+ struct file *hf_file; -+ struct au_branch *hf_br; -+}; ++ sbinfo = au_sbi(sb); ++ file = au_xino_create2(base, sbinfo->si_xigen); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ err = 0; ++ if (sbinfo->si_xigen) ++ fput(sbinfo->si_xigen); ++ sbinfo->si_xigen = file; + -+struct au_vdir; -+struct au_fidir { -+ aufs_bindex_t fd_bbot; -+ aufs_bindex_t fd_nent; -+ struct au_vdir *fd_vdir_cache; -+ struct au_hfile fd_hfile[]; -+}; ++out: ++ return err; ++} + -+static inline int au_fidir_sz(int nent) ++void au_xigen_clr(struct super_block *sb) +{ -+ AuDebugOn(nent < 0); -+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent; ++ struct au_sbinfo *sbinfo; ++ ++ SiMustWriteLock(sb); ++ ++ sbinfo = au_sbi(sb); ++ if (sbinfo->si_xigen) { ++ fput(sbinfo->si_xigen); ++ sbinfo->si_xigen = NULL; ++ } +} + -+struct au_finfo { -+ atomic_t fi_generation; ++/* ---------------------------------------------------------------------- */ + -+ struct au_rwsem fi_rwsem; -+ aufs_bindex_t fi_btop; ++static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino) ++{ ++ struct dentry *dentry, *d; ++ struct inode *inode; ++ unsigned int sigen; + -+ /* do not union them */ -+ struct { /* for non-dir */ -+ struct au_hfile fi_htop; -+ atomic_t fi_mmapped; -+ }; -+ struct au_fidir *fi_hdir; /* for dir only */ -+} ____cacheline_aligned_in_smp; ++ dentry = NULL; ++ inode = ilookup(sb, ino); ++ if (!inode) ++ goto out; ++ ++ dentry = ERR_PTR(-ESTALE); ++ sigen = au_sigen(sb); ++ if (unlikely(is_bad_inode(inode) ++ || IS_DEADDIR(inode) ++ || sigen != au_iigen(inode, NULL))) ++ goto out_iput; ++ ++ dentry = NULL; ++ if (!dir_ino || S_ISDIR(inode->i_mode)) ++ dentry = d_find_alias(inode); ++ else { ++ spin_lock(&inode->i_lock); ++ hlist_for_each_entry(d, &inode->i_dentry, d_alias) { ++ spin_lock(&d->d_lock); ++ if (!au_test_anon(d) ++ && d->d_parent->d_inode->i_ino == dir_ino) { ++ dentry = dget_dlock(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ spin_unlock(&d->d_lock); ++ } ++ spin_unlock(&inode->i_lock); ++ } ++ if (unlikely(dentry && au_digen_test(dentry, sigen))) { ++ /* need to refresh */ ++ dput(dentry); ++ dentry = NULL; ++ } ++ ++out_iput: ++ iput(inode); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} + +/* ---------------------------------------------------------------------- */ + -+/* file.c */ -+extern const struct address_space_operations aufs_aop; -+unsigned int au_file_roflags(unsigned int flags); -+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, -+ struct file *file); -+int au_do_open(struct file *file, int (*open)(struct file *file, int flags), -+ struct au_fidir *fidir); -+int au_reopen_nondir(struct file *file); -+struct au_pin; -+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); -+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), -+ int wlock); -+int au_do_flush(struct file *file, fl_owner_t id, -+ int (*flush)(struct file *file, fl_owner_t id)); ++/* todo: dirty? */ ++/* if exportfs_decode_fh() passed vfsmount*, we could be happy */ + -+/* poll.c */ -+#ifdef CONFIG_AUFS_POLL -+unsigned int aufs_poll(struct file *file, poll_table *wait); -+#endif ++struct au_compare_mnt_args { ++ /* input */ ++ struct super_block *sb; + -+#ifdef CONFIG_AUFS_BR_HFSPLUS -+/* hfsplus.c */ -+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex); -+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, -+ struct file *h_file); -+#else -+static inline -+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) ++ /* output */ ++ struct vfsmount *mnt; ++}; ++ ++static int au_compare_mnt(struct vfsmount *mnt, void *arg) +{ -+ return NULL; ++ struct au_compare_mnt_args *a = arg; ++ ++ if (mnt->mnt_sb != a->sb) ++ return 0; ++ a->mnt = mntget(mnt); ++ return 1; +} + -+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex, -+ struct file *h_file); -+#endif ++static struct vfsmount *au_mnt_get(struct super_block *sb) ++{ ++ int err; ++ struct path root; ++ struct au_compare_mnt_args args = { ++ .sb = sb ++ }; + -+/* f_op.c */ -+extern const struct file_operations aufs_file_fop; -+int au_do_open_nondir(struct file *file, int flags); -+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file); ++ get_fs_root(current->fs, &root); ++ br_read_lock(&vfsmount_lock); ++ err = iterate_mounts(au_compare_mnt, &args, root.mnt); ++ br_read_unlock(&vfsmount_lock); ++ path_put(&root); ++ AuDebugOn(!err); ++ AuDebugOn(!args.mnt); ++ return args.mnt; ++} + -+#ifdef CONFIG_AUFS_SP_IATTR -+/* f_op_sp.c */ -+int au_special_file(umode_t mode); -+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev); -+#else -+AuStubInt0(au_special_file, umode_t mode) -+static inline void au_init_special_fop(struct inode *inode, umode_t mode, -+ dev_t rdev) ++struct au_nfsd_si_lock { ++ unsigned int sigen; ++ aufs_bindex_t bindex, br_id; ++ unsigned char force_lock; ++}; ++ ++static int si_nfsd_read_lock(struct super_block *sb, ++ struct au_nfsd_si_lock *nsi_lock) +{ -+ init_special_inode(inode, mode, rdev); ++ int err; ++ aufs_bindex_t bindex; ++ ++ si_read_lock(sb, AuLock_FLUSH); ++ ++ /* branch id may be wrapped around */ ++ err = 0; ++ bindex = au_br_index(sb, nsi_lock->br_id); ++ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb)) ++ goto out; /* success */ ++ ++ err = -ESTALE; ++ bindex = -1; ++ if (!nsi_lock->force_lock) ++ si_read_unlock(sb); ++ ++out: ++ nsi_lock->bindex = bindex; ++ return err; +} -+#endif + -+/* finfo.c */ -+void au_hfput(struct au_hfile *hf, struct file *file); -+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, -+ struct file *h_file); ++struct find_name_by_ino { ++ int called, found; ++ ino_t ino; ++ char *name; ++ int namelen; ++}; + -+void au_update_figen(struct file *file); -+struct au_fidir *au_fidir_alloc(struct super_block *sb); -+int au_fidir_realloc(struct au_finfo *finfo, int nbr); ++static int ++find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset, ++ u64 ino, unsigned int d_type) ++{ ++ struct find_name_by_ino *a = arg; ++ ++ a->called++; ++ if (a->ino != ino) ++ return 0; ++ ++ memcpy(a->name, name, namelen); ++ a->namelen = namelen; ++ a->found = 1; ++ return 1; ++} ++ ++static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino, ++ struct au_nfsd_si_lock *nsi_lock) ++{ ++ struct dentry *dentry, *parent; ++ struct file *file; ++ struct inode *dir; ++ struct find_name_by_ino arg; ++ int err; ++ ++ parent = path->dentry; ++ if (nsi_lock) ++ si_read_unlock(parent->d_sb); ++ file = vfsub_dentry_open(path, au_dir_roflags); ++ dentry = (void *)file; ++ if (IS_ERR(file)) ++ goto out; + -+void au_fi_init_once(void *_fi); -+void au_finfo_fin(struct file *file); -+int au_finfo_init(struct file *file, struct au_fidir *fidir); ++ dentry = ERR_PTR(-ENOMEM); ++ arg.name = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!arg.name)) ++ goto out_file; ++ arg.ino = ino; ++ arg.found = 0; ++ do { ++ arg.called = 0; ++ /* smp_mb(); */ ++ err = vfsub_readdir(file, find_name_by_ino, &arg); ++ } while (!err && !arg.found && arg.called); ++ dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_name; ++ /* instead of ENOENT */ ++ dentry = ERR_PTR(-ESTALE); ++ if (!arg.found) ++ goto out_name; + -+/* ioctl.c */ -+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg); -+#ifdef CONFIG_COMPAT -+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, -+ unsigned long arg); -+#endif ++ /* do not call vfsub_lkup_one() */ ++ dir = parent->d_inode; ++ mutex_lock(&dir->i_mutex); ++ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen); ++ mutex_unlock(&dir->i_mutex); ++ AuTraceErrPtr(dentry); ++ if (IS_ERR(dentry)) ++ goto out_name; ++ AuDebugOn(au_test_anon(dentry)); ++ if (unlikely(!dentry->d_inode)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ENOENT); ++ } + -+/* ---------------------------------------------------------------------- */ ++out_name: ++ free_page((unsigned long)arg.name); ++out_file: ++ fput(file); ++out: ++ if (unlikely(nsi_lock ++ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0)) ++ if (!IS_ERR(dentry)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++ AuTraceErrPtr(dentry); ++ return dentry; ++} + -+static inline struct au_finfo *au_fi(struct file *file) ++static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino, ++ struct au_nfsd_si_lock *nsi_lock) +{ -+ return file->private_data; -+} ++ struct dentry *dentry; ++ struct path path; + -+/* ---------------------------------------------------------------------- */ ++ if (dir_ino != AUFS_ROOT_INO) { ++ path.dentry = decode_by_ino(sb, dir_ino, 0); ++ dentry = path.dentry; ++ if (!path.dentry || IS_ERR(path.dentry)) ++ goto out; ++ AuDebugOn(au_test_anon(path.dentry)); ++ } else ++ path.dentry = dget(sb->s_root); + -+/* -+ * fi_read_lock, fi_write_lock, -+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock -+ */ -+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); ++ path.mnt = au_mnt_get(sb); ++ dentry = au_lkup_by_ino(&path, ino, nsi_lock); ++ path_put(&path); + -+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) -+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem) -+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem) ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; ++} + +/* ---------------------------------------------------------------------- */ + -+/* todo: hard/soft set? */ -+static inline aufs_bindex_t au_fbstart(struct file *file) -+{ -+ FiMustAnyLock(file); -+ return au_fi(file)->fi_btop; -+} -+ -+static inline aufs_bindex_t au_fbend_dir(struct file *file) ++static int h_acceptable(void *expv, struct dentry *dentry) +{ -+ FiMustAnyLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_hdir->fd_bbot; ++ return 1; +} + -+static inline struct au_vdir *au_fvdir_cache(struct file *file) ++static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath, ++ char *buf, int len, struct super_block *sb) +{ -+ FiMustAnyLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_hdir->fd_vdir_cache; -+} ++ char *p; ++ int n; ++ struct path path; + -+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) -+{ -+ FiMustWriteLock(file); -+ au_fi(file)->fi_btop = bindex; -+} ++ p = d_path(h_rootpath, buf, len); ++ if (IS_ERR(p)) ++ goto out; ++ n = strlen(p); + -+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex) -+{ -+ FiMustWriteLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ au_fi(file)->fi_hdir->fd_bbot = bindex; -+} ++ path.mnt = h_rootpath->mnt; ++ path.dentry = h_parent; ++ p = d_path(&path, buf, len); ++ if (IS_ERR(p)) ++ goto out; ++ if (n != 1) ++ p += n; + -+static inline void au_set_fvdir_cache(struct file *file, -+ struct au_vdir *vdir_cache) -+{ -+ FiMustWriteLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache; -+} ++ path.mnt = au_mnt_get(sb); ++ path.dentry = sb->s_root; ++ p = d_path(&path, buf, len - strlen(p)); ++ mntput(path.mnt); ++ if (IS_ERR(p)) ++ goto out; ++ if (n != 1) ++ p[strlen(p)] = '/'; + -+static inline struct file *au_hf_top(struct file *file) -+{ -+ FiMustAnyLock(file); -+ AuDebugOn(au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_htop.hf_file; ++out: ++ AuTraceErrPtr(p); ++ return p; +} + -+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex) ++static ++struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh, ++ int fh_len, struct au_nfsd_si_lock *nsi_lock) +{ -+ FiMustAnyLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file; -+} ++ struct dentry *dentry, *h_parent, *root; ++ struct super_block *h_sb; ++ char *pathname, *p; ++ struct vfsmount *h_mnt; ++ struct au_branch *br; ++ int err; ++ struct path path; + -+/* todo: memory barrier? */ -+static inline unsigned int au_figen(struct file *f) -+{ -+ return atomic_read(&au_fi(f)->fi_generation); -+} ++ br = au_sbr(sb, nsi_lock->bindex); ++ h_mnt = au_br_mnt(br); ++ h_sb = h_mnt->mnt_sb; ++ /* todo: call lower fh_to_dentry()? fh_to_parent()? */ ++ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail), ++ fh_len - Fh_tail, fh[Fh_h_type], ++ h_acceptable, /*context*/NULL); ++ dentry = h_parent; ++ if (unlikely(!h_parent || IS_ERR(h_parent))) { ++ AuWarn1("%s decode_fh failed, %ld\n", ++ au_sbtype(h_sb), PTR_ERR(h_parent)); ++ goto out; ++ } ++ dentry = NULL; ++ if (unlikely(au_test_anon(h_parent))) { ++ AuWarn1("%s decode_fh returned a disconnected dentry\n", ++ au_sbtype(h_sb)); ++ goto out_h_parent; ++ } + -+static inline void au_set_mmapped(struct file *f) -+{ -+ if (atomic_inc_return(&au_fi(f)->fi_mmapped)) -+ return; -+ pr_warn("fi_mmapped wrapped around\n"); -+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped)) -+ ; -+} ++ dentry = ERR_PTR(-ENOMEM); ++ pathname = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!pathname)) ++ goto out_h_parent; + -+static inline void au_unset_mmapped(struct file *f) -+{ -+ atomic_dec(&au_fi(f)->fi_mmapped); -+} ++ root = sb->s_root; ++ path.mnt = h_mnt; ++ di_read_lock_parent(root, !AuLock_IR); ++ path.dentry = au_h_dptr(root, nsi_lock->bindex); ++ di_read_unlock(root, !AuLock_IR); ++ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb); ++ dentry = (void *)p; ++ if (IS_ERR(p)) ++ goto out_pathname; + -+static inline int au_test_mmapped(struct file *f) -+{ -+ return atomic_read(&au_fi(f)->fi_mmapped); -+} ++ si_read_unlock(sb); ++ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); ++ dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_relock; + -+/* customize vma->vm_file */ ++ dentry = ERR_PTR(-ENOENT); ++ AuDebugOn(au_test_anon(path.dentry)); ++ if (unlikely(!path.dentry->d_inode)) ++ goto out_path; + -+static inline void au_do_vm_file_reset(struct vm_area_struct *vma, -+ struct file *file) -+{ -+ struct file *f; ++ if (ino != path.dentry->d_inode->i_ino) ++ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL); ++ else ++ dentry = dget(path.dentry); + -+ f = vma->vm_file; -+ get_file(file); -+ vma->vm_file = file; -+ fput(f); ++out_path: ++ path_put(&path); ++out_relock: ++ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0)) ++ if (!IS_ERR(dentry)) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++out_pathname: ++ free_page((unsigned long)pathname); ++out_h_parent: ++ dput(h_parent); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; +} + -+#ifdef CONFIG_MMU -+#define AuDbgVmRegion(file, vma) do {} while (0) ++/* ---------------------------------------------------------------------- */ + -+static inline void au_vm_file_reset(struct vm_area_struct *vma, -+ struct file *file) ++static struct dentry * ++aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, ++ int fh_type) +{ -+ au_do_vm_file_reset(vma, file); -+} -+#else -+#define AuDbgVmRegion(file, vma) \ -+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file)) ++ struct dentry *dentry; ++ __u32 *fh = fid->raw; ++ struct au_branch *br; ++ ino_t ino, dir_ino; ++ struct au_nfsd_si_lock nsi_lock = { ++ .force_lock = 0 ++ }; + -+static inline void au_vm_file_reset(struct vm_area_struct *vma, -+ struct file *file) -+{ -+ struct file *f; ++ dentry = ERR_PTR(-ESTALE); ++ /* it should never happen, but the file handle is unreliable */ ++ if (unlikely(fh_len < Fh_tail)) ++ goto out; ++ nsi_lock.sigen = fh[Fh_sigen]; ++ nsi_lock.br_id = fh[Fh_br_id]; + -+ au_do_vm_file_reset(vma, file); -+ f = vma->vm_region->vm_file; -+ get_file(file); -+ vma->vm_region->vm_file = file; -+ fput(f); -+} -+#endif /* CONFIG_MMU */ ++ /* branch id may be wrapped around */ ++ br = NULL; ++ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock))) ++ goto out; ++ nsi_lock.force_lock = 1; + -+/* handle vma->vm_prfile */ -+static inline void au_vm_prfile_set(struct vm_area_struct *vma, -+ struct file *file) -+{ -+#ifdef CONFIG_AUFS_PROC_MAP -+ get_file(file); -+ vma->vm_prfile = file; -+#ifndef CONFIG_MMU -+ get_file(file); -+ vma->vm_region->vm_prfile = file; -+#endif -+#endif -+} ++ /* is this inode still cached? */ ++ ino = decode_ino(fh + Fh_ino); ++ /* it should never happen */ ++ if (unlikely(ino == AUFS_ROOT_INO)) ++ goto out; + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_FILE_H__ */ ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/finfo.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,157 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ dentry = decode_by_ino(sb, ino, dir_ino); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (dentry) ++ goto accept; + -+/* -+ * file private data -+ */ ++ /* is the parent dir cached? */ ++ br = au_sbr(sb, nsi_lock.bindex); ++ atomic_inc(&br->br_count); ++ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (dentry) ++ goto accept; + -+#include "aufs.h" ++ /* lookup path */ ++ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock); ++ if (IS_ERR(dentry)) ++ goto out_unlock; ++ if (unlikely(!dentry)) ++ /* todo?: make it ESTALE */ ++ goto out_unlock; + -+void au_hfput(struct au_hfile *hf, struct file *file) -+{ -+ /* todo: direct access f_flags */ -+ if (vfsub_file_flags(file) & __FMODE_EXEC) -+ allow_write_access(hf->hf_file); -+ fput(hf->hf_file); -+ hf->hf_file = NULL; -+ atomic_dec(&hf->hf_br->br_count); -+ hf->hf_br = NULL; ++accept: ++ if (!au_digen_test(dentry, au_sigen(sb)) ++ && dentry->d_inode->i_generation == fh[Fh_igen]) ++ goto out_unlock; /* success */ ++ ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++out_unlock: ++ if (br) ++ atomic_dec(&br->br_count); ++ si_read_unlock(sb); ++out: ++ AuTraceErrPtr(dentry); ++ return dentry; +} + -+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++#if 0 /* reserved for future use */ ++/* support subtreecheck option */ ++static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid, ++ int fh_len, int fh_type) +{ -+ struct au_finfo *finfo = au_fi(file); -+ struct au_hfile *hf; -+ struct au_fidir *fidir; -+ -+ fidir = finfo->fi_hdir; -+ if (!fidir) { -+ AuDebugOn(finfo->fi_btop != bindex); -+ hf = &finfo->fi_htop; -+ } else -+ hf = fidir->fd_hfile + bindex; ++ struct dentry *parent; ++ __u32 *fh = fid->raw; ++ ino_t dir_ino; + -+ if (hf && hf->hf_file) -+ au_hfput(hf, file); -+ if (val) { -+ FiMustWriteLock(file); -+ hf->hf_file = val; -+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); -+ } -+} ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ parent = decode_by_ino(sb, dir_ino, 0); ++ if (IS_ERR(parent)) ++ goto out; ++ if (!parent) ++ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]), ++ dir_ino, fh, fh_len); + -+void au_update_figen(struct file *file) -+{ -+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); -+ /* smp_mb(); */ /* atomic_set */ ++out: ++ AuTraceErrPtr(parent); ++ return parent; +} ++#endif + +/* ---------------------------------------------------------------------- */ + -+struct au_fidir *au_fidir_alloc(struct super_block *sb) ++static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, ++ struct inode *dir) +{ -+ struct au_fidir *fidir; -+ int nbr; ++ int err; ++ aufs_bindex_t bindex; ++ struct super_block *sb, *h_sb; ++ struct dentry *dentry, *parent, *h_parent; ++ struct inode *h_dir; ++ struct au_branch *br; + -+ nbr = au_sbend(sb) + 1; -+ if (nbr < 2) -+ nbr = 2; /* initial allocate for 2 branches */ -+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS); -+ if (fidir) { -+ fidir->fd_bbot = -1; -+ fidir->fd_nent = nbr; -+ fidir->fd_vdir_cache = NULL; ++ err = -ENOSPC; ++ if (unlikely(*max_len <= Fh_tail)) { ++ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len); ++ goto out; + } + -+ return fidir; -+} ++ err = FILEID_ROOT; ++ if (inode->i_ino == AUFS_ROOT_INO) { ++ AuDebugOn(inode->i_ino != AUFS_ROOT_INO); ++ goto out; ++ } + -+int au_fidir_realloc(struct au_finfo *finfo, int nbr) -+{ -+ int err; -+ struct au_fidir *fidir, *p; ++ h_parent = NULL; ++ sb = inode->i_sb; ++ err = si_read_lock(sb, AuLock_FLUSH); ++ if (unlikely(err)) ++ goto out; + -+ AuRwMustWriteLock(&finfo->fi_rwsem); -+ fidir = finfo->fi_hdir; -+ AuDebugOn(!fidir); ++#ifdef CONFIG_AUFS_DEBUG ++ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ AuWarn1("NFS-exporting requires xino\n"); ++#endif ++ err = -EIO; ++ parent = NULL; ++ ii_read_lock_child(inode); ++ bindex = au_ibstart(inode); ++ if (!dir) { ++ dentry = d_find_alias(inode); ++ if (unlikely(!dentry)) ++ goto out_unlock; ++ AuDebugOn(au_test_anon(dentry)); ++ parent = dget_parent(dentry); ++ dput(dentry); ++ if (unlikely(!parent)) ++ goto out_unlock; ++ dir = parent->d_inode; ++ } + -+ err = -ENOMEM; -+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr), -+ GFP_NOFS); -+ if (p) { -+ p->fd_nent = nbr; -+ finfo->fi_hdir = p; -+ err = 0; ++ ii_read_lock_parent(dir); ++ h_dir = au_h_iptr(dir, bindex); ++ ii_read_unlock(dir); ++ if (unlikely(!h_dir)) ++ goto out_parent; ++ h_parent = d_find_alias(h_dir); ++ if (unlikely(!h_parent)) ++ goto out_hparent; ++ ++ err = -EPERM; ++ br = au_sbr(sb, bindex); ++ h_sb = au_br_sb(br); ++ if (unlikely(!h_sb->s_export_op)) { ++ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb)); ++ goto out_hparent; + } + ++ fh[Fh_br_id] = br->br_id; ++ fh[Fh_sigen] = au_sigen(sb); ++ encode_ino(fh + Fh_ino, inode->i_ino); ++ encode_ino(fh + Fh_dir_ino, dir->i_ino); ++ fh[Fh_igen] = inode->i_generation; ++ ++ *max_len -= Fh_tail; ++ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail), ++ max_len, ++ /*connectable or subtreecheck*/0); ++ err = fh[Fh_h_type]; ++ *max_len += Fh_tail; ++ /* todo: macros? */ ++ if (err != FILEID_INVALID) ++ err = 99; ++ else ++ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb)); ++ ++out_hparent: ++ dput(h_parent); ++out_parent: ++ dput(parent); ++out_unlock: ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++out: ++ if (unlikely(err < 0)) ++ err = FILEID_INVALID; + return err; +} + +/* ---------------------------------------------------------------------- */ + -+void au_finfo_fin(struct file *file) ++static int aufs_commit_metadata(struct inode *inode) +{ -+ struct au_finfo *finfo; ++ int err; ++ aufs_bindex_t bindex; ++ struct super_block *sb; ++ struct inode *h_inode; ++ int (*f)(struct inode *inode); + -+ au_nfiles_dec(file->f_dentry->d_sb); ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ii_write_lock_child(inode); ++ bindex = au_ibstart(inode); ++ AuDebugOn(bindex < 0); ++ h_inode = au_h_iptr(inode, bindex); + -+ finfo = au_fi(file); -+ AuDebugOn(finfo->fi_hdir); -+ AuRwDestroy(&finfo->fi_rwsem); -+ au_cache_free_finfo(finfo); -+} ++ f = h_inode->i_sb->s_export_op->commit_metadata; ++ if (f) ++ err = f(h_inode); ++ else { ++ struct writeback_control wbc = { ++ .sync_mode = WB_SYNC_ALL, ++ .nr_to_write = 0 /* metadata only */ ++ }; + -+void au_fi_init_once(void *_finfo) -+{ -+ struct au_finfo *finfo = _finfo; -+ static struct lock_class_key aufs_fi; ++ err = sync_inode(h_inode, &wbc); ++ } + -+ au_rw_init(&finfo->fi_rwsem); -+ au_rw_class(&finfo->fi_rwsem, &aufs_fi); ++ au_cpup_attr_timesizes(inode); ++ ii_write_unlock(inode); ++ si_read_unlock(sb); ++ return err; +} + -+int au_finfo_init(struct file *file, struct au_fidir *fidir) -+{ -+ int err; -+ struct au_finfo *finfo; -+ struct dentry *dentry; -+ -+ err = -ENOMEM; -+ dentry = file->f_dentry; -+ finfo = au_cache_alloc_finfo(); -+ if (unlikely(!finfo)) -+ goto out; ++/* ---------------------------------------------------------------------- */ + -+ err = 0; -+ au_nfiles_inc(dentry->d_sb); -+ /* verbose coding for lock class name */ -+ if (!fidir) -+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO); -+ else -+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO); -+ au_rw_write_lock(&finfo->fi_rwsem); -+ finfo->fi_btop = -1; -+ finfo->fi_hdir = fidir; -+ atomic_set(&finfo->fi_generation, au_digen(dentry)); -+ /* smp_mb(); */ /* atomic_set */ ++static struct export_operations aufs_export_op = { ++ .fh_to_dentry = aufs_fh_to_dentry, ++ /* .fh_to_parent = aufs_fh_to_parent, */ ++ .encode_fh = aufs_encode_fh, ++ .commit_metadata = aufs_commit_metadata ++}; + -+ file->private_data = finfo; ++void au_export_init(struct super_block *sb) ++{ ++ struct au_sbinfo *sbinfo; ++ __u32 u; + -+out: -+ return err; ++ sb->s_export_op = &aufs_export_op; ++ sbinfo = au_sbi(sb); ++ sbinfo->si_xigen = NULL; ++ get_random_bytes(&u, sizeof(u)); ++ BUILD_BUG_ON(sizeof(u) != sizeof(int)); ++ atomic_set(&sbinfo->si_xigen_next, u); +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/f_op.c 2013-02-19 11:26:12.675790341 -0500 -@@ -0,0 +1,723 @@ ++++ linux-3.9/fs/aufs/f_op.c 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,720 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -10721,7 +10163,7 @@ + /* todo: necessary? */ + /* file->f_ra = h_file->f_ra; */ + /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fsstack_copy_attr_atime(dentry->d_inode, file_inode(h_file)); + fput(h_file); + +out: @@ -10788,7 +10230,7 @@ + err = vfsub_write_u(h_file, buf, count, ppos); + ii_write_lock_child(inode); + au_cpup_attr_timesizes(inode); -+ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ inode->i_mode = file_inode(h_file)->i_mode; + ii_write_unlock(inode); + fput(h_file); + @@ -10856,7 +10298,7 @@ + /* todo: necessary? */ + /* file->f_ra = h_file->f_ra; */ + /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fsstack_copy_attr_atime(dentry->d_inode, file_inode(h_file)); + fput(h_file); + +out: @@ -10901,7 +10343,7 @@ + err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos); + ii_write_lock_child(inode); + au_cpup_attr_timesizes(inode); -+ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ inode->i_mode = file_inode(h_file)->i_mode; + ii_write_unlock(inode); + fput(h_file); + @@ -10944,7 +10386,7 @@ + /* todo: necessasry? */ + /* file->f_ra = h_file->f_ra; */ + /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ fsstack_copy_attr_atime(dentry->d_inode, file_inode(h_file)); + fput(h_file); + +out: @@ -10989,7 +10431,7 @@ + err = vfsub_splice_from(pipe, h_file, ppos, len, flags); + ii_write_lock_child(inode); + au_cpup_attr_timesizes(inode); -+ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ inode->i_mode = file_inode(h_file)->i_mode; + ii_write_unlock(inode); + fput(h_file); + @@ -11105,8 +10547,7 @@ + + au_vm_prfile_set(vma, file); + /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(file->f_dentry->d_inode, -+ h_file->f_dentry->d_inode); ++ fsstack_copy_attr_atime(file_inode(file), file_inode(h_file)); + goto out_fput; /* success */ + +out_reset: @@ -11201,11 +10642,9 @@ + err = -ENOSYS; + h_file = au_hf_top(file); + if (h_file->f_op && h_file->f_op->aio_fsync) { -+ struct dentry *h_d; + struct mutex *h_mtx; + -+ h_d = h_file->f_dentry; -+ h_mtx = &h_d->d_inode->i_mutex; ++ h_mtx = &file_inode(h_file)->i_mutex; + if (!is_sync_kiocb(kio)) { + get_file(h_file); + fput(file); @@ -11300,7 +10739,7 @@ +#endif +}; --- /dev/null -+++ linux-3.x-rcN/fs/aufs/f_op_sp.c 2013-02-19 11:26:12.675790341 -0500 ++++ linux-3.9/fs/aufs/f_op_sp.c 2013-05-13 17:13:17.213798992 +0200 @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -11500,7 +10939,7 @@ + err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb), + AuPin_MNT_WRITE); + if (!err) { -+ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME); ++ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME, &pin); + au_unpin(&pin); + } + @@ -11533,12 +10972,12 @@ + + sb = dentry->d_sb; + h_file = au_hf_top(file); -+ h_inode = h_file->f_dentry->d_inode; ++ h_inode = file_inode(h_file); + di_read_unlock(dentry, AuLock_IR); + fi_write_unlock(file); + si_read_unlock(sb); + /* open this fifo in aufs */ -+ err = h_inode->i_fop->open(file->f_dentry->d_inode, file); ++ err = h_inode->i_fop->open(file_inode(file), file); + si_noflush_read_lock(sb); + fi_write_lock(file); + di_read_lock_child(dentry, AuLock_IR); @@ -11561,529 +11000,736 @@ + return err; +} + -+/* ---------------------------------------------------------------------- */ ++/* ---------------------------------------------------------------------- */ ++ ++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev) ++{ ++ init_special_inode(inode, mode, rdev); ++ ++ switch (mode & S_IFMT) { ++ case S_IFIFO: ++ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop; ++ /*FALLTHROUGH*/ ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFSOCK: ++ break; ++ default: ++ AuDebugOn(1); ++ } ++} ++ ++int au_special_file(umode_t mode) ++{ ++ int ret; ++ ++ ret = 0; ++ switch (mode & S_IFMT) { ++ case S_IFIFO: ++#if 0 ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFSOCK: ++#endif ++ ret = 1; ++ } ++ ++ return ret; ++} +--- /dev/null ++++ linux-3.9/fs/aufs/file.c 2013-05-13 17:13:17.213798992 +0200 +@@ -0,0 +1,688 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * handling file/dir, and address_space operation ++ */ ++ ++#ifdef CONFIG_AUFS_DEBUG ++#include ++#endif ++#include ++#include "aufs.h" ++ ++/* drop flags for writing */ ++unsigned int au_file_roflags(unsigned int flags) ++{ ++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); ++ flags |= O_RDONLY | O_NOATIME; ++ return flags; ++} ++ ++/* common functions to regular file and dir */ ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file) ++{ ++ struct file *h_file; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct path h_path; ++ int err, exec_flag; ++ ++ /* a race condition can happen between open and unlink/rmdir */ ++ h_file = ERR_PTR(-ENOENT); ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (au_test_nfsd() && !h_dentry) ++ goto out; ++ h_inode = h_dentry->d_inode; ++ if (au_test_nfsd() && !h_inode) ++ goto out; ++ spin_lock(&h_dentry->d_lock); ++ err = (!d_unhashed(dentry) && d_unlinked(h_dentry)) ++ || !h_inode ++ /* || !dentry->d_inode->i_nlink */ ++ ; ++ spin_unlock(&h_dentry->d_lock); ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ br = au_sbr(sb, bindex); ++ h_file = ERR_PTR(-EACCES); ++ exec_flag = flags & __FMODE_EXEC; ++ if (exec_flag && (au_br_mnt(br)->mnt_flags & MNT_NOEXEC)) ++ goto out; ++ ++ /* drop flags for writing */ ++ if (au_test_ro(sb, bindex, dentry->d_inode)) ++ flags = au_file_roflags(flags); ++ flags &= ~O_CREAT; ++ atomic_inc(&br->br_count); ++ h_path.dentry = h_dentry; ++ h_path.mnt = au_br_mnt(br); ++ if (!au_special_file(h_inode->i_mode)) ++ h_file = vfsub_dentry_open(&h_path, flags); ++ else { ++ /* this block depends upon the configuration */ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ si_read_unlock(sb); ++ h_file = vfsub_dentry_open(&h_path, flags); ++ si_noflush_read_lock(sb); ++ fi_write_lock(file); ++ di_read_lock_child(dentry, AuLock_IR); ++ } ++ if (IS_ERR(h_file)) ++ goto out_br; ++ ++ if (exec_flag) { ++ err = deny_write_access(h_file); ++ if (unlikely(err)) { ++ fput(h_file); ++ h_file = ERR_PTR(err); ++ goto out_br; ++ } ++ } ++ fsnotify_open(h_file); ++ goto out; /* success */ ++ ++out_br: ++ atomic_dec(&br->br_count); ++out: ++ return h_file; ++} + -+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev) ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags), ++ struct au_fidir *fidir) +{ -+ init_special_inode(inode, mode, rdev); ++ int err; ++ struct dentry *dentry; + -+ switch (mode & S_IFMT) { -+ case S_IFIFO: -+ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop; -+ /*FALLTHROUGH*/ -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFSOCK: -+ break; -+ default: -+ AuDebugOn(1); -+ } -+} ++ err = au_finfo_init(file, fidir); ++ if (unlikely(err)) ++ goto out; + -+int au_special_file(umode_t mode) -+{ -+ int ret; ++ dentry = file->f_dentry; ++ di_read_lock_child(dentry, AuLock_IR); ++ err = open(file, vfsub_file_flags(file)); ++ di_read_unlock(dentry, AuLock_IR); + -+ ret = 0; -+ switch (mode & S_IFMT) { -+ case S_IFIFO: -+#if 0 -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFSOCK: -+#endif -+ ret = 1; ++ fi_write_unlock(file); ++ if (unlikely(err)) { ++ au_fi(file)->fi_hdir = NULL; ++ au_finfo_fin(file); + } + -+ return ret; ++out: ++ return err; +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/fstype.h 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,481 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ + -+/* -+ * judging filesystem type -+ */ ++int au_reopen_nondir(struct file *file) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ struct dentry *dentry; ++ struct file *h_file, *h_file_tmp; + -+#ifndef __AUFS_FSTYPE_H__ -+#define __AUFS_FSTYPE_H__ ++ dentry = file->f_dentry; ++ AuDebugOn(au_special_file(dentry->d_inode->i_mode)); ++ bstart = au_dbstart(dentry); ++ h_file_tmp = NULL; ++ if (au_fbstart(file) == bstart) { ++ h_file = au_hf_top(file); ++ if (file->f_mode == h_file->f_mode) ++ return 0; /* success */ ++ h_file_tmp = h_file; ++ get_file(h_file_tmp); ++ au_set_h_fptr(file, bstart, NULL); ++ } ++ AuDebugOn(au_fi(file)->fi_hdir); ++ /* ++ * it can happen ++ * file exists on both of rw and ro ++ * open --> dbstart and fbstart are both 0 ++ * prepend a branch as rw, "rw" become ro ++ * remove rw/file ++ * delete the top branch, "rw" becomes rw again ++ * --> dbstart is 1, fbstart is still 0 ++ * write --> fbstart is 0 but dbstart is 1 ++ */ ++ /* AuDebugOn(au_fbstart(file) < bstart); */ + -+#ifdef __KERNEL__ ++ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC, ++ file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) { ++ if (h_file_tmp) { ++ atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count); ++ au_set_h_fptr(file, bstart, h_file_tmp); ++ h_file_tmp = NULL; ++ } ++ goto out; /* todo: close all? */ ++ } + -+#include -+#include -+#include ++ err = 0; ++ au_set_fbstart(file, bstart); ++ au_set_h_fptr(file, bstart, h_file); ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ + -+static inline int au_test_aufs(struct super_block *sb) -+{ -+ return sb->s_magic == AUFS_SUPER_MAGIC; ++out: ++ if (h_file_tmp) ++ fput(h_file_tmp); ++ return err; +} + -+static inline const char *au_sbtype(struct super_block *sb) -+{ -+ return sb->s_type->name; -+} ++/* ---------------------------------------------------------------------- */ + -+static inline int au_test_iso9660(struct super_block *sb __maybe_unused) ++static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, ++ struct dentry *hi_wh) +{ -+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE) -+ return sb->s_magic == ROMFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ int err; ++ aufs_bindex_t bstart; ++ struct au_dinfo *dinfo; ++ struct dentry *h_dentry; ++ struct au_hdentry *hdp; + -+static inline int au_test_romfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE) -+ return sb->s_magic == ISOFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ dinfo = au_di(file->f_dentry); ++ AuRwMustWriteLock(&dinfo->di_rwsem); + -+static inline int au_test_cramfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE) -+ return sb->s_magic == CRAMFS_MAGIC; -+#endif -+ return 0; -+} ++ bstart = dinfo->di_bstart; ++ dinfo->di_bstart = btgt; ++ hdp = dinfo->di_hdentry; ++ h_dentry = hdp[0 + btgt].hd_dentry; ++ hdp[0 + btgt].hd_dentry = hi_wh; ++ err = au_reopen_nondir(file); ++ hdp[0 + btgt].hd_dentry = h_dentry; ++ dinfo->di_bstart = bstart; + -+static inline int au_test_nfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) -+ return sb->s_magic == NFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif ++ return err; +} + -+static inline int au_test_fuse(struct super_block *sb __maybe_unused) ++static int au_ready_to_write_wh(struct file *file, loff_t len, ++ aufs_bindex_t bcpup, struct au_pin *pin) +{ -+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE) -+ return sb->s_magic == FUSE_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ int err; ++ struct inode *inode, *h_inode; ++ struct dentry *dentry, *h_dentry, *hi_wh; + -+static inline int au_test_xfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE) -+ return sb->s_magic == XFS_SB_MAGIC; -+#else -+ return 0; -+#endif -+} ++ dentry = file->f_dentry; ++ au_update_dbstart(dentry); ++ inode = dentry->d_inode; ++ h_inode = NULL; ++ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) { ++ h_dentry = au_h_dptr(dentry, bcpup); ++ if (h_dentry) ++ h_inode = h_dentry->d_inode; ++ } ++ hi_wh = au_hi_wh(inode, bcpup); ++ if (!hi_wh && !h_inode) ++ err = au_sio_cpup_wh(dentry, bcpup, len, file, pin); ++ else ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bcpup, hi_wh); + -+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_TMPFS -+ return sb->s_magic == TMPFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ if (!err ++ && inode->i_nlink > 1 ++ && au_opt_test(au_mntflags(dentry->d_sb), PLINK)) ++ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup)); + -+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE) -+ return !strcmp(au_sbtype(sb), "ecryptfs"); -+#else -+ return 0; -+#endif ++ return err; +} + -+static inline int au_test_smbfs(struct super_block *sb __maybe_unused) ++/* ++ * prepare the @file for writing. ++ */ ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) +{ -+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE) -+ return sb->s_magic == SMB_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ int err; ++ aufs_bindex_t bstart, bcpup, dbstart; ++ struct dentry *dentry, *parent, *h_dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *h_file; + -+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE) -+ return sb->s_magic == OCFS2_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ AuDebugOn(au_special_file(inode->i_mode)); ++ bstart = au_fbstart(file); ++ err = au_test_ro(sb, bstart, inode); ++ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { ++ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0); ++ goto out; ++ } + -+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE) -+ return sb->s_magic == DLMFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ /* need to cpup or reopen */ ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out_dgrade; ++ err = 0; + -+static inline int au_test_coda(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE) -+ return sb->s_magic == CODA_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) { ++ err = au_cpup_dirs(dentry, bcpup); ++ if (unlikely(err)) ++ goto out_dgrade; ++ } + -+static inline int au_test_v9fs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE) -+ return sb->s_magic == V9FS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_dgrade; + -+static inline int au_test_ext4(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE) -+ return sb->s_magic == EXT4_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ h_dentry = au_hf_top(file)->f_dentry; ++ dbstart = au_dbstart(dentry); ++ if (dbstart <= bcpup) { ++ h_dentry = au_h_dptr(dentry, bcpup); ++ AuDebugOn(!h_dentry); ++ bstart = bcpup; ++ } + -+static inline int au_test_sysv(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE) -+ return !strcmp(au_sbtype(sb), "sysv"); -+#else -+ return 0; -+#endif -+} ++ if (dbstart <= bcpup /* just reopen */ ++ || !d_unhashed(dentry) /* copyup and reopen */ ++ ) { ++ h_file = au_h_open_pre(dentry, bstart); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ di_downgrade_lock(parent, AuLock_IR); ++ if (dbstart > bcpup) ++ err = au_sio_cpup_simple(dentry, bcpup, len, ++ AuCpup_DTIME, pin); ++ if (!err) ++ err = au_reopen_nondir(file); ++ au_h_open_post(dentry, bstart, h_file); ++ } ++ } else { /* copyup as wh and reopen */ ++ /* ++ * since writable hfsplus branch is not supported, ++ * h_open_pre/post() are unnecessary. ++ */ ++ err = au_ready_to_write_wh(file, len, bcpup, pin); ++ di_downgrade_lock(parent, AuLock_IR); ++ } + -+static inline int au_test_ramfs(struct super_block *sb) -+{ -+ return sb->s_magic == RAMFS_MAGIC; -+} ++ if (!err) { ++ au_pin_set_parent_lflag(pin, /*lflag*/0); ++ goto out_dput; /* success */ ++ } ++ au_unpin(pin); ++ goto out_unlock; + -+static inline int au_test_ubifs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE) -+ return sb->s_magic == UBIFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif ++out_dgrade: ++ di_downgrade_lock(parent, AuLock_IR); ++out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++out_dput: ++ dput(parent); ++out: ++ return err; +} + -+static inline int au_test_procfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_PROC_FS -+ return sb->s_magic == PROC_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++/* ---------------------------------------------------------------------- */ + -+static inline int au_test_sysfs(struct super_block *sb __maybe_unused) ++int au_do_flush(struct file *file, fl_owner_t id, ++ int (*flush)(struct file *file, fl_owner_t id)) +{ -+#ifdef CONFIG_SYSFS -+ return sb->s_magic == SYSFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ int err; ++ struct super_block *sb; ++ struct inode *inode; + -+static inline int au_test_configfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE) -+ return sb->s_magic == CONFIGFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ inode = file_inode(file); ++ sb = inode->i_sb; ++ si_noflush_read_lock(sb); ++ fi_read_lock(file); ++ ii_read_lock_child(inode); + -+static inline int au_test_minix(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE) -+ return sb->s_magic == MINIX3_SUPER_MAGIC -+ || sb->s_magic == MINIX2_SUPER_MAGIC -+ || sb->s_magic == MINIX2_SUPER_MAGIC2 -+ || sb->s_magic == MINIX_SUPER_MAGIC -+ || sb->s_magic == MINIX_SUPER_MAGIC2; -+#else -+ return 0; -+#endif -+} ++ err = flush(file, id); ++ au_cpup_attr_timesizes(inode); + -+static inline int au_test_cifs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE) -+ return sb->s_magic == CIFS_MAGIC_NUMBER; -+#else -+ return 0; -+#endif ++ ii_read_unlock(inode); ++ fi_read_unlock(file); ++ si_read_unlock(sb); ++ return err; +} + -+static inline int au_test_fat(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE) -+ return sb->s_magic == MSDOS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++/* ---------------------------------------------------------------------- */ + -+static inline int au_test_msdos(struct super_block *sb) ++static int au_file_refresh_by_inode(struct file *file, int *need_reopen) +{ -+ return au_test_fat(sb); -+} ++ int err; ++ aufs_bindex_t bstart; ++ struct au_pin pin; ++ struct au_finfo *finfo; ++ struct dentry *dentry, *parent, *hi_wh; ++ struct inode *inode; ++ struct super_block *sb; + -+static inline int au_test_vfat(struct super_block *sb) -+{ -+ return au_test_fat(sb); -+} ++ FiMustWriteLock(file); + -+static inline int au_test_securityfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_SECURITYFS -+ return sb->s_magic == SECURITYFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ err = 0; ++ finfo = au_fi(file); ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ bstart = au_ibstart(inode); ++ if (bstart == finfo->fi_btop || IS_ROOT(dentry)) ++ goto out; + -+static inline int au_test_squashfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE) -+ return sb->s_magic == SQUASHFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ parent = dget_parent(dentry); ++ if (au_test_ro(sb, bstart, inode)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bstart = err; ++ di_read_unlock(parent, !AuLock_IR); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ err = 0; ++ } + -+static inline int au_test_btrfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE) -+ return sb->s_magic == BTRFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ di_read_lock_parent(parent, AuLock_IR); ++ hi_wh = au_hi_wh(inode, bstart); ++ if (!S_ISDIR(inode->i_mode) ++ && au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode) ++ && !d_unhashed(dentry) ++ && bstart < au_dbstart(dentry)) { ++ err = au_test_and_cpup_dirs(dentry, bstart); ++ if (unlikely(err)) ++ goto out_unlock; + -+static inline int au_test_xenfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE) -+ return sb->s_magic == XENFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ /* always superio. */ ++ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (!err) ++ err = au_sio_cpup_simple(dentry, bstart, -1, ++ AuCpup_DTIME, &pin); ++ au_unpin(&pin); ++ } else if (hi_wh) { ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bstart, hi_wh); ++ *need_reopen = 0; ++ } + -+static inline int au_test_debugfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_DEBUG_FS -+ return sb->s_magic == DEBUGFS_MAGIC; -+#else -+ return 0; -+#endif ++out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++out_parent: ++ dput(parent); ++out: ++ return err; +} + -+static inline int au_test_nilfs(struct super_block *sb __maybe_unused) ++static void au_do_refresh_dir(struct file *file) +{ -+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE) -+ return sb->s_magic == NILFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ aufs_bindex_t bindex, bend, new_bindex, brid; ++ struct au_hfile *p, tmp, *q; ++ struct au_finfo *finfo; ++ struct super_block *sb; ++ struct au_fidir *fidir; + -+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE) -+ return sb->s_magic == HFSPLUS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ FiMustWriteLock(file); + -+/* ---------------------------------------------------------------------- */ -+/* -+ * they can't be an aufs branch. -+ */ -+static inline int au_test_fs_unsuppoted(struct super_block *sb) -+{ -+ return -+#ifndef CONFIG_AUFS_BR_RAMFS -+ au_test_ramfs(sb) || -+#endif -+ au_test_procfs(sb) -+ || au_test_sysfs(sb) -+ || au_test_configfs(sb) -+ || au_test_debugfs(sb) -+ || au_test_securityfs(sb) -+ || au_test_xenfs(sb) -+ || au_test_ecryptfs(sb) -+ /* || !strcmp(au_sbtype(sb), "unionfs") */ -+ || au_test_aufs(sb); /* will be supported in next version */ -+} ++ sb = file->f_dentry->d_sb; ++ finfo = au_fi(file); ++ fidir = finfo->fi_hdir; ++ AuDebugOn(!fidir); ++ p = fidir->fd_hfile + finfo->fi_btop; ++ brid = p->hf_br->br_id; ++ bend = fidir->fd_bbot; ++ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) { ++ if (!p->hf_file) ++ continue; + -+static inline int au_test_fs_remote(struct super_block *sb) -+{ -+ return !au_test_tmpfs(sb) -+#ifdef CONFIG_AUFS_BR_RAMFS -+ && !au_test_ramfs(sb) -+#endif -+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV); -+} ++ new_bindex = au_br_index(sb, p->hf_br->br_id); ++ if (new_bindex == bindex) ++ continue; ++ if (new_bindex < 0) { ++ au_set_h_fptr(file, bindex, NULL); ++ continue; ++ } + -+/* ---------------------------------------------------------------------- */ ++ /* swap two lower inode, and loop again */ ++ q = fidir->fd_hfile + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hf_file) { ++ bindex--; ++ p--; ++ } ++ } + -+/* -+ * Note: these functions (below) are created after reading ->getattr() in all -+ * filesystems under linux/fs. it means we have to do so in every update... -+ */ ++ p = fidir->fd_hfile; ++ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) { ++ bend = au_sbend(sb); ++ for (finfo->fi_btop = 0; finfo->fi_btop <= bend; ++ finfo->fi_btop++, p++) ++ if (p->hf_file) { ++ if (file_inode(p->hf_file)) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ } else { ++ bend = au_br_index(sb, brid); ++ for (finfo->fi_btop = 0; finfo->fi_btop < bend; ++ finfo->fi_btop++, p++) ++ if (p->hf_file) ++ au_hfput(p, file); ++ bend = au_sbend(sb); ++ } + -+/* -+ * some filesystems require getattr to refresh the inode attributes before -+ * referencing. -+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs) -+ * and leave the work for d_revalidate() -+ */ -+static inline int au_test_fs_refresh_iattr(struct super_block *sb) -+{ -+ return au_test_nfs(sb) -+ || au_test_fuse(sb) -+ /* || au_test_smbfs(sb) */ /* untested */ -+ /* || au_test_ocfs2(sb) */ /* untested */ -+ /* || au_test_btrfs(sb) */ /* untested */ -+ /* || au_test_coda(sb) */ /* untested */ -+ /* || au_test_v9fs(sb) */ /* untested */ -+ ; ++ p = fidir->fd_hfile + bend; ++ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop; ++ fidir->fd_bbot--, p--) ++ if (p->hf_file) { ++ if (file_inode(p->hf_file)) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ AuDebugOn(fidir->fd_bbot < finfo->fi_btop); +} + +/* -+ * filesystems which don't maintain i_size or i_blocks. ++ * after branch manipulating, refresh the file. + */ -+static inline int au_test_fs_bad_iattr_size(struct super_block *sb) ++static int refresh_file(struct file *file, int (*reopen)(struct file *file)) +{ -+ return au_test_xfs(sb) -+ || au_test_btrfs(sb) -+ || au_test_ubifs(sb) -+ || au_test_hfsplus(sb) /* maintained, but incorrect */ -+ /* || au_test_ext4(sb) */ /* untested */ -+ /* || au_test_ocfs2(sb) */ /* untested */ -+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */ -+ /* || au_test_sysv(sb) */ /* untested */ -+ /* || au_test_minix(sb) */ /* untested */ -+ ; -+} ++ int err, need_reopen; ++ aufs_bindex_t bend, bindex; ++ struct dentry *dentry; ++ struct au_finfo *finfo; ++ struct au_hfile *hfile; + -+/* -+ * filesystems which don't store the correct value in some of their inode -+ * attributes. -+ */ -+static inline int au_test_fs_bad_iattr(struct super_block *sb) -+{ -+ return au_test_fs_bad_iattr_size(sb) -+ /* || au_test_cifs(sb) */ /* untested */ -+ || au_test_fat(sb) -+ || au_test_msdos(sb) -+ || au_test_vfat(sb); -+} ++ dentry = file->f_dentry; ++ finfo = au_fi(file); ++ if (!finfo->fi_hdir) { ++ hfile = &finfo->fi_htop; ++ AuDebugOn(!hfile->hf_file); ++ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id); ++ AuDebugOn(bindex < 0); ++ if (bindex != finfo->fi_btop) ++ au_set_fbstart(file, bindex); ++ } else { ++ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ au_do_refresh_dir(file); ++ } + -+/* they don't check i_nlink in link(2) */ -+static inline int au_test_fs_no_limit_nlink(struct super_block *sb) -+{ -+ return au_test_tmpfs(sb) -+#ifdef CONFIG_AUFS_BR_RAMFS -+ || au_test_ramfs(sb) -+#endif -+ || au_test_ubifs(sb) -+ || au_test_btrfs(sb) -+ || au_test_hfsplus(sb); ++ err = 0; ++ need_reopen = 1; ++ if (!au_test_mmapped(file)) ++ err = au_file_refresh_by_inode(file, &need_reopen); ++ if (!err && need_reopen && !d_unlinked(dentry)) ++ err = reopen(file); ++ if (!err) { ++ au_update_figen(file); ++ goto out; /* success */ ++ } ++ ++ /* error, close all lower files */ ++ if (finfo->fi_hdir) { ++ bend = au_fbend_dir(file); ++ for (bindex = au_fbstart(file); bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ } ++ ++out: ++ return err; +} + -+/* -+ * filesystems which sets S_NOATIME and S_NOCMTIME. -+ */ -+static inline int au_test_fs_notime(struct super_block *sb) ++/* common function to regular file and dir */ ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock) +{ -+ return au_test_nfs(sb) -+ || au_test_fuse(sb) -+ || au_test_ubifs(sb) -+ /* || au_test_cifs(sb) */ /* untested */ -+ ; -+} ++ int err; ++ unsigned int sigen, figen; ++ aufs_bindex_t bstart; ++ unsigned char pseudo_link; ++ struct dentry *dentry; ++ struct inode *inode; ++ ++ err = 0; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ AuDebugOn(au_special_file(inode->i_mode)); ++ sigen = au_sigen(dentry->d_sb); ++ fi_write_lock(file); ++ figen = au_figen(file); ++ di_write_lock_child(dentry); ++ bstart = au_dbstart(dentry); ++ pseudo_link = (bstart != au_ibstart(inode)); ++ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ goto out; /* success */ ++ } + -+/* -+ * filesystems which requires replacing i_mapping. -+ */ -+static inline int au_test_fs_bad_mapping(struct super_block *sb) -+{ -+ return au_test_fuse(sb) -+ || au_test_ubifs(sb); -+} ++ AuDbg("sigen %d, figen %d\n", sigen, figen); ++ if (au_digen_test(dentry, sigen)) { ++ err = au_reval_dpath(dentry, sigen); ++ AuDebugOn(!err && au_digen_test(dentry, sigen)); ++ } + -+/* temporary support for i#1 in cramfs */ -+static inline int au_test_fs_unique_ino(struct inode *inode) -+{ -+ if (au_test_cramfs(inode->i_sb)) -+ return inode->i_ino != 1; -+ return 1; ++ if (!err) ++ err = refresh_file(file, reopen); ++ if (!err) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ } else { ++ di_write_unlock(dentry); ++ fi_write_unlock(file); ++ } ++ ++out: ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+/* -+ * the filesystem where the xino files placed must support i/o after unlink and -+ * maintain i_size and i_blocks. -+ */ -+static inline int au_test_fs_bad_xino(struct super_block *sb) ++/* cf. aufs_nopage() */ ++/* for madvise(2) */ ++static int aufs_readpage(struct file *file __maybe_unused, struct page *page) +{ -+ return au_test_fs_remote(sb) -+ || au_test_fs_bad_iattr_size(sb) -+ /* don't want unnecessary work for xino */ -+ || au_test_aufs(sb) -+ || au_test_ecryptfs(sb) -+ || au_test_nilfs(sb); ++ unlock_page(page); ++ return 0; +} + -+static inline int au_test_fs_trunc_xino(struct super_block *sb) -+{ -+ return au_test_tmpfs(sb) -+ || au_test_ramfs(sb); -+} ++/* it will never be called, but necessary to support O_DIRECT */ ++static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, ++ const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs) ++{ BUG(); return 0; } + +/* -+ * test if the @sb is real-readonly. ++ * it will never be called, but madvise and fadvise behaves differently ++ * when get_xip_mem is defined + */ -+static inline int au_test_fs_rr(struct super_block *sb) -+{ -+ return au_test_squashfs(sb) -+ || au_test_iso9660(sb) -+ || au_test_cramfs(sb) -+ || au_test_romfs(sb); -+} ++static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, ++ int create, void **kmem, unsigned long *pfn) ++{ BUG(); return 0; } + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_FSTYPE_H__ */ ++/* they will never be called. */ ++#ifdef CONFIG_AUFS_DEBUG ++static int aufs_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_writepage(struct page *page, struct writeback_control *wbc) ++{ AuUnsupport(); return 0; } ++ ++static int aufs_set_page_dirty(struct page *page) ++{ AuUnsupport(); return 0; } ++static void aufs_invalidatepage(struct page *page, unsigned long offset) ++{ AuUnsupport(); } ++static int aufs_releasepage(struct page *page, gfp_t gfp) ++{ AuUnsupport(); return 0; } ++static int aufs_migratepage(struct address_space *mapping, struct page *newpage, ++ struct page *page, enum migrate_mode mode) ++{ AuUnsupport(); return 0; } ++static int aufs_launder_page(struct page *page) ++{ AuUnsupport(); return 0; } ++static int aufs_is_partially_uptodate(struct page *page, ++ read_descriptor_t *desc, ++ unsigned long from) ++{ AuUnsupport(); return 0; } ++static int aufs_error_remove_page(struct address_space *mapping, ++ struct page *page) ++{ AuUnsupport(); return 0; } ++static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file, ++ sector_t *span) ++{ AuUnsupport(); return 0; } ++static void aufs_swap_deactivate(struct file *file) ++{ AuUnsupport(); } ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++const struct address_space_operations aufs_aop = { ++ .readpage = aufs_readpage, ++ .direct_IO = aufs_direct_IO, ++ .get_xip_mem = aufs_get_xip_mem, ++#ifdef CONFIG_AUFS_DEBUG ++ .writepage = aufs_writepage, ++ /* no writepages, because of writepage */ ++ .set_page_dirty = aufs_set_page_dirty, ++ /* no readpages, because of readpage */ ++ .write_begin = aufs_write_begin, ++ .write_end = aufs_write_end, ++ /* no bmap, no block device */ ++ .invalidatepage = aufs_invalidatepage, ++ .releasepage = aufs_releasepage, ++ .migratepage = aufs_migratepage, ++ .launder_page = aufs_launder_page, ++ .is_partially_uptodate = aufs_is_partially_uptodate, ++ .error_remove_page = aufs_error_remove_page, ++ .swap_activate = aufs_swap_activate, ++ .swap_deactivate = aufs_swap_deactivate ++#endif /* CONFIG_AUFS_DEBUG */ ++}; --- /dev/null -+++ linux-3.x-rcN/fs/aufs/hfsnotify.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,293 @@ ++++ linux-3.9/fs/aufs/file.h 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,298 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -12103,285 +11749,290 @@ + */ + +/* -+ * fsnotify for the lower directories ++ * file operations + */ + -+#include "aufs.h" ++#ifndef __AUFS_FILE_H__ ++#define __AUFS_FILE_H__ + -+/* FS_IN_IGNORED is unnecessary */ -+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE -+ | FS_CREATE | FS_EVENT_ON_CHILD); -+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq); -+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0); ++#ifdef __KERNEL__ + -+static void au_hfsn_free_mark(struct fsnotify_mark *mark) ++#include ++#include ++#include ++#include "rwsem.h" ++ ++struct au_branch; ++struct au_hfile { ++ struct file *hf_file; ++ struct au_branch *hf_br; ++}; ++ ++struct au_vdir; ++struct au_fidir { ++ aufs_bindex_t fd_bbot; ++ aufs_bindex_t fd_nent; ++ struct au_vdir *fd_vdir_cache; ++ struct au_hfile fd_hfile[]; ++}; ++ ++static inline int au_fidir_sz(int nent) +{ -+ struct au_hnotify *hn = container_of(mark, struct au_hnotify, -+ hn_mark); -+ AuDbg("here\n"); -+ au_cache_free_hnotify(hn); -+ smp_mb__before_atomic_dec(); -+ if (atomic64_dec_and_test(&au_hfsn_ifree)) -+ wake_up(&au_hfsn_wq); ++ AuDebugOn(nent < 0); ++ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent; +} + -+static int au_hfsn_alloc(struct au_hinode *hinode) ++struct au_finfo { ++ atomic_t fi_generation; ++ ++ struct au_rwsem fi_rwsem; ++ aufs_bindex_t fi_btop; ++ ++ /* do not union them */ ++ struct { /* for non-dir */ ++ struct au_hfile fi_htop; ++ atomic_t fi_mmapped; ++ }; ++ struct au_fidir *fi_hdir; /* for dir only */ ++} ____cacheline_aligned_in_smp; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* file.c */ ++extern const struct address_space_operations aufs_aop; ++unsigned int au_file_roflags(unsigned int flags); ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file); ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags), ++ struct au_fidir *fidir); ++int au_reopen_nondir(struct file *file); ++struct au_pin; ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock); ++int au_do_flush(struct file *file, fl_owner_t id, ++ int (*flush)(struct file *file, fl_owner_t id)); ++ ++/* poll.c */ ++#ifdef CONFIG_AUFS_POLL ++unsigned int aufs_poll(struct file *file, poll_table *wait); ++#endif ++ ++#ifdef CONFIG_AUFS_BR_HFSPLUS ++/* hfsplus.c */ ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex); ++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file); ++#else ++static inline ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) +{ -+ int err; -+ struct au_hnotify *hn; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct fsnotify_mark *mark; -+ aufs_bindex_t bindex; ++ return NULL; ++} + -+ hn = hinode->hi_notify; -+ sb = hn->hn_aufs_inode->i_sb; -+ bindex = au_br_index(sb, hinode->hi_id); -+ br = au_sbr(sb, bindex); -+ AuDebugOn(!br->br_hfsn); ++AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file); ++#endif + -+ mark = &hn->hn_mark; -+ fsnotify_init_mark(mark, au_hfsn_free_mark); -+ mark->mask = AuHfsnMask; -+ /* -+ * by udba rename or rmdir, aufs assign a new inode to the known -+ * h_inode, so specify 1 to allow dups. -+ */ -+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode, -+ /*mnt*/NULL, /*allow_dups*/1); -+ /* even if err */ -+ fsnotify_put_mark(mark); ++/* f_op.c */ ++extern const struct file_operations aufs_file_fop; ++int au_do_open_nondir(struct file *file, int flags); ++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file); + -+ return err; ++#ifdef CONFIG_AUFS_SP_IATTR ++/* f_op_sp.c */ ++int au_special_file(umode_t mode); ++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev); ++#else ++AuStubInt0(au_special_file, umode_t mode) ++static inline void au_init_special_fop(struct inode *inode, umode_t mode, ++ dev_t rdev) ++{ ++ init_special_inode(inode, mode, rdev); +} ++#endif + -+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn) -+{ -+ struct fsnotify_mark *mark; -+ unsigned long long ull; -+ struct fsnotify_group *group; ++/* finfo.c */ ++void au_hfput(struct au_hfile *hf, struct file *file); ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, ++ struct file *h_file); + -+ ull = atomic64_inc_return(&au_hfsn_ifree); -+ BUG_ON(!ull); ++void au_update_figen(struct file *file); ++struct au_fidir *au_fidir_alloc(struct super_block *sb); ++int au_fidir_realloc(struct au_finfo *finfo, int nbr); + -+ mark = &hn->hn_mark; -+ spin_lock(&mark->lock); -+ group = mark->group; -+ fsnotify_get_group(group); -+ spin_unlock(&mark->lock); -+ fsnotify_destroy_mark(mark, group); -+ fsnotify_put_group(group); ++void au_fi_init_once(void *_fi); ++void au_finfo_fin(struct file *file); ++int au_finfo_init(struct file *file, struct au_fidir *fidir); + -+ /* free hn by myself */ -+ return 0; -+} ++/* ioctl.c */ ++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg); ++#endif + +/* ---------------------------------------------------------------------- */ + -+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set) ++static inline struct au_finfo *au_fi(struct file *file) +{ -+ struct fsnotify_mark *mark; -+ -+ mark = &hinode->hi_notify->hn_mark; -+ spin_lock(&mark->lock); -+ if (do_set) { -+ AuDebugOn(mark->mask & AuHfsnMask); -+ mark->mask |= AuHfsnMask; -+ } else { -+ AuDebugOn(!(mark->mask & AuHfsnMask)); -+ mark->mask &= ~AuHfsnMask; -+ } -+ spin_unlock(&mark->lock); -+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */ ++ return file->private_data; +} + +/* ---------------------------------------------------------------------- */ + -+/* #define AuDbgHnotify */ -+#ifdef AuDbgHnotify -+static char *au_hfsn_name(u32 mask) -+{ -+#ifdef CONFIG_AUFS_DEBUG -+#define test_ret(flag) if (mask & flag) \ -+ return #flag; -+ test_ret(FS_ACCESS); -+ test_ret(FS_MODIFY); -+ test_ret(FS_ATTRIB); -+ test_ret(FS_CLOSE_WRITE); -+ test_ret(FS_CLOSE_NOWRITE); -+ test_ret(FS_OPEN); -+ test_ret(FS_MOVED_FROM); -+ test_ret(FS_MOVED_TO); -+ test_ret(FS_CREATE); -+ test_ret(FS_DELETE); -+ test_ret(FS_DELETE_SELF); -+ test_ret(FS_MOVE_SELF); -+ test_ret(FS_UNMOUNT); -+ test_ret(FS_Q_OVERFLOW); -+ test_ret(FS_IN_IGNORED); -+ test_ret(FS_IN_ISDIR); -+ test_ret(FS_IN_ONESHOT); -+ test_ret(FS_EVENT_ON_CHILD); -+ return ""; -+#undef test_ret -+#else -+ return "??"; -+#endif -+} -+#endif ++/* ++ * fi_read_lock, fi_write_lock, ++ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); ++ ++#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) ++#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem) ++#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem) + +/* ---------------------------------------------------------------------- */ + -+static void au_hfsn_free_group(struct fsnotify_group *group) ++/* todo: hard/soft set? */ ++static inline aufs_bindex_t au_fbstart(struct file *file) +{ -+ struct au_br_hfsnotify *hfsn = group->private; -+ -+ AuDbg("here\n"); -+ kfree(hfsn); ++ FiMustAnyLock(file); ++ return au_fi(file)->fi_btop; +} + -+static int au_hfsn_handle_event(struct fsnotify_group *group, -+ struct fsnotify_mark *inode_mark, -+ struct fsnotify_mark *vfsmount_mark, -+ struct fsnotify_event *event) ++static inline aufs_bindex_t au_fbend_dir(struct file *file) +{ -+ int err; -+ struct au_hnotify *hnotify; -+ struct inode *h_dir, *h_inode; -+ __u32 mask; -+ struct qstr h_child_qstr = QSTR_INIT(event->file_name, event->name_len); -+ -+ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE); -+ -+ err = 0; -+ /* if FS_UNMOUNT happens, there must be another bug */ -+ mask = event->mask; -+ AuDebugOn(mask & FS_UNMOUNT); -+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT)) -+ goto out; -+ -+ h_dir = event->to_tell; -+ h_inode = event->inode; -+#ifdef AuDbgHnotify -+ au_debug(1); -+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1 -+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) { -+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n", -+ h_dir->i_ino, mask, au_hfsn_name(mask), -+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0); -+ /* WARN_ON(1); */ -+ } -+ au_debug(0); -+#endif ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_bbot; ++} + -+ AuDebugOn(!inode_mark); -+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark); -+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode); ++static inline struct au_vdir *au_fvdir_cache(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_vdir_cache; ++} + -+out: -+ return err; ++static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ au_fi(file)->fi_btop = bindex; +} + -+/* isn't it waste to ask every registered 'group'? */ -+/* copied from linux/fs/notify/inotify/inotify_fsnotiry.c */ -+/* it should be exported to modules */ -+static bool au_hfsn_should_send_event(struct fsnotify_group *group, -+ struct inode *h_inode, -+ struct fsnotify_mark *inode_mark, -+ struct fsnotify_mark *vfsmount_mark, -+ __u32 mask, void *data, int data_type) ++static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex) +{ -+ mask = (mask & ~FS_EVENT_ON_CHILD); -+ return inode_mark->mask & mask; ++ FiMustWriteLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ au_fi(file)->fi_hdir->fd_bbot = bindex; +} + -+static struct fsnotify_ops au_hfsn_ops = { -+ .should_send_event = au_hfsn_should_send_event, -+ .handle_event = au_hfsn_handle_event, -+ .free_group_priv = au_hfsn_free_group -+}; ++static inline void au_set_fvdir_cache(struct file *file, ++ struct au_vdir *vdir_cache) ++{ ++ FiMustWriteLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache; ++} + -+/* ---------------------------------------------------------------------- */ ++static inline struct file *au_hf_top(struct file *file) ++{ ++ FiMustAnyLock(file); ++ AuDebugOn(au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_htop.hf_file; ++} + -+static void au_hfsn_fin_br(struct au_branch *br) ++static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex) +{ -+ struct au_br_hfsnotify *hfsn; ++ FiMustAnyLock(file); ++ AuDebugOn(!au_fi(file)->fi_hdir); ++ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file; ++} + -+ hfsn = br->br_hfsn; -+ if (hfsn) -+ fsnotify_put_group(hfsn->hfsn_group); ++/* todo: memory barrier? */ ++static inline unsigned int au_figen(struct file *f) ++{ ++ return atomic_read(&au_fi(f)->fi_generation); +} + -+static int au_hfsn_init_br(struct au_branch *br, int perm) ++static inline void au_set_mmapped(struct file *f) +{ -+ int err; -+ struct fsnotify_group *group; -+ struct au_br_hfsnotify *hfsn; ++ if (atomic_inc_return(&au_fi(f)->fi_mmapped)) ++ return; ++ pr_warn("fi_mmapped wrapped around\n"); ++ while (!atomic_inc_return(&au_fi(f)->fi_mmapped)) ++ ; ++} + -+ err = 0; -+ br->br_hfsn = NULL; -+ if (!au_br_hnotifyable(perm)) -+ goto out; ++static inline void au_unset_mmapped(struct file *f) ++{ ++ atomic_dec(&au_fi(f)->fi_mmapped); ++} + -+ err = -ENOMEM; -+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS); -+ if (unlikely(!hfsn)) -+ goto out; ++static inline int au_test_mmapped(struct file *f) ++{ ++ return atomic_read(&au_fi(f)->fi_mmapped); ++} + -+ err = 0; -+ group = fsnotify_alloc_group(&au_hfsn_ops); -+ if (IS_ERR(group)) { -+ err = PTR_ERR(group); -+ pr_err("fsnotify_alloc_group() failed, %d\n", err); -+ goto out_hfsn; -+ } ++/* customize vma->vm_file */ + -+ group->private = hfsn; -+ hfsn->hfsn_group = group; -+ br->br_hfsn = hfsn; -+ goto out; /* success */ ++static inline void au_do_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ struct file *f; + -+out_hfsn: -+ kfree(hfsn); -+out: -+ return err; ++ f = vma->vm_file; ++ get_file(file); ++ vma->vm_file = file; ++ fput(f); +} + -+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm) ++#ifdef CONFIG_MMU ++#define AuDbgVmRegion(file, vma) do {} while (0) ++ ++static inline void au_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) +{ -+ int err; ++ au_do_vm_file_reset(vma, file); ++} ++#else ++#define AuDbgVmRegion(file, vma) \ ++ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file)) + -+ err = 0; -+ if (!br->br_hfsn) -+ err = au_hfsn_init_br(br, perm); ++static inline void au_vm_file_reset(struct vm_area_struct *vma, ++ struct file *file) ++{ ++ struct file *f; + -+ return err; ++ au_do_vm_file_reset(vma, file); ++ f = vma->vm_region->vm_file; ++ get_file(file); ++ vma->vm_region->vm_file = file; ++ fput(f); +} ++#endif /* CONFIG_MMU */ + -+/* ---------------------------------------------------------------------- */ -+ -+static void au_hfsn_fin(void) ++/* handle vma->vm_prfile */ ++static inline void au_vm_prfile_set(struct vm_area_struct *vma, ++ struct file *file) +{ -+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree)); -+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree)); ++#ifdef CONFIG_AUFS_PROC_MAP ++ get_file(file); ++ vma->vm_prfile = file; ++#ifndef CONFIG_MMU ++ get_file(file); ++ vma->vm_region->vm_prfile = file; ++#endif ++#endif +} + -+const struct au_hnotify_op au_hnotify_op = { -+ .ctl = au_hfsn_ctl, -+ .alloc = au_hfsn_alloc, -+ .free = au_hfsn_free, -+ -+ .fin = au_hfsn_fin, -+ -+ .reset_br = au_hfsn_reset_br, -+ .fin_br = au_hfsn_fin_br, -+ .init_br = au_hfsn_init_br -+}; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FILE_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/hfsplus.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,57 @@ ++++ linux-3.9/fs/aufs/finfo.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,157 @@ +/* -+ * Copyright (C) 2010-2013 Junjiro R. Okajima ++ * Copyright (C) 2005-2013 Junjiro R. Okajima + * + * This program, aufs is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by @@ -12399,47 +12050,147 @@ + */ + +/* -+ * special support for filesystems which aqucires an inode mutex -+ * at final closing a file, eg, hfsplus. -+ * -+ * This trick is very simple and stupid, just to open the file before really -+ * neceeary open to tell hfsplus that this is not the final closing. -+ * The caller should call au_h_open_pre() after acquiring the inode mutex, -+ * and au_h_open_post() after releasing it. ++ * file private data + */ + +#include "aufs.h" + -+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) ++void au_hfput(struct au_hfile *hf, struct file *file) +{ -+ struct file *h_file; -+ struct dentry *h_dentry; ++ /* todo: direct access f_flags */ ++ if (vfsub_file_flags(file) & __FMODE_EXEC) ++ allow_write_access(hf->hf_file); ++ fput(hf->hf_file); ++ hf->hf_file = NULL; ++ atomic_dec(&hf->hf_br->br_count); ++ hf->hf_br = NULL; ++} + -+ h_dentry = au_h_dptr(dentry, bindex); -+ AuDebugOn(!h_dentry); -+ AuDebugOn(!h_dentry->d_inode); -+ IMustLock(h_dentry->d_inode); ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++{ ++ struct au_finfo *finfo = au_fi(file); ++ struct au_hfile *hf; ++ struct au_fidir *fidir; + -+ h_file = NULL; -+ if (au_test_hfsplus(h_dentry->d_sb) -+ && S_ISREG(h_dentry->d_inode->i_mode)) -+ h_file = au_h_open(dentry, bindex, -+ O_RDONLY | O_NOATIME | O_LARGEFILE, -+ /*file*/NULL); -+ return h_file; ++ fidir = finfo->fi_hdir; ++ if (!fidir) { ++ AuDebugOn(finfo->fi_btop != bindex); ++ hf = &finfo->fi_htop; ++ } else ++ hf = fidir->fd_hfile + bindex; ++ ++ if (hf && hf->hf_file) ++ au_hfput(hf, file); ++ if (val) { ++ FiMustWriteLock(file); ++ hf->hf_file = val; ++ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); ++ } +} + -+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, -+ struct file *h_file) ++void au_update_figen(struct file *file) +{ -+ if (h_file) { -+ fput(h_file); -+ au_sbr_put(dentry->d_sb, bindex); ++ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); ++ /* smp_mb(); */ /* atomic_set */ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_fidir *au_fidir_alloc(struct super_block *sb) ++{ ++ struct au_fidir *fidir; ++ int nbr; ++ ++ nbr = au_sbend(sb) + 1; ++ if (nbr < 2) ++ nbr = 2; /* initial allocate for 2 branches */ ++ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS); ++ if (fidir) { ++ fidir->fd_bbot = -1; ++ fidir->fd_nent = nbr; ++ fidir->fd_vdir_cache = NULL; ++ } ++ ++ return fidir; ++} ++ ++int au_fidir_realloc(struct au_finfo *finfo, int nbr) ++{ ++ int err; ++ struct au_fidir *fidir, *p; ++ ++ AuRwMustWriteLock(&finfo->fi_rwsem); ++ fidir = finfo->fi_hdir; ++ AuDebugOn(!fidir); ++ ++ err = -ENOMEM; ++ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr), ++ GFP_NOFS); ++ if (p) { ++ p->fd_nent = nbr; ++ finfo->fi_hdir = p; ++ err = 0; + } ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_finfo_fin(struct file *file) ++{ ++ struct au_finfo *finfo; ++ ++ au_nfiles_dec(file->f_dentry->d_sb); ++ ++ finfo = au_fi(file); ++ AuDebugOn(finfo->fi_hdir); ++ AuRwDestroy(&finfo->fi_rwsem); ++ au_cache_free_finfo(finfo); ++} ++ ++void au_fi_init_once(void *_finfo) ++{ ++ struct au_finfo *finfo = _finfo; ++ static struct lock_class_key aufs_fi; ++ ++ au_rw_init(&finfo->fi_rwsem); ++ au_rw_class(&finfo->fi_rwsem, &aufs_fi); ++} ++ ++int au_finfo_init(struct file *file, struct au_fidir *fidir) ++{ ++ int err; ++ struct au_finfo *finfo; ++ struct dentry *dentry; ++ ++ err = -ENOMEM; ++ dentry = file->f_dentry; ++ finfo = au_cache_alloc_finfo(); ++ if (unlikely(!finfo)) ++ goto out; ++ ++ err = 0; ++ au_nfiles_inc(dentry->d_sb); ++ /* verbose coding for lock class name */ ++ if (!fidir) ++ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO); ++ else ++ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO); ++ au_rw_write_lock(&finfo->fi_rwsem); ++ finfo->fi_btop = -1; ++ finfo->fi_hdir = fidir; ++ atomic_set(&finfo->fi_generation, au_digen(dentry)); ++ /* smp_mb(); */ /* atomic_set */ ++ ++ file->private_data = finfo; ++ ++out: ++ return err; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/hnotify.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,713 @@ ++++ linux-3.9/fs/aufs/fstype.h 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,480 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -12459,703 +12210,828 @@ + */ + +/* -+ * abstraction to notify the direct changes on lower directories ++ * judging filesystem type + */ + -+#include "aufs.h" ++#ifndef __AUFS_FSTYPE_H__ ++#define __AUFS_FSTYPE_H__ + -+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode) ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++ ++static inline int au_test_aufs(struct super_block *sb) +{ -+ int err; -+ struct au_hnotify *hn; ++ return sb->s_magic == AUFS_SUPER_MAGIC; ++} + -+ err = -ENOMEM; -+ hn = au_cache_alloc_hnotify(); -+ if (hn) { -+ hn->hn_aufs_inode = inode; -+ hinode->hi_notify = hn; -+ err = au_hnotify_op.alloc(hinode); -+ AuTraceErr(err); -+ if (unlikely(err)) { -+ hinode->hi_notify = NULL; -+ au_cache_free_hnotify(hn); -+ /* -+ * The upper dir was removed by udba, but the same named -+ * dir left. In this case, aufs assignes a new inode -+ * number and set the monitor again. -+ * For the lower dir, the old monitnor is still left. -+ */ -+ if (err == -EEXIST) -+ err = 0; -+ } -+ } ++static inline const char *au_sbtype(struct super_block *sb) ++{ ++ return sb->s_type->name; ++} + -+ AuTraceErr(err); -+ return err; ++static inline int au_test_iso9660(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE) ++ return sb->s_magic == ROMFS_MAGIC; ++#else ++ return 0; ++#endif +} + -+void au_hn_free(struct au_hinode *hinode) ++static inline int au_test_romfs(struct super_block *sb __maybe_unused) +{ -+ struct au_hnotify *hn; -+ -+ hn = hinode->hi_notify; -+ if (hn) { -+ hinode->hi_notify = NULL; -+ if (au_hnotify_op.free(hinode, hn)) -+ au_cache_free_hnotify(hn); -+ } ++#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE) ++ return sb->s_magic == ISOFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif +} + -+/* ---------------------------------------------------------------------- */ -+ -+void au_hn_ctl(struct au_hinode *hinode, int do_set) ++static inline int au_test_cramfs(struct super_block *sb __maybe_unused) +{ -+ if (hinode->hi_notify) -+ au_hnotify_op.ctl(hinode, do_set); ++#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE) ++ return sb->s_magic == CRAMFS_MAGIC; ++#endif ++ return 0; +} + -+void au_hn_reset(struct inode *inode, unsigned int flags) ++static inline int au_test_nfs(struct super_block *sb __maybe_unused) +{ -+ aufs_bindex_t bindex, bend; -+ struct inode *hi; -+ struct dentry *iwhdentry; ++#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) ++ return sb->s_magic == NFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { -+ hi = au_h_iptr(inode, bindex); -+ if (!hi) -+ continue; ++static inline int au_test_fuse(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE) ++ return sb->s_magic == FUSE_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */ -+ iwhdentry = au_hi_wh(inode, bindex); -+ if (iwhdentry) -+ dget(iwhdentry); -+ au_igrab(hi); -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ au_set_h_iptr(inode, bindex, au_igrab(hi), -+ flags & ~AuHi_XINO); -+ iput(hi); -+ dput(iwhdentry); -+ /* mutex_unlock(&hi->i_mutex); */ -+ } ++static inline int au_test_xfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE) ++ return sb->s_magic == XFS_SB_MAGIC; ++#else ++ return 0; ++#endif +} + -+/* ---------------------------------------------------------------------- */ ++static inline int au_test_tmpfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_TMPFS ++ return sb->s_magic == TMPFS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+static int hn_xino(struct inode *inode, struct inode *h_inode) ++static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused) +{ -+ int err; -+ aufs_bindex_t bindex, bend, bfound, bstart; -+ struct inode *h_i; ++#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "ecryptfs"); ++#else ++ return 0; ++#endif ++} + -+ err = 0; -+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { -+ pr_warn("branch root dir was changed\n"); -+ goto out; -+ } ++static inline int au_test_smbfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE) ++ return sb->s_magic == SMB_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ bfound = -1; -+ bend = au_ibend(inode); -+ bstart = au_ibstart(inode); -+#if 0 /* reserved for future use */ -+ if (bindex == bend) { -+ /* keep this ino in rename case */ -+ goto out; -+ } ++static inline int au_test_ocfs2(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE) ++ return sb->s_magic == OCFS2_SUPER_MAGIC; ++#else ++ return 0; +#endif -+ for (bindex = bstart; bindex <= bend; bindex++) -+ if (au_h_iptr(inode, bindex) == h_inode) { -+ bfound = bindex; -+ break; -+ } -+ if (bfound < 0) -+ goto out; ++} + -+ for (bindex = bstart; bindex <= bend; bindex++) { -+ h_i = au_h_iptr(inode, bindex); -+ if (!h_i) -+ continue; ++static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE) ++ return sb->s_magic == DLMFS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0); -+ /* ignore this error */ -+ /* bad action? */ -+ } ++static inline int au_test_coda(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE) ++ return sb->s_magic == CODA_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ /* children inode number will be broken */ ++static inline int au_test_v9fs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE) ++ return sb->s_magic == V9FS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+out: -+ AuTraceErr(err); -+ return err; ++static inline int au_test_ext4(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE) ++ return sb->s_magic == EXT4_SUPER_MAGIC; ++#else ++ return 0; ++#endif +} + -+static int hn_gen_tree(struct dentry *dentry) ++static inline int au_test_sysv(struct super_block *sb __maybe_unused) +{ -+ int err, i, j, ndentry; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry **dentries; ++#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "sysv"); ++#else ++ return 0; ++#endif ++} + -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; ++static inline int au_test_ramfs(struct super_block *sb) ++{ ++ return sb->s_magic == RAMFS_MAGIC; ++} + -+ for (i = 0; i < dpages.ndpage; i++) { -+ dpage = dpages.dpages + i; -+ dentries = dpage->dentries; -+ ndentry = dpage->ndentry; -+ for (j = 0; j < ndentry; j++) { -+ struct dentry *d; ++static inline int au_test_ubifs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE) ++ return sb->s_magic == UBIFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ d = dentries[j]; -+ if (IS_ROOT(d)) -+ continue; ++static inline int au_test_procfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_PROC_FS ++ return sb->s_magic == PROC_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ au_digen_dec(d); -+ if (d->d_inode) -+ /* todo: reset children xino? -+ cached children only? */ -+ au_iigen_dec(d->d_inode); -+ } -+ } ++static inline int au_test_sysfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_SYSFS ++ return sb->s_magic == SYSFS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+out_dpages: -+ au_dpages_free(&dpages); ++static inline int au_test_configfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE) ++ return sb->s_magic == CONFIGFS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+#if 0 -+ /* discard children */ -+ dentry_unhash(dentry); -+ dput(dentry); ++static inline int au_test_minix(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE) ++ return sb->s_magic == MINIX3_SUPER_MAGIC ++ || sb->s_magic == MINIX2_SUPER_MAGIC ++ || sb->s_magic == MINIX2_SUPER_MAGIC2 ++ || sb->s_magic == MINIX_SUPER_MAGIC ++ || sb->s_magic == MINIX_SUPER_MAGIC2; ++#else ++ return 0; +#endif -+out: -+ return err; +} + -+/* -+ * return 0 if processed. -+ */ -+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode, -+ const unsigned int isdir) ++static inline int au_test_cifs(struct super_block *sb __maybe_unused) +{ -+ int err; -+ struct dentry *d; -+ struct qstr *dname; -+ struct hlist_node *p; -+ -+ err = 1; -+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { -+ pr_warn("branch root dir was changed\n"); -+ err = 0; -+ goto out; -+ } -+ -+ if (!isdir) { -+ AuDebugOn(!name); -+ au_iigen_dec(inode); -+ spin_lock(&inode->i_lock); -+ hlist_for_each_entry(d, p, &inode->i_dentry, d_alias) { -+ spin_lock(&d->d_lock); -+ dname = &d->d_name; -+ if (dname->len != nlen -+ && memcmp(dname->name, name, nlen)) { -+ spin_unlock(&d->d_lock); -+ continue; -+ } -+ err = 0; -+ au_digen_dec(d); -+ spin_unlock(&d->d_lock); -+ break; -+ } -+ spin_unlock(&inode->i_lock); -+ } else { -+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR); -+ d = d_find_alias(inode); -+ if (!d) { -+ au_iigen_dec(inode); -+ goto out; -+ } -+ -+ spin_lock(&d->d_lock); -+ dname = &d->d_name; -+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) { -+ spin_unlock(&d->d_lock); -+ err = hn_gen_tree(d); -+ spin_lock(&d->d_lock); -+ } -+ spin_unlock(&d->d_lock); -+ dput(d); -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; ++#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE) ++ return sb->s_magic == CIFS_MAGIC_NUMBER; ++#else ++ return 0; ++#endif +} + -+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir) ++static inline int au_test_fat(struct super_block *sb __maybe_unused) +{ -+ int err; -+ struct inode *inode; ++#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE) ++ return sb->s_magic == MSDOS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ inode = dentry->d_inode; -+ if (IS_ROOT(dentry) -+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */ -+ ) { -+ pr_warn("branch root dir was changed\n"); -+ return 0; -+ } ++static inline int au_test_msdos(struct super_block *sb) ++{ ++ return au_test_fat(sb); ++} + -+ err = 0; -+ if (!isdir) { -+ au_digen_dec(dentry); -+ if (inode) -+ au_iigen_dec(inode); -+ } else { -+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR); -+ if (inode) -+ err = hn_gen_tree(dentry); -+ } ++static inline int au_test_vfat(struct super_block *sb) ++{ ++ return au_test_fat(sb); ++} + -+ AuTraceErr(err); -+ return err; ++static inline int au_test_securityfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_SECURITYFS ++ return sb->s_magic == SECURITYFS_MAGIC; ++#else ++ return 0; ++#endif +} + -+/* ---------------------------------------------------------------------- */ ++static inline int au_test_squashfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE) ++ return sb->s_magic == SQUASHFS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+/* hnotify job flags */ -+#define AuHnJob_XINO0 1 -+#define AuHnJob_GEN (1 << 1) -+#define AuHnJob_DIRENT (1 << 2) -+#define AuHnJob_ISDIR (1 << 3) -+#define AuHnJob_TRYXINO0 (1 << 4) -+#define AuHnJob_MNTPNT (1 << 5) -+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name) -+#define au_fset_hnjob(flags, name) \ -+ do { (flags) |= AuHnJob_##name; } while (0) -+#define au_fclr_hnjob(flags, name) \ -+ do { (flags) &= ~AuHnJob_##name; } while (0) ++static inline int au_test_btrfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE) ++ return sb->s_magic == BTRFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+enum { -+ AuHn_CHILD, -+ AuHn_PARENT, -+ AuHnLast -+}; ++static inline int au_test_xenfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE) ++ return sb->s_magic == XENFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+struct au_hnotify_args { -+ struct inode *h_dir, *dir, *h_child_inode; -+ u32 mask; -+ unsigned int flags[AuHnLast]; -+ unsigned int h_child_nlen; -+ char h_child_name[]; -+}; ++static inline int au_test_debugfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_DEBUG_FS ++ return sb->s_magic == DEBUGFS_MAGIC; ++#else ++ return 0; ++#endif ++} + -+struct hn_job_args { -+ unsigned int flags; -+ struct inode *inode, *h_inode, *dir, *h_dir; -+ struct dentry *dentry; -+ char *h_name; -+ int h_nlen; -+}; ++static inline int au_test_nilfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE) ++ return sb->s_magic == NILFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+static int hn_job(struct hn_job_args *a) ++static inline int au_test_hfsplus(struct super_block *sb __maybe_unused) +{ -+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR); ++#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE) ++ return sb->s_magic == HFSPLUS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} + -+ /* reset xino */ -+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode) -+ hn_xino(a->inode, a->h_inode); /* ignore this error */ ++/* ---------------------------------------------------------------------- */ ++/* ++ * they can't be an aufs branch. ++ */ ++static inline int au_test_fs_unsuppoted(struct super_block *sb) ++{ ++ return ++#ifndef CONFIG_AUFS_BR_RAMFS ++ au_test_ramfs(sb) || ++#endif ++ au_test_procfs(sb) ++ || au_test_sysfs(sb) ++ || au_test_configfs(sb) ++ || au_test_debugfs(sb) ++ || au_test_securityfs(sb) ++ || au_test_xenfs(sb) ++ || au_test_ecryptfs(sb) ++ /* || !strcmp(au_sbtype(sb), "unionfs") */ ++ || au_test_aufs(sb); /* will be supported in next version */ ++} + -+ if (au_ftest_hnjob(a->flags, TRYXINO0) -+ && a->inode -+ && a->h_inode) { -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ if (!a->h_inode->i_nlink) -+ hn_xino(a->inode, a->h_inode); /* ignore this error */ -+ mutex_unlock(&a->h_inode->i_mutex); -+ } ++static inline int au_test_fs_remote(struct super_block *sb) ++{ ++ return !au_test_tmpfs(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ && !au_test_ramfs(sb) ++#endif ++ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV); ++} + -+ /* make the generation obsolete */ -+ if (au_ftest_hnjob(a->flags, GEN)) { -+ int err = -1; -+ if (a->inode) -+ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode, -+ isdir); -+ if (err && a->dentry) -+ hn_gen_by_name(a->dentry, isdir); -+ /* ignore this error */ -+ } ++/* ---------------------------------------------------------------------- */ + -+ /* make dir entries obsolete */ -+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) { -+ struct au_vdir *vdir; ++/* ++ * Note: these functions (below) are created after reading ->getattr() in all ++ * filesystems under linux/fs. it means we have to do so in every update... ++ */ + -+ vdir = au_ivdir(a->inode); -+ if (vdir) -+ vdir->vd_jiffy = 0; -+ /* IMustLock(a->inode); */ -+ /* a->inode->i_version++; */ -+ } ++/* ++ * some filesystems require getattr to refresh the inode attributes before ++ * referencing. ++ * in most cases, we can rely on the inode attribute in NFS (or every remote fs) ++ * and leave the work for d_revalidate() ++ */ ++static inline int au_test_fs_refresh_iattr(struct super_block *sb) ++{ ++ return au_test_nfs(sb) ++ || au_test_fuse(sb) ++ /* || au_test_smbfs(sb) */ /* untested */ ++ /* || au_test_ocfs2(sb) */ /* untested */ ++ /* || au_test_btrfs(sb) */ /* untested */ ++ /* || au_test_coda(sb) */ /* untested */ ++ /* || au_test_v9fs(sb) */ /* untested */ ++ ; ++} + -+ /* can do nothing but warn */ -+ if (au_ftest_hnjob(a->flags, MNTPNT) -+ && a->dentry -+ && d_mountpoint(a->dentry)) -+ pr_warn("mount-point %.*s is removed or renamed\n", -+ AuDLNPair(a->dentry)); ++/* ++ * filesystems which don't maintain i_size or i_blocks. ++ */ ++static inline int au_test_fs_bad_iattr_size(struct super_block *sb) ++{ ++ return au_test_xfs(sb) ++ || au_test_btrfs(sb) ++ || au_test_ubifs(sb) ++ || au_test_hfsplus(sb) /* maintained, but incorrect */ ++ /* || au_test_ext4(sb) */ /* untested */ ++ /* || au_test_ocfs2(sb) */ /* untested */ ++ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */ ++ /* || au_test_sysv(sb) */ /* untested */ ++ /* || au_test_minix(sb) */ /* untested */ ++ ; ++} + -+ return 0; ++/* ++ * filesystems which don't store the correct value in some of their inode ++ * attributes. ++ */ ++static inline int au_test_fs_bad_iattr(struct super_block *sb) ++{ ++ return au_test_fs_bad_iattr_size(sb) ++ /* || au_test_cifs(sb) */ /* untested */ ++ || au_test_fat(sb) ++ || au_test_msdos(sb) ++ || au_test_vfat(sb); +} + -+/* ---------------------------------------------------------------------- */ ++/* they don't check i_nlink in link(2) */ ++static inline int au_test_fs_no_limit_nlink(struct super_block *sb) ++{ ++ return au_test_tmpfs(sb) ++#ifdef CONFIG_AUFS_BR_RAMFS ++ || au_test_ramfs(sb) ++#endif ++ || au_test_ubifs(sb) ++ || au_test_hfsplus(sb); ++} + -+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen, -+ struct inode *dir) ++/* ++ * filesystems which sets S_NOATIME and S_NOCMTIME. ++ */ ++static inline int au_test_fs_notime(struct super_block *sb) +{ -+ struct dentry *dentry, *d, *parent; -+ struct qstr *dname; ++ return au_test_nfs(sb) ++ || au_test_fuse(sb) ++ || au_test_ubifs(sb) ++ /* || au_test_cifs(sb) */ /* untested */ ++ ; ++} + -+ parent = d_find_alias(dir); -+ if (!parent) -+ return NULL; ++/* ++ * filesystems which requires replacing i_mapping. ++ */ ++static inline int au_test_fs_bad_mapping(struct super_block *sb) ++{ ++ return au_test_fuse(sb) ++ || au_test_ubifs(sb); ++} + -+ dentry = NULL; -+ spin_lock(&parent->d_lock); -+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) { -+ /* AuDbg("%.*s\n", AuDLNPair(d)); */ -+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); -+ dname = &d->d_name; -+ if (dname->len != nlen || memcmp(dname->name, name, nlen)) -+ goto cont_unlock; -+ if (au_di(d)) -+ au_digen_dec(d); -+ else -+ goto cont_unlock; -+ if (d->d_count) { -+ dentry = dget_dlock(d); -+ spin_unlock(&d->d_lock); -+ break; -+ } ++/* temporary support for i#1 in cramfs */ ++static inline int au_test_fs_unique_ino(struct inode *inode) ++{ ++ if (au_test_cramfs(inode->i_sb)) ++ return inode->i_ino != 1; ++ return 1; ++} + -+ cont_unlock: -+ spin_unlock(&d->d_lock); -+ } -+ spin_unlock(&parent->d_lock); -+ dput(parent); ++/* ---------------------------------------------------------------------- */ + -+ if (dentry) -+ di_write_lock_child(dentry); ++/* ++ * the filesystem where the xino files placed must support i/o after unlink and ++ * maintain i_size and i_blocks. ++ */ ++static inline int au_test_fs_bad_xino(struct super_block *sb) ++{ ++ return au_test_fs_remote(sb) ++ || au_test_fs_bad_iattr_size(sb) ++ /* don't want unnecessary work for xino */ ++ || au_test_aufs(sb) ++ || au_test_ecryptfs(sb) ++ || au_test_nilfs(sb); ++} + -+ return dentry; ++static inline int au_test_fs_trunc_xino(struct super_block *sb) ++{ ++ return au_test_tmpfs(sb) ++ || au_test_ramfs(sb); +} + -+static struct inode *lookup_wlock_by_ino(struct super_block *sb, -+ aufs_bindex_t bindex, ino_t h_ino) ++/* ++ * test if the @sb is real-readonly. ++ */ ++static inline int au_test_fs_rr(struct super_block *sb) +{ -+ struct inode *inode; -+ ino_t ino; -+ int err; ++ return au_test_squashfs(sb) ++ || au_test_iso9660(sb) ++ || au_test_cramfs(sb) ++ || au_test_romfs(sb); ++} + -+ inode = NULL; -+ err = au_xino_read(sb, bindex, h_ino, &ino); -+ if (!err && ino) -+ inode = ilookup(sb, ino); -+ if (!inode) -+ goto out; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FSTYPE_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/hfsnotify.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,296 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { -+ pr_warn("wrong root branch\n"); -+ iput(inode); -+ inode = NULL; -+ goto out; -+ } ++/* ++ * fsnotify for the lower directories ++ */ + -+ ii_write_lock_child(inode); ++#include "aufs.h" + -+out: -+ return inode; ++/* FS_IN_IGNORED is unnecessary */ ++static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE ++ | FS_CREATE | FS_EVENT_ON_CHILD); ++static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq); ++static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0); ++ ++static void au_hfsn_free_mark(struct fsnotify_mark *mark) ++{ ++ struct au_hnotify *hn = container_of(mark, struct au_hnotify, ++ hn_mark); ++ AuDbg("here\n"); ++ au_cache_free_hnotify(hn); ++ smp_mb__before_atomic_dec(); ++ if (atomic64_dec_and_test(&au_hfsn_ifree)) ++ wake_up(&au_hfsn_wq); +} + -+static void au_hn_bh(void *_args) ++static int au_hfsn_alloc(struct au_hinode *hinode) +{ -+ struct au_hnotify_args *a = _args; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend, bfound; -+ unsigned char xino, try_iput; + int err; -+ struct inode *inode; -+ ino_t h_ino; -+ struct hn_job_args args; -+ struct dentry *dentry; -+ struct au_sbinfo *sbinfo; -+ -+ AuDebugOn(!_args); -+ AuDebugOn(!a->h_dir); -+ AuDebugOn(!a->dir); -+ AuDebugOn(!a->mask); -+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n", -+ a->mask, a->dir->i_ino, a->h_dir->i_ino, -+ a->h_child_inode ? a->h_child_inode->i_ino : 0); ++ struct au_hnotify *hn; ++ struct super_block *sb; ++ struct au_branch *br; ++ struct fsnotify_mark *mark; ++ aufs_bindex_t bindex; + -+ inode = NULL; -+ dentry = NULL; -+ /* -+ * do not lock a->dir->i_mutex here -+ * because of d_revalidate() may cause a deadlock. -+ */ -+ sb = a->dir->i_sb; -+ AuDebugOn(!sb); -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!sbinfo); -+ si_write_lock(sb, AuLock_NOPLMW); ++ hn = hinode->hi_notify; ++ sb = hn->hn_aufs_inode->i_sb; ++ bindex = au_br_index(sb, hinode->hi_id); ++ br = au_sbr(sb, bindex); ++ AuDebugOn(!br->br_hfsn); + -+ ii_read_lock_parent(a->dir); -+ bfound = -1; -+ bend = au_ibend(a->dir); -+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++) -+ if (au_h_iptr(a->dir, bindex) == a->h_dir) { -+ bfound = bindex; -+ break; -+ } -+ ii_read_unlock(a->dir); -+ if (unlikely(bfound < 0)) -+ goto out; ++ mark = &hn->hn_mark; ++ fsnotify_init_mark(mark, au_hfsn_free_mark); ++ mark->mask = AuHfsnMask; ++ /* ++ * by udba rename or rmdir, aufs assign a new inode to the known ++ * h_inode, so specify 1 to allow dups. ++ */ ++ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode, ++ /*mnt*/NULL, /*allow_dups*/1); ++ /* even if err */ ++ fsnotify_put_mark(mark); + -+ xino = !!au_opt_test(au_mntflags(sb), XINO); -+ h_ino = 0; -+ if (a->h_child_inode) -+ h_ino = a->h_child_inode->i_ino; ++ return err; ++} + -+ if (a->h_child_nlen -+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN) -+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT))) -+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen, -+ a->dir); -+ try_iput = 0; -+ if (dentry) -+ inode = dentry->d_inode; -+ if (xino && !inode && h_ino -+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0) -+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0) -+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) { -+ inode = lookup_wlock_by_ino(sb, bfound, h_ino); -+ try_iput = 1; -+ } ++static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn) ++{ ++ struct fsnotify_mark *mark; ++ unsigned long long ull; ++ struct fsnotify_group *group; + -+ args.flags = a->flags[AuHn_CHILD]; -+ args.dentry = dentry; -+ args.inode = inode; -+ args.h_inode = a->h_child_inode; -+ args.dir = a->dir; -+ args.h_dir = a->h_dir; -+ args.h_name = a->h_child_name; -+ args.h_nlen = a->h_child_nlen; -+ err = hn_job(&args); -+ if (dentry) { -+ if (au_di(dentry)) -+ di_write_unlock(dentry); -+ dput(dentry); -+ } -+ if (inode && try_iput) { -+ ii_write_unlock(inode); -+ iput(inode); -+ } ++ ull = atomic64_inc_return(&au_hfsn_ifree); ++ BUG_ON(!ull); + -+ ii_write_lock_parent(a->dir); -+ args.flags = a->flags[AuHn_PARENT]; -+ args.dentry = NULL; -+ args.inode = a->dir; -+ args.h_inode = a->h_dir; -+ args.dir = NULL; -+ args.h_dir = NULL; -+ args.h_name = NULL; -+ args.h_nlen = 0; -+ err = hn_job(&args); -+ ii_write_unlock(a->dir); ++ mark = &hn->hn_mark; ++ spin_lock(&mark->lock); ++ group = mark->group; ++ fsnotify_get_group(group); ++ spin_unlock(&mark->lock); ++ fsnotify_destroy_mark(mark, group); ++ fsnotify_put_group(group); + -+out: -+ iput(a->h_child_inode); -+ iput(a->h_dir); -+ iput(a->dir); -+ si_write_unlock(sb); -+ au_nwt_done(&sbinfo->si_nowait); -+ kfree(a); ++ /* free hn by myself */ ++ return 0; +} + +/* ---------------------------------------------------------------------- */ + -+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, -+ struct qstr *h_child_qstr, struct inode *h_child_inode) ++static void au_hfsn_ctl(struct au_hinode *hinode, int do_set) +{ -+ int err, len; -+ unsigned int flags[AuHnLast], f; -+ unsigned char isdir, isroot, wh; -+ struct inode *dir; -+ struct au_hnotify_args *args; -+ char *p, *h_child_name; -+ -+ err = 0; -+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode); -+ dir = igrab(hnotify->hn_aufs_inode); -+ if (!dir) -+ goto out; ++ struct fsnotify_mark *mark; + -+ isroot = (dir->i_ino == AUFS_ROOT_INO); -+ wh = 0; -+ h_child_name = (void *)h_child_qstr->name; -+ len = h_child_qstr->len; -+ if (h_child_name) { -+ if (len > AUFS_WH_PFX_LEN -+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { -+ h_child_name += AUFS_WH_PFX_LEN; -+ len -= AUFS_WH_PFX_LEN; -+ wh = 1; -+ } ++ mark = &hinode->hi_notify->hn_mark; ++ spin_lock(&mark->lock); ++ if (do_set) { ++ AuDebugOn(mark->mask & AuHfsnMask); ++ mark->mask |= AuHfsnMask; ++ } else { ++ AuDebugOn(!(mark->mask & AuHfsnMask)); ++ mark->mask &= ~AuHfsnMask; + } ++ spin_unlock(&mark->lock); ++ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */ ++} + -+ isdir = 0; -+ if (h_child_inode) -+ isdir = !!S_ISDIR(h_child_inode->i_mode); -+ flags[AuHn_PARENT] = AuHnJob_ISDIR; -+ flags[AuHn_CHILD] = 0; -+ if (isdir) -+ flags[AuHn_CHILD] = AuHnJob_ISDIR; -+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT); -+ au_fset_hnjob(flags[AuHn_CHILD], GEN); -+ switch (mask & FS_EVENTS_POSS_ON_CHILD) { -+ case FS_MOVED_FROM: -+ case FS_MOVED_TO: -+ au_fset_hnjob(flags[AuHn_CHILD], XINO0); -+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); -+ /*FALLTHROUGH*/ -+ case FS_CREATE: -+ AuDebugOn(!h_child_name || !h_child_inode); -+ break; ++/* ---------------------------------------------------------------------- */ + -+ case FS_DELETE: -+ /* -+ * aufs never be able to get this child inode. -+ * revalidation should be in d_revalidate() -+ * by checking i_nlink, i_generation or d_unhashed(). -+ */ -+ AuDebugOn(!h_child_name); -+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0); -+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); -+ break; ++/* #define AuDbgHnotify */ ++#ifdef AuDbgHnotify ++static char *au_hfsn_name(u32 mask) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++#define test_ret(flag) \ ++ do { \ ++ if (mask & flag) \ ++ return #flag; \ ++ } while (0) ++ test_ret(FS_ACCESS); ++ test_ret(FS_MODIFY); ++ test_ret(FS_ATTRIB); ++ test_ret(FS_CLOSE_WRITE); ++ test_ret(FS_CLOSE_NOWRITE); ++ test_ret(FS_OPEN); ++ test_ret(FS_MOVED_FROM); ++ test_ret(FS_MOVED_TO); ++ test_ret(FS_CREATE); ++ test_ret(FS_DELETE); ++ test_ret(FS_DELETE_SELF); ++ test_ret(FS_MOVE_SELF); ++ test_ret(FS_UNMOUNT); ++ test_ret(FS_Q_OVERFLOW); ++ test_ret(FS_IN_IGNORED); ++ test_ret(FS_IN_ISDIR); ++ test_ret(FS_IN_ONESHOT); ++ test_ret(FS_EVENT_ON_CHILD); ++ return ""; ++#undef test_ret ++#else ++ return "??"; ++#endif ++} ++#endif + -+ default: -+ AuDebugOn(1); -+ } ++/* ---------------------------------------------------------------------- */ + -+ if (wh) -+ h_child_inode = NULL; ++static void au_hfsn_free_group(struct fsnotify_group *group) ++{ ++ struct au_br_hfsnotify *hfsn = group->private; + -+ err = -ENOMEM; -+ /* iput() and kfree() will be called in au_hnotify() */ -+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS); -+ if (unlikely(!args)) { -+ AuErr1("no memory\n"); -+ iput(dir); ++ AuDbg("here\n"); ++ kfree(hfsn); ++} ++ ++static int au_hfsn_handle_event(struct fsnotify_group *group, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ struct fsnotify_event *event) ++{ ++ int err; ++ struct au_hnotify *hnotify; ++ struct inode *h_dir, *h_inode; ++ __u32 mask; ++ struct qstr h_child_qstr = QSTR_INIT(event->file_name, event->name_len); ++ ++ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE); ++ ++ err = 0; ++ /* if FS_UNMOUNT happens, there must be another bug */ ++ mask = event->mask; ++ AuDebugOn(mask & FS_UNMOUNT); ++ if (mask & (FS_IN_IGNORED | FS_UNMOUNT)) + goto out; -+ } -+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT]; -+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD]; -+ args->mask = mask; -+ args->dir = dir; -+ args->h_dir = igrab(h_dir); -+ if (h_child_inode) -+ h_child_inode = igrab(h_child_inode); /* can be NULL */ -+ args->h_child_inode = h_child_inode; -+ args->h_child_nlen = len; -+ if (len) { -+ p = (void *)args; -+ p += sizeof(*args); -+ memcpy(p, h_child_name, len); -+ p[len] = 0; -+ } + -+ f = 0; -+ if (!dir->i_nlink) -+ f = AuWkq_NEST; -+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f); -+ if (unlikely(err)) { -+ pr_err("wkq %d\n", err); -+ iput(args->h_child_inode); -+ iput(args->h_dir); -+ iput(args->dir); -+ kfree(args); ++ h_dir = event->to_tell; ++ h_inode = event->inode; ++#ifdef AuDbgHnotify ++ au_debug(1); ++ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1 ++ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) { ++ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n", ++ h_dir->i_ino, mask, au_hfsn_name(mask), ++ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0); ++ /* WARN_ON(1); */ + } ++ au_debug(0); ++#endif ++ ++ AuDebugOn(!inode_mark); ++ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark); ++ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode); + +out: + return err; +} + ++/* isn't it waste to ask every registered 'group'? */ ++/* copied from linux/fs/notify/inotify/inotify_fsnotiry.c */ ++/* it should be exported to modules */ ++static bool au_hfsn_should_send_event(struct fsnotify_group *group, ++ struct inode *h_inode, ++ struct fsnotify_mark *inode_mark, ++ struct fsnotify_mark *vfsmount_mark, ++ __u32 mask, void *data, int data_type) ++{ ++ mask = (mask & ~FS_EVENT_ON_CHILD); ++ return inode_mark->mask & mask; ++} ++ ++static struct fsnotify_ops au_hfsn_ops = { ++ .should_send_event = au_hfsn_should_send_event, ++ .handle_event = au_hfsn_handle_event, ++ .free_group_priv = au_hfsn_free_group ++}; ++ +/* ---------------------------------------------------------------------- */ + -+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm) ++static void au_hfsn_fin_br(struct au_branch *br) ++{ ++ struct au_br_hfsnotify *hfsn; ++ ++ hfsn = br->br_hfsn; ++ if (hfsn) ++ fsnotify_put_group(hfsn->hfsn_group); ++} ++ ++static int au_hfsn_init_br(struct au_branch *br, int perm) +{ + int err; ++ struct fsnotify_group *group; ++ struct au_br_hfsnotify *hfsn; + -+ AuDebugOn(!(udba & AuOptMask_UDBA)); ++ err = 0; ++ br->br_hfsn = NULL; ++ if (!au_br_hnotifyable(perm)) ++ goto out; ++ ++ err = -ENOMEM; ++ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS); ++ if (unlikely(!hfsn)) ++ goto out; + + err = 0; -+ if (au_hnotify_op.reset_br) -+ err = au_hnotify_op.reset_br(udba, br, perm); ++ group = fsnotify_alloc_group(&au_hfsn_ops); ++ if (IS_ERR(group)) { ++ err = PTR_ERR(group); ++ pr_err("fsnotify_alloc_group() failed, %d\n", err); ++ goto out_hfsn; ++ } ++ ++ group->private = hfsn; ++ hfsn->hfsn_group = group; ++ br->br_hfsn = hfsn; ++ goto out; /* success */ + ++out_hfsn: ++ kfree(hfsn); ++out: + return err; +} + -+int au_hnotify_init_br(struct au_branch *br, int perm) ++static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm) +{ + int err; + + err = 0; -+ if (au_hnotify_op.init_br) -+ err = au_hnotify_op.init_br(br, perm); ++ if (!br->br_hfsn) ++ err = au_hfsn_init_br(br, perm); + + return err; +} + -+void au_hnotify_fin_br(struct au_branch *br) -+{ -+ if (au_hnotify_op.fin_br) -+ au_hnotify_op.fin_br(br); -+} ++/* ---------------------------------------------------------------------- */ + -+static void au_hn_destroy_cache(void) ++static void au_hfsn_fin(void) +{ -+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]); -+ au_cachep[AuCache_HNOTIFY] = NULL; ++ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree)); ++ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree)); +} + -+int __init au_hnotify_init(void) ++const struct au_hnotify_op au_hnotify_op = { ++ .ctl = au_hfsn_ctl, ++ .alloc = au_hfsn_alloc, ++ .free = au_hfsn_free, ++ ++ .fin = au_hfsn_fin, ++ ++ .reset_br = au_hfsn_reset_br, ++ .fin_br = au_hfsn_fin_br, ++ .init_br = au_hfsn_init_br ++}; +--- /dev/null ++++ linux-3.9/fs/aufs/hfsplus.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (C) 2010-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * special support for filesystems which aqucires an inode mutex ++ * at final closing a file, eg, hfsplus. ++ * ++ * This trick is very simple and stupid, just to open the file before really ++ * neceeary open to tell hfsplus that this is not the final closing. ++ * The caller should call au_h_open_pre() after acquiring the inode mutex, ++ * and au_h_open_post() after releasing it. ++ */ ++ ++#include "aufs.h" ++ ++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex) +{ -+ int err; ++ struct file *h_file; ++ struct dentry *h_dentry; + -+ err = -ENOMEM; -+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify); -+ if (au_cachep[AuCache_HNOTIFY]) { -+ err = 0; -+ if (au_hnotify_op.init) -+ err = au_hnotify_op.init(); -+ if (unlikely(err)) -+ au_hn_destroy_cache(); -+ } -+ AuTraceErr(err); -+ return err; ++ h_dentry = au_h_dptr(dentry, bindex); ++ AuDebugOn(!h_dentry); ++ AuDebugOn(!h_dentry->d_inode); ++ ++ h_file = NULL; ++ if (au_test_hfsplus(h_dentry->d_sb) ++ && S_ISREG(h_dentry->d_inode->i_mode)) ++ h_file = au_h_open(dentry, bindex, ++ O_RDONLY | O_NOATIME | O_LARGEFILE, ++ /*file*/NULL); ++ return h_file; +} + -+void au_hnotify_fin(void) ++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, ++ struct file *h_file) +{ -+ if (au_hnotify_op.fin) -+ au_hnotify_op.fin(); -+ /* cf. au_cache_fin() */ -+ if (au_cachep[AuCache_HNOTIFY]) -+ au_hn_destroy_cache(); ++ if (h_file) { ++ fput(h_file); ++ au_sbr_put(dentry->d_sb, bindex); ++ } +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/iinfo.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,276 @@ ++++ linux-3.9/fs/aufs/hnotify.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,712 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -13175,1352 +13051,1812 @@ + */ + +/* -+ * inode private data ++ * abstraction to notify the direct changes on lower directories + */ + +#include "aufs.h" + -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode) +{ -+ struct inode *h_inode; ++ int err; ++ struct au_hnotify *hn; + -+ IiMustAnyLock(inode); ++ err = -ENOMEM; ++ hn = au_cache_alloc_hnotify(); ++ if (hn) { ++ hn->hn_aufs_inode = inode; ++ hinode->hi_notify = hn; ++ err = au_hnotify_op.alloc(hinode); ++ AuTraceErr(err); ++ if (unlikely(err)) { ++ hinode->hi_notify = NULL; ++ au_cache_free_hnotify(hn); ++ /* ++ * The upper dir was removed by udba, but the same named ++ * dir left. In this case, aufs assignes a new inode ++ * number and set the monitor again. ++ * For the lower dir, the old monitnor is still left. ++ */ ++ if (err == -EEXIST) ++ err = 0; ++ } ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++void au_hn_free(struct au_hinode *hinode) ++{ ++ struct au_hnotify *hn; ++ ++ hn = hinode->hi_notify; ++ if (hn) { ++ hinode->hi_notify = NULL; ++ if (au_hnotify_op.free(hinode, hn)) ++ au_cache_free_hnotify(hn); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_hn_ctl(struct au_hinode *hinode, int do_set) ++{ ++ if (hinode->hi_notify) ++ au_hnotify_op.ctl(hinode, do_set); ++} ++ ++void au_hn_reset(struct inode *inode, unsigned int flags) ++{ ++ aufs_bindex_t bindex, bend; ++ struct inode *hi; ++ struct dentry *iwhdentry; ++ ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ hi = au_h_iptr(inode, bindex); ++ if (!hi) ++ continue; ++ ++ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */ ++ iwhdentry = au_hi_wh(inode, bindex); ++ if (iwhdentry) ++ dget(iwhdentry); ++ au_igrab(hi); ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ au_set_h_iptr(inode, bindex, au_igrab(hi), ++ flags & ~AuHi_XINO); ++ iput(hi); ++ dput(iwhdentry); ++ /* mutex_unlock(&hi->i_mutex); */ ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int hn_xino(struct inode *inode, struct inode *h_inode) ++{ ++ int err; ++ aufs_bindex_t bindex, bend, bfound, bstart; ++ struct inode *h_i; ++ ++ err = 0; ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warn("branch root dir was changed\n"); ++ goto out; ++ } ++ ++ bfound = -1; ++ bend = au_ibend(inode); ++ bstart = au_ibstart(inode); ++#if 0 /* reserved for future use */ ++ if (bindex == bend) { ++ /* keep this ino in rename case */ ++ goto out; ++ } ++#endif ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex) == h_inode) { ++ bfound = bindex; ++ break; ++ } ++ if (bfound < 0) ++ goto out; + -+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ return h_inode; -+} ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(inode, bindex); ++ if (!h_i) ++ continue; + -+/* todo: hard/soft set? */ -+void au_hiput(struct au_hinode *hinode) -+{ -+ au_hn_free(hinode); -+ dput(hinode->hi_whdentry); -+ iput(hinode->hi_inode); -+} ++ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0); ++ /* ignore this error */ ++ /* bad action? */ ++ } + -+unsigned int au_hi_flags(struct inode *inode, int isdir) -+{ -+ unsigned int flags; -+ const unsigned int mnt_flags = au_mntflags(inode->i_sb); ++ /* children inode number will be broken */ + -+ flags = 0; -+ if (au_opt_test(mnt_flags, XINO)) -+ au_fset_hi(flags, XINO); -+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) -+ au_fset_hi(flags, HNOTIFY); -+ return flags; ++out: ++ AuTraceErr(err); ++ return err; +} + -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags) ++static int hn_gen_tree(struct dentry *dentry) +{ -+ struct au_hinode *hinode; -+ struct inode *hi; -+ struct au_iinfo *iinfo = au_ii(inode); -+ -+ IiMustWriteLock(inode); ++ int err, i, j, ndentry; ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; + -+ hinode = iinfo->ii_hinode + bindex; -+ hi = hinode->hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ err = au_dpages_init(&dpages, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, dentry, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; + -+ if (hi) -+ au_hiput(hinode); -+ hinode->hi_inode = h_inode; -+ if (h_inode) { -+ int err; -+ struct super_block *sb = inode->i_sb; -+ struct au_branch *br; ++ for (i = 0; i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; j < ndentry; j++) { ++ struct dentry *d; + -+ AuDebugOn(inode->i_mode -+ && (h_inode->i_mode & S_IFMT) -+ != (inode->i_mode & S_IFMT)); -+ if (bindex == iinfo->ii_bstart) -+ au_cpup_igen(inode, h_inode); -+ br = au_sbr(sb, bindex); -+ hinode->hi_id = br->br_id; -+ if (au_ftest_hi(flags, XINO)) { -+ err = au_xino_write(sb, bindex, h_inode->i_ino, -+ inode->i_ino); -+ if (unlikely(err)) -+ AuIOErr1("failed au_xino_write() %d\n", err); -+ } ++ d = dentries[j]; ++ if (IS_ROOT(d)) ++ continue; + -+ if (au_ftest_hi(flags, HNOTIFY) -+ && au_br_hnotifyable(br->br_perm)) { -+ err = au_hn_alloc(hinode, inode); -+ if (unlikely(err)) -+ AuIOErr1("au_hn_alloc() %d\n", err); ++ au_digen_dec(d); ++ if (d->d_inode) ++ /* todo: reset children xino? ++ cached children only? */ ++ au_iigen_dec(d->d_inode); + } + } ++ ++out_dpages: ++ au_dpages_free(&dpages); ++ ++#if 0 ++ /* discard children */ ++ dentry_unhash(dentry); ++ dput(dentry); ++#endif ++out: ++ return err; +} + -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh) ++/* ++ * return 0 if processed. ++ */ ++static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode, ++ const unsigned int isdir) +{ -+ struct au_hinode *hinode; ++ int err; ++ struct dentry *d; ++ struct qstr *dname; + -+ IiMustWriteLock(inode); ++ err = 1; ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warn("branch root dir was changed\n"); ++ err = 0; ++ goto out; ++ } + -+ hinode = au_ii(inode)->ii_hinode + bindex; -+ AuDebugOn(hinode->hi_whdentry); -+ hinode->hi_whdentry = h_wh; -+} ++ if (!isdir) { ++ AuDebugOn(!name); ++ au_iigen_dec(inode); ++ spin_lock(&inode->i_lock); ++ hlist_for_each_entry(d, &inode->i_dentry, d_alias) { ++ spin_lock(&d->d_lock); ++ dname = &d->d_name; ++ if (dname->len != nlen ++ && memcmp(dname->name, name, nlen)) { ++ spin_unlock(&d->d_lock); ++ continue; ++ } ++ err = 0; ++ au_digen_dec(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ spin_unlock(&inode->i_lock); ++ } else { ++ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR); ++ d = d_find_alias(inode); ++ if (!d) { ++ au_iigen_dec(inode); ++ goto out; ++ } + -+void au_update_iigen(struct inode *inode, int half) -+{ -+ struct au_iinfo *iinfo; -+ struct au_iigen *iigen; -+ unsigned int sigen; ++ spin_lock(&d->d_lock); ++ dname = &d->d_name; ++ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) { ++ spin_unlock(&d->d_lock); ++ err = hn_gen_tree(d); ++ spin_lock(&d->d_lock); ++ } ++ spin_unlock(&d->d_lock); ++ dput(d); ++ } + -+ sigen = au_sigen(inode->i_sb); -+ iinfo = au_ii(inode); -+ iigen = &iinfo->ii_generation; -+ spin_lock(&iinfo->ii_genspin); -+ iigen->ig_generation = sigen; -+ if (half) -+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED); -+ else -+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED); -+ spin_unlock(&iinfo->ii_genspin); ++out: ++ AuTraceErr(err); ++ return err; +} + -+/* it may be called at remount time, too */ -+void au_update_ibrange(struct inode *inode, int do_put_zero) ++static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir) +{ -+ struct au_iinfo *iinfo; -+ aufs_bindex_t bindex, bend; -+ -+ iinfo = au_ii(inode); -+ if (!iinfo) -+ return; -+ -+ IiMustWriteLock(inode); ++ int err; ++ struct inode *inode; + -+ if (do_put_zero && iinfo->ii_bstart >= 0) { -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; -+ bindex++) { -+ struct inode *h_i; ++ inode = dentry->d_inode; ++ if (IS_ROOT(dentry) ++ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */ ++ ) { ++ pr_warn("branch root dir was changed\n"); ++ return 0; ++ } + -+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; -+ if (h_i && !h_i->i_nlink) -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ } ++ err = 0; ++ if (!isdir) { ++ au_digen_dec(dentry); ++ if (inode) ++ au_iigen_dec(inode); ++ } else { ++ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR); ++ if (inode) ++ err = hn_gen_tree(dentry); + } + -+ iinfo->ii_bstart = -1; -+ iinfo->ii_bend = -1; -+ bend = au_sbend(inode->i_sb); -+ for (bindex = 0; bindex <= bend; bindex++) -+ if (iinfo->ii_hinode[0 + bindex].hi_inode) { -+ iinfo->ii_bstart = bindex; -+ break; -+ } -+ if (iinfo->ii_bstart >= 0) -+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--) -+ if (iinfo->ii_hinode[0 + bindex].hi_inode) { -+ iinfo->ii_bend = bindex; -+ break; -+ } -+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend); ++ AuTraceErr(err); ++ return err; +} + -+/* ---------------------------------------------------------------------- */ ++/* ---------------------------------------------------------------------- */ ++ ++/* hnotify job flags */ ++#define AuHnJob_XINO0 1 ++#define AuHnJob_GEN (1 << 1) ++#define AuHnJob_DIRENT (1 << 2) ++#define AuHnJob_ISDIR (1 << 3) ++#define AuHnJob_TRYXINO0 (1 << 4) ++#define AuHnJob_MNTPNT (1 << 5) ++#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name) ++#define au_fset_hnjob(flags, name) \ ++ do { (flags) |= AuHnJob_##name; } while (0) ++#define au_fclr_hnjob(flags, name) \ ++ do { (flags) &= ~AuHnJob_##name; } while (0) ++ ++enum { ++ AuHn_CHILD, ++ AuHn_PARENT, ++ AuHnLast ++}; + -+void au_icntnr_init_once(void *_c) -+{ -+ struct au_icntnr *c = _c; -+ struct au_iinfo *iinfo = &c->iinfo; -+ static struct lock_class_key aufs_ii; ++struct au_hnotify_args { ++ struct inode *h_dir, *dir, *h_child_inode; ++ u32 mask; ++ unsigned int flags[AuHnLast]; ++ unsigned int h_child_nlen; ++ char h_child_name[]; ++}; + -+ spin_lock_init(&iinfo->ii_genspin); -+ au_rw_init(&iinfo->ii_rwsem); -+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii); -+ inode_init_once(&c->vfs_inode); -+} ++struct hn_job_args { ++ unsigned int flags; ++ struct inode *inode, *h_inode, *dir, *h_dir; ++ struct dentry *dentry; ++ char *h_name; ++ int h_nlen; ++}; + -+int au_iinfo_init(struct inode *inode) ++static int hn_job(struct hn_job_args *a) +{ -+ struct au_iinfo *iinfo; -+ struct super_block *sb; -+ int nbr, i; ++ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR); + -+ sb = inode->i_sb; -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ nbr = au_sbend(sb) + 1; -+ if (unlikely(nbr <= 0)) -+ nbr = 1; -+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); -+ if (iinfo->ii_hinode) { -+ au_ninodes_inc(sb); -+ for (i = 0; i < nbr; i++) -+ iinfo->ii_hinode[i].hi_id = -1; ++ /* reset xino */ ++ if (au_ftest_hnjob(a->flags, XINO0) && a->inode) ++ hn_xino(a->inode, a->h_inode); /* ignore this error */ + -+ iinfo->ii_generation.ig_generation = au_sigen(sb); -+ iinfo->ii_bstart = -1; -+ iinfo->ii_bend = -1; -+ iinfo->ii_vdir = NULL; -+ return 0; ++ if (au_ftest_hnjob(a->flags, TRYXINO0) ++ && a->inode ++ && a->h_inode) { ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ if (!a->h_inode->i_nlink) ++ hn_xino(a->inode, a->h_inode); /* ignore this error */ ++ mutex_unlock(&a->h_inode->i_mutex); + } -+ return -ENOMEM; -+} + -+int au_ii_realloc(struct au_iinfo *iinfo, int nbr) -+{ -+ int err, sz; -+ struct au_hinode *hip; ++ /* make the generation obsolete */ ++ if (au_ftest_hnjob(a->flags, GEN)) { ++ int err = -1; ++ if (a->inode) ++ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode, ++ isdir); ++ if (err && a->dentry) ++ hn_gen_by_name(a->dentry, isdir); ++ /* ignore this error */ ++ } + -+ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ /* make dir entries obsolete */ ++ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) { ++ struct au_vdir *vdir; + -+ err = -ENOMEM; -+ sz = sizeof(*hip) * (iinfo->ii_bend + 1); -+ if (!sz) -+ sz = sizeof(*hip); -+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); -+ if (hip) { -+ iinfo->ii_hinode = hip; -+ err = 0; ++ vdir = au_ivdir(a->inode); ++ if (vdir) ++ vdir->vd_jiffy = 0; ++ /* IMustLock(a->inode); */ ++ /* a->inode->i_version++; */ + } + -+ return err; ++ /* can do nothing but warn */ ++ if (au_ftest_hnjob(a->flags, MNTPNT) ++ && a->dentry ++ && d_mountpoint(a->dentry)) ++ pr_warn("mount-point %.*s is removed or renamed\n", ++ AuDLNPair(a->dentry)); ++ ++ return 0; +} + -+void au_iinfo_fin(struct inode *inode) ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen, ++ struct inode *dir) +{ -+ struct au_iinfo *iinfo; -+ struct au_hinode *hi; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend; -+ const unsigned char unlinked = !inode->i_nlink; ++ struct dentry *dentry, *d, *parent; ++ struct qstr *dname; + -+ iinfo = au_ii(inode); -+ /* bad_inode case */ -+ if (!iinfo) -+ return; ++ parent = d_find_alias(dir); ++ if (!parent) ++ return NULL; + -+ sb = inode->i_sb; -+ au_ninodes_dec(sb); -+ if (si_pid_test(sb)) -+ au_xino_delete_inode(inode, unlinked); -+ else { -+ /* -+ * it is safe to hide the dependency between sbinfo and -+ * sb->s_umount. -+ */ -+ lockdep_off(); -+ si_noflush_read_lock(sb); -+ au_xino_delete_inode(inode, unlinked); -+ si_read_unlock(sb); -+ lockdep_on(); ++ dentry = NULL; ++ spin_lock(&parent->d_lock); ++ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) { ++ /* AuDbg("%.*s\n", AuDLNPair(d)); */ ++ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); ++ dname = &d->d_name; ++ if (dname->len != nlen || memcmp(dname->name, name, nlen)) ++ goto cont_unlock; ++ if (au_di(d)) ++ au_digen_dec(d); ++ else ++ goto cont_unlock; ++ if (d->d_count) { ++ dentry = dget_dlock(d); ++ spin_unlock(&d->d_lock); ++ break; ++ } ++ ++ cont_unlock: ++ spin_unlock(&d->d_lock); + } ++ spin_unlock(&parent->d_lock); ++ dput(parent); + -+ if (iinfo->ii_vdir) -+ au_vdir_free(iinfo->ii_vdir); ++ if (dentry) ++ di_write_lock_child(dentry); + -+ bindex = iinfo->ii_bstart; -+ if (bindex >= 0) { -+ hi = iinfo->ii_hinode + bindex; -+ bend = iinfo->ii_bend; -+ while (bindex++ <= bend) { -+ if (hi->hi_inode) -+ au_hiput(hi); -+ hi++; -+ } -+ } -+ kfree(iinfo->ii_hinode); -+ iinfo->ii_hinode = NULL; -+ AuRwDestroy(&iinfo->ii_rwsem); ++ return dentry; +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/inode.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,492 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ + -+/* -+ * inode functions -+ */ ++static struct inode *lookup_wlock_by_ino(struct super_block *sb, ++ aufs_bindex_t bindex, ino_t h_ino) ++{ ++ struct inode *inode; ++ ino_t ino; ++ int err; + -+#include "aufs.h" ++ inode = NULL; ++ err = au_xino_read(sb, bindex, h_ino, &ino); ++ if (!err && ino) ++ inode = ilookup(sb, ino); ++ if (!inode) ++ goto out; + -+struct inode *au_igrab(struct inode *inode) -+{ -+ if (inode) { -+ AuDebugOn(!atomic_read(&inode->i_count)); -+ ihold(inode); ++ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { ++ pr_warn("wrong root branch\n"); ++ iput(inode); ++ inode = NULL; ++ goto out; + } -+ return inode; -+} + -+static void au_refresh_hinode_attr(struct inode *inode, int do_version) -+{ -+ au_cpup_attr_all(inode, /*force*/0); -+ au_update_iigen(inode, /*half*/1); -+ if (do_version) -+ inode->i_version++; ++ ii_write_lock_child(inode); ++ ++out: ++ return inode; +} + -+static int au_ii_refresh(struct inode *inode, int *update) ++static void au_hn_bh(void *_args) +{ -+ int err, e; -+ umode_t type; -+ aufs_bindex_t bindex, new_bindex; ++ struct au_hnotify_args *a = _args; + struct super_block *sb; -+ struct au_iinfo *iinfo; -+ struct au_hinode *p, *q, tmp; ++ aufs_bindex_t bindex, bend, bfound; ++ unsigned char xino, try_iput; ++ int err; ++ struct inode *inode; ++ ino_t h_ino; ++ struct hn_job_args args; ++ struct dentry *dentry; ++ struct au_sbinfo *sbinfo; ++ ++ AuDebugOn(!_args); ++ AuDebugOn(!a->h_dir); ++ AuDebugOn(!a->dir); ++ AuDebugOn(!a->mask); ++ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n", ++ a->mask, a->dir->i_ino, a->h_dir->i_ino, ++ a->h_child_inode ? a->h_child_inode->i_ino : 0); + -+ IiMustWriteLock(inode); ++ inode = NULL; ++ dentry = NULL; ++ /* ++ * do not lock a->dir->i_mutex here ++ * because of d_revalidate() may cause a deadlock. ++ */ ++ sb = a->dir->i_sb; ++ AuDebugOn(!sb); ++ sbinfo = au_sbi(sb); ++ AuDebugOn(!sbinfo); ++ si_write_lock(sb, AuLock_NOPLMW); + -+ *update = 0; -+ sb = inode->i_sb; -+ type = inode->i_mode & S_IFMT; -+ iinfo = au_ii(inode); -+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); -+ if (unlikely(err)) ++ ii_read_lock_parent(a->dir); ++ bfound = -1; ++ bend = au_ibend(a->dir); ++ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++) ++ if (au_h_iptr(a->dir, bindex) == a->h_dir) { ++ bfound = bindex; ++ break; ++ } ++ ii_read_unlock(a->dir); ++ if (unlikely(bfound < 0)) + goto out; + -+ AuDebugOn(iinfo->ii_bstart < 0); -+ p = iinfo->ii_hinode + iinfo->ii_bstart; -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; -+ bindex++, p++) { -+ if (!p->hi_inode) -+ continue; -+ -+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); -+ new_bindex = au_br_index(sb, p->hi_id); -+ if (new_bindex == bindex) -+ continue; ++ xino = !!au_opt_test(au_mntflags(sb), XINO); ++ h_ino = 0; ++ if (a->h_child_inode) ++ h_ino = a->h_child_inode->i_ino; + -+ if (new_bindex < 0) { -+ *update = 1; -+ au_hiput(p); -+ p->hi_inode = NULL; -+ continue; -+ } ++ if (a->h_child_nlen ++ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT))) ++ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen, ++ a->dir); ++ try_iput = 0; ++ if (dentry) ++ inode = dentry->d_inode; ++ if (xino && !inode && h_ino ++ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0) ++ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) { ++ inode = lookup_wlock_by_ino(sb, bfound, h_ino); ++ try_iput = 1; ++ } + -+ if (new_bindex < iinfo->ii_bstart) -+ iinfo->ii_bstart = new_bindex; -+ if (iinfo->ii_bend < new_bindex) -+ iinfo->ii_bend = new_bindex; -+ /* swap two lower inode, and loop again */ -+ q = iinfo->ii_hinode + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hi_inode) { -+ bindex--; -+ p--; -+ } ++ args.flags = a->flags[AuHn_CHILD]; ++ args.dentry = dentry; ++ args.inode = inode; ++ args.h_inode = a->h_child_inode; ++ args.dir = a->dir; ++ args.h_dir = a->h_dir; ++ args.h_name = a->h_child_name; ++ args.h_nlen = a->h_child_nlen; ++ err = hn_job(&args); ++ if (dentry) { ++ if (au_di(dentry)) ++ di_write_unlock(dentry); ++ dput(dentry); + } -+ au_update_ibrange(inode, /*do_put_zero*/0); -+ e = au_dy_irefresh(inode); -+ if (unlikely(e && !err)) -+ err = e; ++ if (inode && try_iput) { ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++ ++ ii_write_lock_parent(a->dir); ++ args.flags = a->flags[AuHn_PARENT]; ++ args.dentry = NULL; ++ args.inode = a->dir; ++ args.h_inode = a->h_dir; ++ args.dir = NULL; ++ args.h_dir = NULL; ++ args.h_name = NULL; ++ args.h_nlen = 0; ++ err = hn_job(&args); ++ ii_write_unlock(a->dir); + +out: -+ AuTraceErr(err); -+ return err; ++ iput(a->h_child_inode); ++ iput(a->h_dir); ++ iput(a->dir); ++ si_write_unlock(sb); ++ au_nwt_done(&sbinfo->si_nowait); ++ kfree(a); +} + -+int au_refresh_hinode_self(struct inode *inode) ++/* ---------------------------------------------------------------------- */ ++ ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ struct qstr *h_child_qstr, struct inode *h_child_inode) +{ -+ int err, update; ++ int err, len; ++ unsigned int flags[AuHnLast], f; ++ unsigned char isdir, isroot, wh; ++ struct inode *dir; ++ struct au_hnotify_args *args; ++ char *p, *h_child_name; + -+ err = au_ii_refresh(inode, &update); -+ if (!err) -+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); ++ err = 0; ++ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode); ++ dir = igrab(hnotify->hn_aufs_inode); ++ if (!dir) ++ goto out; + -+ AuTraceErr(err); -+ return err; -+} ++ isroot = (dir->i_ino == AUFS_ROOT_INO); ++ wh = 0; ++ h_child_name = (void *)h_child_qstr->name; ++ len = h_child_qstr->len; ++ if (h_child_name) { ++ if (len > AUFS_WH_PFX_LEN ++ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ h_child_name += AUFS_WH_PFX_LEN; ++ len -= AUFS_WH_PFX_LEN; ++ wh = 1; ++ } ++ } + -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry) -+{ -+ int err, e, update; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, bend; -+ unsigned char isdir; -+ struct au_hinode *p; -+ struct au_iinfo *iinfo; ++ isdir = 0; ++ if (h_child_inode) ++ isdir = !!S_ISDIR(h_child_inode->i_mode); ++ flags[AuHn_PARENT] = AuHnJob_ISDIR; ++ flags[AuHn_CHILD] = 0; ++ if (isdir) ++ flags[AuHn_CHILD] = AuHnJob_ISDIR; ++ au_fset_hnjob(flags[AuHn_PARENT], DIRENT); ++ au_fset_hnjob(flags[AuHn_CHILD], GEN); ++ switch (mask & FS_EVENTS_POSS_ON_CHILD) { ++ case FS_MOVED_FROM: ++ case FS_MOVED_TO: ++ au_fset_hnjob(flags[AuHn_CHILD], XINO0); ++ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); ++ /*FALLTHROUGH*/ ++ case FS_CREATE: ++ AuDebugOn(!h_child_name || !h_child_inode); ++ break; + -+ err = au_ii_refresh(inode, &update); -+ if (unlikely(err)) -+ goto out; ++ case FS_DELETE: ++ /* ++ * aufs never be able to get this child inode. ++ * revalidation should be in d_revalidate() ++ * by checking i_nlink, i_generation or d_unhashed(). ++ */ ++ AuDebugOn(!h_child_name); ++ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0); ++ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); ++ break; + -+ update = 0; -+ iinfo = au_ii(inode); -+ p = iinfo->ii_hinode + iinfo->ii_bstart; -+ mode = (inode->i_mode & S_IFMT); -+ isdir = S_ISDIR(mode); -+ flags = au_hi_flags(inode, isdir); -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { -+ struct inode *h_i; -+ struct dentry *h_d; ++ default: ++ AuDebugOn(1); ++ } + -+ h_d = au_h_dptr(dentry, bindex); -+ if (!h_d || !h_d->d_inode) -+ continue; ++ if (wh) ++ h_child_inode = NULL; + -+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT)); -+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { -+ h_i = au_h_iptr(inode, bindex); -+ if (h_i) { -+ if (h_i == h_d->d_inode) -+ continue; -+ err = -EIO; -+ break; -+ } -+ } -+ if (bindex < iinfo->ii_bstart) -+ iinfo->ii_bstart = bindex; -+ if (iinfo->ii_bend < bindex) -+ iinfo->ii_bend = bindex; -+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); -+ update = 1; ++ err = -ENOMEM; ++ /* iput() and kfree() will be called in au_hnotify() */ ++ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS); ++ if (unlikely(!args)) { ++ AuErr1("no memory\n"); ++ iput(dir); ++ goto out; ++ } ++ args->flags[AuHn_PARENT] = flags[AuHn_PARENT]; ++ args->flags[AuHn_CHILD] = flags[AuHn_CHILD]; ++ args->mask = mask; ++ args->dir = dir; ++ args->h_dir = igrab(h_dir); ++ if (h_child_inode) ++ h_child_inode = igrab(h_child_inode); /* can be NULL */ ++ args->h_child_inode = h_child_inode; ++ args->h_child_nlen = len; ++ if (len) { ++ p = (void *)args; ++ p += sizeof(*args); ++ memcpy(p, h_child_name, len); ++ p[len] = 0; ++ } ++ ++ f = 0; ++ if (!dir->i_nlink) ++ f = AuWkq_NEST; ++ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f); ++ if (unlikely(err)) { ++ pr_err("wkq %d\n", err); ++ iput(args->h_child_inode); ++ iput(args->h_dir); ++ iput(args->dir); ++ kfree(args); + } -+ au_update_ibrange(inode, /*do_put_zero*/0); -+ e = au_dy_irefresh(inode); -+ if (unlikely(e && !err)) -+ err = e; -+ if (!err) -+ au_refresh_hinode_attr(inode, update && isdir); + +out: -+ AuTraceErr(err); + return err; +} + -+static int set_inode(struct inode *inode, struct dentry *dentry) ++/* ---------------------------------------------------------------------- */ ++ ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm) +{ + int err; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, bstart, btail; -+ unsigned char isdir; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct au_iinfo *iinfo; + -+ IiMustWriteLock(inode); ++ AuDebugOn(!(udba & AuOptMask_UDBA)); + + err = 0; -+ isdir = 0; -+ bstart = au_dbstart(dentry); -+ h_inode = au_h_dptr(dentry, bstart)->d_inode; -+ mode = h_inode->i_mode; -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_iop; -+ inode->i_fop = &aufs_file_fop; -+ err = au_dy_iaop(inode, bstart, h_inode); -+ if (unlikely(err)) -+ goto out; -+ break; -+ case S_IFDIR: -+ isdir = 1; -+ btail = au_dbtaildir(dentry); -+ inode->i_op = &aufs_dir_iop; -+ inode->i_fop = &aufs_dir_fop; -+ break; -+ case S_IFLNK: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_symlink_iop; -+ break; -+ case S_IFBLK: -+ case S_IFCHR: -+ case S_IFIFO: -+ case S_IFSOCK: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_iop; -+ au_init_special_fop(inode, mode, h_inode->i_rdev); -+ break; -+ default: -+ AuIOErr("Unknown file type 0%o\n", mode); -+ err = -EIO; -+ goto out; -+ } ++ if (au_hnotify_op.reset_br) ++ err = au_hnotify_op.reset_br(udba, br, perm); + -+ /* do not set hnotify for whiteouted dirs (SHWH mode) */ -+ flags = au_hi_flags(inode, isdir); -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) -+ && au_ftest_hi(flags, HNOTIFY) -+ && dentry->d_name.len > AUFS_WH_PFX_LEN -+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) -+ au_fclr_hi(flags, HNOTIFY); -+ iinfo = au_ii(inode); -+ iinfo->ii_bstart = bstart; -+ iinfo->ii_bend = btail; -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry) -+ au_set_h_iptr(inode, bindex, -+ au_igrab(h_dentry->d_inode), flags); -+ } -+ au_cpup_attr_all(inode, /*force*/1); ++ return err; ++} + -+out: ++int au_hnotify_init_br(struct au_branch *br, int perm) ++{ ++ int err; ++ ++ err = 0; ++ if (au_hnotify_op.init_br) ++ err = au_hnotify_op.init_br(br, perm); ++ ++ return err; ++} ++ ++void au_hnotify_fin_br(struct au_branch *br) ++{ ++ if (au_hnotify_op.fin_br) ++ au_hnotify_op.fin_br(br); ++} ++ ++static void au_hn_destroy_cache(void) ++{ ++ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]); ++ au_cachep[AuCache_HNOTIFY] = NULL; ++} ++ ++int __init au_hnotify_init(void) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify); ++ if (au_cachep[AuCache_HNOTIFY]) { ++ err = 0; ++ if (au_hnotify_op.init) ++ err = au_hnotify_op.init(); ++ if (unlikely(err)) ++ au_hn_destroy_cache(); ++ } ++ AuTraceErr(err); + return err; +} + ++void au_hnotify_fin(void) ++{ ++ if (au_hnotify_op.fin) ++ au_hnotify_op.fin(); ++ /* cf. au_cache_fin() */ ++ if (au_cachep[AuCache_HNOTIFY]) ++ au_hn_destroy_cache(); ++} +--- /dev/null ++++ linux-3.9/fs/aufs/i_op.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,1107 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +/* -+ * successful returns with iinfo write_locked -+ * minus: errno -+ * zero: success, matched -+ * plus: no error, but unmatched ++ * inode operations (except add/del/rename) + */ -+static int reval_inode(struct inode *inode, struct dentry *dentry) ++ ++#include ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++static int h_permission(struct inode *h_inode, int mask, ++ struct vfsmount *h_mnt, int brperm) +{ + int err; -+ unsigned int gen; -+ struct au_iigen iigen; -+ aufs_bindex_t bindex, bend; -+ struct inode *h_inode, *h_dinode; ++ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); + -+ /* -+ * before this function, if aufs got any iinfo lock, it must be only -+ * one, the parent dir. -+ * it can happen by UDBA and the obsoleted inode number. -+ */ -+ err = -EIO; -+ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ err = -EACCES; ++ if ((write_mask && IS_IMMUTABLE(h_inode)) ++ || ((mask & MAY_EXEC) ++ && S_ISREG(h_inode->i_mode) ++ && ((h_mnt->mnt_flags & MNT_NOEXEC) ++ || !(h_inode->i_mode & S_IXUGO)))) + goto out; + -+ err = 1; -+ ii_write_lock_new_child(inode); -+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (!h_inode || h_inode != h_dinode) -+ continue; ++ /* ++ * - skip the lower fs test in the case of write to ro branch. ++ * - nfs dir permission write check is optimized, but a policy for ++ * link/rename requires a real check. ++ */ ++ if ((write_mask && !au_br_writable(brperm)) ++ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) ++ && write_mask && !(mask & MAY_READ)) ++ || !h_inode->i_op->permission) { ++ /* AuLabel(generic_permission); */ ++ err = generic_permission(h_inode, mask); ++ } else { ++ /* AuLabel(h_inode->permission); */ ++ err = h_inode->i_op->permission(h_inode, mask); ++ AuTraceErr(err); ++ } + -+ err = 0; -+ gen = au_iigen(inode, &iigen); -+ if (gen == au_digen(dentry) -+ && !au_ig_ftest(iigen.ig_flags, HALF_REFRESHED)) -+ break; ++ if (!err) ++ err = devcgroup_inode_permission(h_inode, mask); ++ if (!err) ++ err = security_inode_permission(h_inode, mask); + -+ /* fully refresh inode using dentry */ -+ err = au_refresh_hinode(inode, dentry); -+ if (!err) -+ au_update_iigen(inode, /*half*/0); -+ break; ++#if 0 ++ if (!err) { ++ /* todo: do we need to call ima_path_check()? */ ++ struct path h_path = { ++ .dentry = ++ .mnt = h_mnt ++ }; ++ err = ima_path_check(&h_path, ++ mask & (MAY_READ | MAY_WRITE | MAY_EXEC), ++ IMA_COUNT_LEAVE); + } ++#endif + -+ if (unlikely(err)) -+ ii_write_unlock(inode); +out: + return err; +} + -+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ unsigned int d_type, ino_t *ino) ++static int aufs_permission(struct inode *inode, int mask) +{ + int err; -+ struct mutex *mtx; -+ -+ /* prevent hardlinked inode number from race condition */ -+ mtx = NULL; -+ if (d_type != DT_DIR) { -+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx; -+ mutex_lock(mtx); -+ } -+ err = au_xino_read(sb, bindex, h_ino, ino); ++ aufs_bindex_t bindex, bend; ++ const unsigned char isdir = !!S_ISDIR(inode->i_mode), ++ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; ++ ++ /* todo: support rcu-walk? */ ++ if (mask & MAY_NOT_BLOCK) ++ return -ECHILD; ++ ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ ii_read_lock_child(inode); ++#if 0 ++ err = au_iigen_test(inode, au_sigen(sb)); + if (unlikely(err)) + goto out; ++#endif + -+ if (!*ino) { -+ err = -EIO; -+ *ino = au_xino_new_ino(sb); -+ if (unlikely(!*ino)) -+ goto out; -+ err = au_xino_write(sb, bindex, h_ino, *ino); -+ if (unlikely(err)) ++ if (!isdir || write_mask) { ++ err = au_busy_or_stale(); ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ if (unlikely(!h_inode ++ || (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT))) + goto out; ++ ++ err = 0; ++ bindex = au_ibstart(inode); ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm); ++ if (write_mask ++ && !err ++ && !special_file(h_inode->i_mode)) { ++ /* test whether the upper writable branch exists */ ++ err = -EROFS; ++ for (; bindex >= 0; bindex--) ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = 0; ++ break; ++ } ++ } ++ goto out; ++ } ++ ++ /* non-write to dir */ ++ err = 0; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) { ++ err = au_busy_or_stale(); ++ if (unlikely(!S_ISDIR(h_inode->i_mode))) ++ break; ++ ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, au_br_mnt(br), ++ br->br_perm); ++ } + } + +out: -+ if (mtx) -+ mutex_unlock(mtx); ++ ii_read_unlock(inode); ++ si_read_unlock(sb); + return err; +} + -+/* successful returns with iinfo write_locked */ -+/* todo: return with unlocked? */ -+struct inode *au_new_inode(struct dentry *dentry, int must_new) ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, ++ unsigned int flags) +{ -+ struct inode *inode, *h_inode; -+ struct dentry *h_dentry; ++ struct dentry *ret, *parent; ++ struct inode *inode; + struct super_block *sb; -+ struct mutex *mtx; -+ ino_t h_ino, ino; -+ int err; -+ aufs_bindex_t bstart; ++ int err, npositive; + -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ h_dentry = au_h_dptr(dentry, bstart); -+ h_inode = h_dentry->d_inode; -+ h_ino = h_inode->i_ino; ++ IMustLock(dir); + -+ /* -+ * stop 'race'-ing between hardlinks under different -+ * parents. -+ */ -+ mtx = NULL; -+ if (!S_ISDIR(h_inode->i_mode)) -+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; ++ /* todo: support rcu-walk? */ ++ ret = ERR_PTR(-ECHILD); ++ if (flags & LOOKUP_RCU) ++ goto out; + -+new_ino: -+ if (mtx) -+ mutex_lock(mtx); -+ err = au_xino_read(sb, bstart, h_ino, &ino); -+ inode = ERR_PTR(err); ++ ret = ERR_PTR(-ENAMETOOLONG); ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out; ++ ++ sb = dir->i_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ ret = ERR_PTR(err); + if (unlikely(err)) + goto out; + -+ if (!ino) { -+ ino = au_xino_new_ino(sb); -+ if (unlikely(!ino)) { -+ inode = ERR_PTR(-EIO); -+ goto out; -+ } ++ err = au_di_init(dentry); ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_si; ++ ++ inode = NULL; ++ npositive = 0; /* suppress a warning */ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_alive_dir(parent); ++ if (!err) ++ err = au_digen_test(parent, au_sigen(sb)); ++ if (!err) { ++ npositive = au_lkup_dentry(dentry, au_dbstart(parent), ++ /*type*/0); ++ err = npositive; ++ } ++ di_read_unlock(parent, AuLock_IR); ++ ret = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ ++ if (npositive) { ++ inode = au_new_inode(dentry, /*must_new*/0); ++ ret = (void *)inode; ++ } ++ if (IS_ERR(inode)) { ++ inode = NULL; ++ goto out_unlock; + } + -+ AuDbg("i%lu\n", (unsigned long)ino); -+ inode = au_iget_locked(sb, ino); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out; ++ ret = d_splice_alias(inode, dentry); ++#if 0 ++ if (unlikely(d_need_lookup(dentry))) { ++ spin_lock(&dentry->d_lock); ++ dentry->d_flags &= ~DCACHE_NEED_LOOKUP; ++ spin_unlock(&dentry->d_lock); ++ } else ++#endif ++ if (unlikely(IS_ERR(ret) && inode)) { ++ ii_write_unlock(inode); ++ iput(inode); ++ inode = NULL; ++ } + -+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); -+ if (inode->i_state & I_NEW) { ++out_unlock: ++ di_write_unlock(dentry); ++ if (inode) { + /* verbose coding for lock class name */ -+ if (unlikely(S_ISLNK(h_inode->i_mode))) -+ au_rw_class(&au_ii(inode)->ii_rwsem, -+ au_lc_key + AuLcSymlink_IIINFO); -+ else if (unlikely(S_ISDIR(h_inode->i_mode))) -+ au_rw_class(&au_ii(inode)->ii_rwsem, -+ au_lc_key + AuLcDir_IIINFO); ++ if (unlikely(S_ISLNK(inode->i_mode))) ++ au_rw_class(&au_di(dentry)->di_rwsem, ++ au_lc_key + AuLcSymlink_DIINFO); ++ else if (unlikely(S_ISDIR(inode->i_mode))) ++ au_rw_class(&au_di(dentry)->di_rwsem, ++ au_lc_key + AuLcDir_DIINFO); + else /* likely */ -+ au_rw_class(&au_ii(inode)->ii_rwsem, -+ au_lc_key + AuLcNonDir_IIINFO); ++ au_rw_class(&au_di(dentry)->di_rwsem, ++ au_lc_key + AuLcNonDir_DIINFO); ++ } ++out_si: ++ si_read_unlock(sb); ++out: ++ return ret; ++} + -+ ii_write_lock_new_child(inode); -+ err = set_inode(inode, dentry); -+ if (!err) { -+ unlock_new_inode(inode); -+ goto out; /* success */ -+ } ++/* ---------------------------------------------------------------------- */ + -+ /* -+ * iget_failed() calls iput(), but we need to call -+ * ii_write_unlock() after iget_failed(). so dirty hack for -+ * i_count. -+ */ -+ atomic_inc(&inode->i_count); -+ iget_failed(inode); -+ ii_write_unlock(inode); -+ au_xino_write(sb, bstart, h_ino, /*ino*/0); -+ /* ignore this error */ -+ goto out_iput; -+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { -+ /* -+ * horrible race condition between lookup, readdir and copyup -+ * (or something). -+ */ -+ if (mtx) -+ mutex_unlock(mtx); -+ err = reval_inode(inode, dentry); -+ if (unlikely(err < 0)) { -+ mtx = NULL; -+ goto out_iput; -+ } ++static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent, ++ const unsigned char add_entry, aufs_bindex_t bcpup, ++ aufs_bindex_t bstart) ++{ ++ int err; ++ struct dentry *h_parent; ++ struct inode *h_dir; + -+ if (!err) { -+ mtx = NULL; -+ goto out; /* success */ -+ } else if (mtx) -+ mutex_lock(mtx); ++ if (add_entry) ++ IMustLock(parent->d_inode); ++ else ++ di_write_lock_parent(parent); ++ ++ err = 0; ++ if (!au_h_dptr(parent, bcpup)) { ++ if (bstart < bcpup) ++ err = au_cpdown_dirs(dentry, bcpup); ++ else ++ err = au_cpup_dirs(dentry, bcpup); + } ++ if (!err && add_entry) { ++ h_parent = au_h_dptr(parent, bcpup); ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ err = au_lkup_neg(dentry, bcpup, ++ au_ftest_wrdir(add_entry, TMP_WHENTRY)); ++ /* todo: no unlock here */ ++ mutex_unlock(&h_dir->i_mutex); + -+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) -+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," -+ " b%d, %s, %.*s, hi%lu, i%lu.\n", -+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry), -+ (unsigned long)h_ino, (unsigned long)ino); -+ ino = 0; -+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0); -+ if (!err) { -+ iput(inode); -+ if (mtx) -+ mutex_unlock(mtx); -+ goto new_ino; ++ AuDbg("bcpup %d\n", bcpup); ++ if (!err) { ++ if (!dentry->d_inode) ++ au_set_h_dptr(dentry, bstart, NULL); ++ au_update_dbrange(dentry, /*do_put_zero*/0); ++ } + } + -+out_iput: -+ iput(inode); -+ inode = ERR_PTR(err); -+out: -+ if (mtx) -+ mutex_unlock(mtx); -+ return inode; -+} ++ if (!add_entry) ++ di_write_unlock(parent); ++ if (!err) ++ err = bcpup; /* success */ + -+/* ---------------------------------------------------------------------- */ ++ AuTraceErr(err); ++ return err; ++} + -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode) ++/* ++ * decide the branch and the parent dir where we will create a new entry. ++ * returns new bindex or an error. ++ * copyup the parent dir if needed. ++ */ ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args) +{ + int err; ++ aufs_bindex_t bcpup, bstart, src_bstart; ++ const unsigned char add_entry ++ = au_ftest_wrdir(args->flags, ADD_ENTRY) ++ | au_ftest_wrdir(args->flags, TMP_WHENTRY); ++ struct super_block *sb; ++ struct dentry *parent; ++ struct au_sbinfo *sbinfo; + -+ err = au_br_rdonly(au_sbr(sb, bindex)); ++ sb = dentry->d_sb; ++ sbinfo = au_sbi(sb); ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(dentry); ++ bcpup = bstart; ++ if (args->force_btgt < 0) { ++ if (src_dentry) { ++ src_bstart = au_dbstart(src_dentry); ++ if (src_bstart < bstart) ++ bcpup = src_bstart; ++ } else if (add_entry) { ++ err = AuWbrCreate(sbinfo, dentry, ++ au_ftest_wrdir(args->flags, ISDIR)); ++ bcpup = err; ++ } + -+ /* pseudo-link after flushed may happen out of bounds */ -+ if (!err -+ && inode -+ && au_ibstart(inode) <= bindex -+ && bindex <= au_ibend(inode)) { -+ /* -+ * permission check is unnecessary since vfsub routine -+ * will be called later -+ */ -+ struct inode *hi = au_h_iptr(inode, bindex); -+ if (hi) -+ err = IS_IMMUTABLE(hi) ? -EROFS : 0; ++ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { ++ if (add_entry) ++ err = AuWbrCopyup(sbinfo, dentry); ++ else { ++ if (!IS_ROOT(dentry)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(sbinfo, dentry); ++ di_read_unlock(parent, !AuLock_IR); ++ } else ++ err = AuWbrCopyup(sbinfo, dentry); ++ } ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else { ++ bcpup = args->force_btgt; ++ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); ++ } ++ ++ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); ++ err = bcpup; ++ if (bcpup == bstart) ++ goto out; /* success */ ++ ++ /* copyup the new parent into the branch we process */ ++ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); ++ if (err >= 0) { ++ if (!dentry->d_inode) { ++ au_set_h_dptr(dentry, bstart, NULL); ++ au_set_dbstart(dentry, bcpup); ++ au_set_dbend(dentry, bcpup); ++ } ++ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup)); + } + ++out: ++ dput(parent); + return err; +} + -+int au_test_h_perm(struct inode *h_inode, int mask) -+{ -+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) -+ return 0; -+ return inode_permission(h_inode, mask); -+} ++/* ---------------------------------------------------------------------- */ + -+int au_test_h_perm_sio(struct inode *h_inode, int mask) ++void au_pin_hdir_unlock(struct au_pin *p) +{ -+ if (au_test_nfs(h_inode->i_sb) -+ && (mask & MAY_WRITE) -+ && S_ISDIR(h_inode->i_mode)) -+ mask |= MAY_READ; /* force permission check */ -+ return au_test_h_perm(h_inode, mask); ++ if (p->hdir) ++ au_hn_imtx_unlock(p->hdir); +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/inode.h 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,588 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * inode operations -+ */ + -+#ifndef __AUFS_INODE_H__ -+#define __AUFS_INODE_H__ ++static int au_pin_hdir_lock(struct au_pin *p) ++{ ++ int err; + -+#ifdef __KERNEL__ ++ err = 0; ++ if (!p->hdir) ++ goto out; + -+#include -+#include "rwsem.h" ++ /* even if an error happens later, keep this lock */ ++ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi); + -+struct vfsmount; ++ err = -EBUSY; ++ if (unlikely(p->hdir->hi_inode != p->h_parent->d_inode)) ++ goto out; + -+struct au_hnotify { -+#ifdef CONFIG_AUFS_HNOTIFY -+#ifdef CONFIG_AUFS_HFSNOTIFY -+ /* never use fsnotify_add_vfsmount_mark() */ -+ struct fsnotify_mark hn_mark; -+#endif -+ struct inode *hn_aufs_inode; /* no get/put */ -+#endif -+} ____cacheline_aligned_in_smp; ++ err = 0; ++ if (p->h_dentry) ++ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode, ++ p->h_parent, p->br); + -+struct au_hinode { -+ struct inode *hi_inode; -+ aufs_bindex_t hi_id; -+#ifdef CONFIG_AUFS_HNOTIFY -+ struct au_hnotify *hi_notify; -+#endif ++out: ++ return err; ++} + -+ /* reference to the copied-up whiteout with get/put */ -+ struct dentry *hi_whdentry; -+}; ++int au_pin_hdir_relock(struct au_pin *p) ++{ ++ int err, i; ++ struct inode *h_i; ++ struct dentry *h_d[] = { ++ p->h_dentry, ++ p->h_parent ++ }; + -+/* ig_flags */ -+#define AuIG_HALF_REFRESHED 1 -+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name) -+#define au_ig_fset(flags, name) \ -+ do { (flags) |= AuIG_##name; } while (0) -+#define au_ig_fclr(flags, name) \ -+ do { (flags) &= ~AuIG_##name; } while (0) ++ err = au_pin_hdir_lock(p); ++ if (unlikely(err)) ++ goto out; + -+struct au_iigen { -+ __u32 ig_generation, ig_flags; -+}; ++ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) { ++ if (!h_d[i]) ++ continue; ++ h_i = h_d[i]->d_inode; ++ if (h_i) ++ err = !h_i->i_nlink; ++ } + -+struct au_vdir; -+struct au_iinfo { -+ spinlock_t ii_genspin; -+ struct au_iigen ii_generation; -+ struct super_block *ii_hsb1; /* no get/put */ ++out: ++ return err; ++} + -+ struct au_rwsem ii_rwsem; -+ aufs_bindex_t ii_bstart, ii_bend; -+ __u32 ii_higen; -+ struct au_hinode *ii_hinode; -+ struct au_vdir *ii_vdir; -+}; ++void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task) ++{ ++#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) ++ p->hdir->hi_inode->i_mutex.owner = task; ++#endif ++} + -+struct au_icntnr { -+ struct au_iinfo iinfo; -+ struct inode vfs_inode; -+} ____cacheline_aligned_in_smp; ++void au_pin_hdir_acquire_nest(struct au_pin *p) ++{ ++ if (p->hdir) { ++ mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map, ++ p->lsc_hi, 0, NULL, _RET_IP_); ++ au_pin_hdir_set_owner(p, current); ++ } ++} + -+/* au_pin flags */ -+#define AuPin_DI_LOCKED 1 -+#define AuPin_MNT_WRITE (1 << 1) -+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) -+#define au_fset_pin(flags, name) \ -+ do { (flags) |= AuPin_##name; } while (0) -+#define au_fclr_pin(flags, name) \ -+ do { (flags) &= ~AuPin_##name; } while (0) ++void au_pin_hdir_release(struct au_pin *p) ++{ ++ if (p->hdir) { ++ au_pin_hdir_set_owner(p, p->task); ++ mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_); ++ } ++} + -+struct au_pin { -+ /* input */ -+ struct dentry *dentry; -+ unsigned int udba; -+ unsigned char lsc_di, lsc_hi, flags; -+ aufs_bindex_t bindex; ++struct dentry *au_pinned_h_parent(struct au_pin *pin) ++{ ++ if (pin && pin->parent) ++ return au_h_dptr(pin->parent, pin->bindex); ++ return NULL; ++} + -+ /* output */ -+ struct dentry *parent; -+ struct au_hinode *hdir; -+ struct vfsmount *h_mnt; -+}; ++void au_unpin(struct au_pin *p) ++{ ++ if (p->hdir) ++ au_pin_hdir_unlock(p); ++ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE)) ++ vfsub_mnt_drop_write(p->h_mnt); ++ if (!p->hdir) ++ return; + -+/* ---------------------------------------------------------------------- */ ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ iput(p->hdir->hi_inode); ++ dput(p->parent); ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++ /* do not clear p->task */ ++} + -+static inline struct au_iinfo *au_ii(struct inode *inode) ++int au_do_pin(struct au_pin *p) +{ -+ struct au_iinfo *iinfo; ++ int err; ++ struct super_block *sb; ++ struct inode *h_dir; + -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ if (iinfo->ii_hinode) -+ return iinfo; -+ return NULL; /* debugging bad_inode case */ -+} ++ err = 0; ++ sb = p->dentry->d_sb; ++ p->br = au_sbr(sb, p->bindex); ++ if (IS_ROOT(p->dentry)) { ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = au_br_mnt(p->br); ++ err = vfsub_mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_err; ++ } ++ } ++ goto out; ++ } + -+/* ---------------------------------------------------------------------- */ ++ p->h_dentry = NULL; ++ if (p->bindex <= au_dbend(p->dentry)) ++ p->h_dentry = au_h_dptr(p->dentry, p->bindex); + -+/* inode.c */ -+struct inode *au_igrab(struct inode *inode); -+int au_refresh_hinode_self(struct inode *inode); -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry); -+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ unsigned int d_type, ino_t *ino); -+struct inode *au_new_inode(struct dentry *dentry, int must_new); -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode); -+int au_test_h_perm(struct inode *h_inode, int mask); -+int au_test_h_perm_sio(struct inode *h_inode, int mask); ++ p->parent = dget_parent(p->dentry); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_lock(p->parent, AuLock_IR, p->lsc_di); + -+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, -+ ino_t h_ino, unsigned int d_type, ino_t *ino) -+{ -+#ifdef CONFIG_AUFS_SHWH -+ return au_ino(sb, bindex, h_ino, d_type, ino); -+#else -+ return 0; -+#endif -+} ++ h_dir = NULL; ++ p->h_parent = au_h_dptr(p->parent, p->bindex); ++ p->hdir = au_hi(p->parent->d_inode, p->bindex); ++ if (p->hdir) ++ h_dir = p->hdir->hi_inode; + -+/* i_op.c */ -+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; ++ /* ++ * udba case, or ++ * if DI_LOCKED is not set, then p->parent may be different ++ * and h_parent can be NULL. ++ */ ++ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) { ++ err = -EBUSY; ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ dput(p->parent); ++ p->parent = NULL; ++ goto out_err; ++ } + -+/* au_wr_dir flags */ -+#define AuWrDir_ADD_ENTRY 1 -+#define AuWrDir_ISDIR (1 << 1) -+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) -+#define au_fset_wrdir(flags, name) \ -+ do { (flags) |= AuWrDir_##name; } while (0) -+#define au_fclr_wrdir(flags, name) \ -+ do { (flags) &= ~AuWrDir_##name; } while (0) ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = au_br_mnt(p->br); ++ err = vfsub_mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ dput(p->parent); ++ p->parent = NULL; ++ goto out_err; ++ } ++ } + -+struct au_wr_dir_args { -+ aufs_bindex_t force_btgt; -+ unsigned char flags; -+}; -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args); ++ au_igrab(h_dir); ++ err = au_pin_hdir_lock(p); ++ if (!err) ++ goto out; /* success */ + -+struct dentry *au_pinned_h_parent(struct au_pin *pin); -+void au_pin_init(struct au_pin *pin, struct dentry *dentry, ++out_err: ++ pr_err("err %d\n", err); ++ err = au_busy_or_stale(); ++out: ++ return err; ++} ++ ++void au_pin_init(struct au_pin *p, struct dentry *dentry, + aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags); -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) __must_check; -+int au_do_pin(struct au_pin *pin) __must_check; -+void au_unpin(struct au_pin *pin); ++ unsigned int udba, unsigned char flags) ++{ ++ p->dentry = dentry; ++ p->udba = udba; ++ p->lsc_di = lsc_di; ++ p->lsc_hi = lsc_hi; ++ p->flags = flags; ++ p->bindex = bindex; + -+/* i_op_add.c */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+ dev_t dev); -+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); -+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, -+ bool want_excl); -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry); -+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; + -+/* i_op_del.c */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_unlink(struct inode *dir, struct dentry *dentry); -+int aufs_rmdir(struct inode *dir, struct dentry *dentry); ++ p->h_dentry = NULL; ++ p->h_parent = NULL; ++ p->br = NULL; ++ p->task = current; ++} + -+/* i_op_ren.c */ -+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); -+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, -+ struct inode *dir, struct dentry *dentry); ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) ++{ ++ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, ++ udba, flags); ++ return au_do_pin(pin); ++} + -+/* iinfo.c */ -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); -+void au_hiput(struct au_hinode *hinode); -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh); -+unsigned int au_hi_flags(struct inode *inode, int isdir); ++/* ---------------------------------------------------------------------- */ + -+/* hinode flags */ -+#define AuHi_XINO 1 -+#define AuHi_HNOTIFY (1 << 1) -+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) -+#define au_fset_hi(flags, name) \ -+ do { (flags) |= AuHi_##name; } while (0) -+#define au_fclr_hi(flags, name) \ -+ do { (flags) &= ~AuHi_##name; } while (0) ++/* ++ * ->setattr() and ->getattr() are called in various cases. ++ * chmod, stat: dentry is revalidated. ++ * fchmod, fstat: file and dentry are not revalidated, additionally they may be ++ * unhashed. ++ * for ->setattr(), ia->ia_file is passed from ftruncate only. ++ */ ++/* todo: consolidate with do_refresh() and simple_reval_dpath() */ ++static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen) ++{ ++ int err; ++ struct inode *inode; ++ struct dentry *parent; + -+#ifndef CONFIG_AUFS_HNOTIFY -+#undef AuHi_HNOTIFY -+#define AuHi_HNOTIFY 0 -+#endif ++ err = 0; ++ inode = dentry->d_inode; ++ if (au_digen_test(dentry, sigen)) { ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ err = au_refresh_dentry(dentry, parent); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ } + -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags); ++ AuTraceErr(err); ++ return err; ++} + -+void au_update_iigen(struct inode *inode, int half); -+void au_update_ibrange(struct inode *inode, int do_put_zero); ++#define AuIcpup_DID_CPUP 1 ++#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) ++#define au_fset_icpup(flags, name) \ ++ do { (flags) |= AuIcpup_##name; } while (0) ++#define au_fclr_icpup(flags, name) \ ++ do { (flags) &= ~AuIcpup_##name; } while (0) + -+void au_icntnr_init_once(void *_c); -+int au_iinfo_init(struct inode *inode); -+void au_iinfo_fin(struct inode *inode); -+int au_ii_realloc(struct au_iinfo *iinfo, int nbr); ++struct au_icpup_args { ++ unsigned char flags; ++ unsigned char pin_flags; ++ aufs_bindex_t btgt; ++ unsigned int udba; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *h_inode; ++}; + -+#ifdef CONFIG_PROC_FS -+/* plink.c */ -+int au_plink_maint(struct super_block *sb, int flags); -+void au_plink_maint_leave(struct au_sbinfo *sbinfo); -+int au_plink_maint_enter(struct super_block *sb); -+#ifdef CONFIG_AUFS_DEBUG -+void au_plink_list(struct super_block *sb); -+#else -+AuStubVoid(au_plink_list, struct super_block *sb) -+#endif -+int au_plink_test(struct inode *inode); -+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); -+void au_plink_append(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+void au_plink_put(struct super_block *sb, int verbose); -+void au_plink_clean(struct super_block *sb, int verbose); -+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); -+#else -+AuStubInt0(au_plink_maint, struct super_block *sb, int flags); -+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); -+AuStubInt0(au_plink_maint_enter, struct super_block *sb); -+AuStubVoid(au_plink_list, struct super_block *sb); -+AuStubInt0(au_plink_test, struct inode *inode); -+AuStub(struct dentry *, au_plink_lkup, return NULL, -+ struct inode *inode, aufs_bindex_t bindex); -+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+AuStubVoid(au_plink_put, struct super_block *sb, int verbose); -+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); -+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); -+#endif /* CONFIG_PROC_FS */ ++static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, ++ struct au_icpup_args *a) ++{ ++ int err; ++ loff_t sz; ++ aufs_bindex_t bstart, ibstart; ++ struct dentry *hi_wh, *parent; ++ struct inode *inode; ++ struct file *h_file; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = 0 ++ }; + -+/* ---------------------------------------------------------------------- */ ++ bstart = au_dbstart(dentry); ++ inode = dentry->d_inode; ++ if (S_ISDIR(inode->i_mode)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ /* plink or hi_wh() case */ ++ ibstart = au_ibstart(inode); ++ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode)) ++ wr_dir_args.force_btgt = ibstart; ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ if (unlikely(err < 0)) ++ goto out; ++ a->btgt = err; ++ if (err != bstart) ++ au_fset_icpup(a->flags, DID_CPUP); + -+/* lock subclass for iinfo */ -+enum { -+ AuLsc_II_CHILD, /* child first */ -+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ -+ AuLsc_II_CHILD3, /* copyup dirs */ -+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ -+ AuLsc_II_PARENT2, -+ AuLsc_II_PARENT3, /* copyup dirs */ -+ AuLsc_II_NEW_CHILD -+}; ++ err = 0; ++ a->pin_flags = AuPin_MNT_WRITE; ++ parent = NULL; ++ if (!IS_ROOT(dentry)) { ++ au_fset_pin(a->pin_flags, DI_LOCKED); ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ } + -+/* -+ * ii_read_lock_child, ii_write_lock_child, -+ * ii_read_lock_child2, ii_write_lock_child2, -+ * ii_read_lock_child3, ii_write_lock_child3, -+ * ii_read_lock_parent, ii_write_lock_parent, -+ * ii_read_lock_parent2, ii_write_lock_parent2, -+ * ii_read_lock_parent3, ii_write_lock_parent3, -+ * ii_read_lock_new_child, ii_write_lock_new_child, -+ */ -+#define AuReadLockFunc(name, lsc) \ -+static inline void ii_read_lock_##name(struct inode *i) \ -+{ \ -+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} ++ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags); ++ if (unlikely(err)) ++ goto out_parent; + -+#define AuWriteLockFunc(name, lsc) \ -+static inline void ii_write_lock_##name(struct inode *i) \ -+{ \ -+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} ++ a->h_path.dentry = au_h_dptr(dentry, bstart); ++ a->h_inode = a->h_path.dentry->d_inode; ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ sz = -1; ++ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) ++ sz = ia->ia_size; ++ mutex_unlock(&a->h_inode->i_mutex); + -+#define AuRWLockFuncs(name, lsc) \ -+ AuReadLockFunc(name, lsc) \ -+ AuWriteLockFunc(name, lsc) ++ h_file = NULL; ++ hi_wh = NULL; ++ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) { ++ hi_wh = au_hi_wh(inode, a->btgt); ++ if (!hi_wh) { ++ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL, ++ &a->pin); ++ if (unlikely(err)) ++ goto out_unlock; ++ hi_wh = au_hi_wh(inode, a->btgt); ++ /* todo: revalidate hi_wh? */ ++ } ++ } + -+AuRWLockFuncs(child, CHILD); -+AuRWLockFuncs(child2, CHILD2); -+AuRWLockFuncs(child3, CHILD3); -+AuRWLockFuncs(parent, PARENT); -+AuRWLockFuncs(parent2, PARENT2); -+AuRWLockFuncs(parent3, PARENT3); -+AuRWLockFuncs(new_child, NEW_CHILD); ++ if (parent) { ++ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); ++ di_downgrade_lock(parent, AuLock_IR); ++ dput(parent); ++ parent = NULL; ++ } ++ if (!au_ftest_icpup(a->flags, DID_CPUP)) ++ goto out; /* success */ + -+#undef AuReadLockFunc -+#undef AuWriteLockFunc -+#undef AuRWLockFuncs ++ if (!d_unhashed(dentry)) { ++ h_file = au_h_open_pre(dentry, bstart); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ err = au_sio_cpup_simple(dentry, a->btgt, sz, ++ AuCpup_DTIME, &a->pin); ++ au_h_open_post(dentry, bstart, h_file); ++ } ++ if (!err) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ } else if (!hi_wh) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ else ++ a->h_path.dentry = hi_wh; /* do not dget here */ + -+/* -+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock -+ */ -+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); ++out_unlock: ++ a->h_inode = a->h_path.dentry->d_inode; ++ if (!err) ++ goto out; /* success */ ++ au_unpin(&a->pin); ++out_parent: ++ if (parent) { ++ di_write_unlock(parent); ++ dput(parent); ++ } ++out: ++ if (!err) ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ return err; ++} + -+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) -+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) -+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) ++static int aufs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *file; ++ struct au_icpup_args *a; + -+/* ---------------------------------------------------------------------- */ ++ inode = dentry->d_inode; ++ IMustLock(inode); + -+static inline void au_icntnr_init(struct au_icntnr *c) -+{ -+#ifdef CONFIG_AUFS_DEBUG -+ c->vfs_inode.i_mode = 0; -+#endif -+} ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; + -+static inline unsigned int au_iigen(struct inode *inode, struct au_iigen *iigen) -+{ -+ unsigned int gen; -+ struct au_iinfo *iinfo; ++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ ia->ia_valid &= ~ATTR_MODE; + -+ iinfo = au_ii(inode); -+ spin_lock(&iinfo->ii_genspin); -+ if (iigen) -+ *iigen = iinfo->ii_generation; -+ gen = iinfo->ii_generation.ig_generation; -+ spin_unlock(&iinfo->ii_genspin); ++ file = NULL; ++ sb = dentry->d_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out_kfree; + -+ return gen; -+} ++ if (ia->ia_valid & ATTR_FILE) { ++ /* currently ftruncate(2) only */ ++ AuDebugOn(!S_ISREG(inode->i_mode)); ++ file = ia->ia_file; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out_si; ++ ia->ia_file = au_hf_top(file); ++ a->udba = AuOpt_UDBA_NONE; ++ } else { ++ /* fchmod() doesn't pass ia_file */ ++ a->udba = au_opt_udba(sb); ++ di_write_lock_child(dentry); ++ /* no d_unlinked(), to set UDBA_NONE for root */ ++ if (d_unhashed(dentry)) ++ a->udba = AuOpt_UDBA_NONE; ++ if (a->udba != AuOpt_UDBA_NONE) { ++ AuDebugOn(IS_ROOT(dentry)); ++ err = au_reval_for_attr(dentry, au_sigen(sb)); ++ if (unlikely(err)) ++ goto out_dentry; ++ } ++ } + -+/* tiny test for inode number */ -+/* tmpfs generation is too rough */ -+static inline int au_test_higen(struct inode *inode, struct inode *h_inode) -+{ -+ struct au_iinfo *iinfo; ++ err = au_pin_and_icpup(dentry, ia, a); ++ if (unlikely(err < 0)) ++ goto out_dentry; ++ if (au_ftest_icpup(a->flags, DID_CPUP)) { ++ ia->ia_file = NULL; ++ ia->ia_valid &= ~ATTR_FILE; ++ } + -+ iinfo = au_ii(inode); -+ AuRwMustAnyLock(&iinfo->ii_rwsem); -+ return !(iinfo->ii_hsb1 == h_inode->i_sb -+ && iinfo->ii_higen == h_inode->i_generation); -+} ++ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); ++ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME)) ++ == (ATTR_MODE | ATTR_CTIME)) { ++ err = security_path_chmod(&a->h_path, ia->ia_mode); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID)) ++ && (ia->ia_valid & ATTR_CTIME)) { ++ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid); ++ if (unlikely(err)) ++ goto out_unlock; ++ } + -+static inline void au_iigen_dec(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; ++ if (ia->ia_valid & ATTR_SIZE) { ++ struct file *f; + -+ iinfo = au_ii(inode); -+ spin_lock(&iinfo->ii_genspin); -+ iinfo->ii_generation.ig_generation--; -+ spin_unlock(&iinfo->ii_genspin); ++ if (ia->ia_size < i_size_read(inode)) ++ /* unmap only */ ++ truncate_setsize(inode, ia->ia_size); ++ ++ f = NULL; ++ if (ia->ia_valid & ATTR_FILE) ++ f = ia->ia_file; ++ mutex_unlock(&a->h_inode->i_mutex); ++ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f); ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ } else ++ err = vfsub_notify_change(&a->h_path, ia); ++ if (!err) ++ au_cpup_attr_changeable(inode); ++ ++out_unlock: ++ mutex_unlock(&a->h_inode->i_mutex); ++ au_unpin(&a->pin); ++ if (unlikely(err)) ++ au_update_dbstart(dentry); ++out_dentry: ++ di_write_unlock(dentry); ++ if (file) { ++ fi_write_unlock(file); ++ ia->ia_file = file; ++ ia->ia_valid |= ATTR_FILE; ++ } ++out_si: ++ si_read_unlock(sb); ++out_kfree: ++ kfree(a); ++out: ++ AuTraceErr(err); ++ return err; +} + -+static inline int au_iigen_test(struct inode *inode, unsigned int sigen) ++static void au_refresh_iattr(struct inode *inode, struct kstat *st, ++ unsigned int nlink) +{ -+ int err; -+ -+ err = 0; -+ if (unlikely(inode && au_iigen(inode, NULL) != sigen)) -+ err = -EIO; -+ -+ return err; -+} ++ unsigned int n; + -+/* ---------------------------------------------------------------------- */ ++ inode->i_mode = st->mode; ++ /* don't i_[ug]id_write() here */ ++ inode->i_uid = st->uid; ++ inode->i_gid = st->gid; ++ inode->i_atime = st->atime; ++ inode->i_mtime = st->mtime; ++ inode->i_ctime = st->ctime; + -+static inline aufs_bindex_t au_ii_br_id(struct inode *inode, -+ aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; -+} ++ au_cpup_attr_nlink(inode, /*force*/0); ++ if (S_ISDIR(inode->i_mode)) { ++ n = inode->i_nlink; ++ n -= nlink; ++ n += st->nlink; ++ /* 0 can happen */ ++ set_nlink(inode, n); ++ } + -+static inline aufs_bindex_t au_ibstart(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_bstart; ++ spin_lock(&inode->i_lock); ++ inode->i_blocks = st->blocks; ++ i_size_write(inode, st->size); ++ spin_unlock(&inode->i_lock); +} + -+static inline aufs_bindex_t au_ibend(struct inode *inode) ++static int aufs_getattr(struct vfsmount *mnt __maybe_unused, ++ struct dentry *dentry, struct kstat *st) +{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_bend; -+} ++ int err; ++ unsigned int mnt_flags; ++ aufs_bindex_t bindex; ++ unsigned char udba_none, positive; ++ struct super_block *sb, *h_sb; ++ struct inode *inode; ++ struct path h_path; + -+static inline struct au_vdir *au_ivdir(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_vdir; -+} ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out; ++ mnt_flags = au_mntflags(sb); ++ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); + -+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; -+} ++ /* support fstat(2) */ ++ if (!d_unlinked(dentry) && !udba_none) { ++ unsigned int sigen = au_sigen(sb); ++ err = au_digen_test(dentry, sigen); ++ if (!err) { ++ di_read_lock_child(dentry, AuLock_IR); ++ err = au_dbrange_test(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else { ++ AuDebugOn(IS_ROOT(dentry)); ++ di_write_lock_child(dentry); ++ err = au_dbrange_test(dentry); ++ if (!err) ++ err = au_reval_for_attr(dentry, sigen); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ } ++ } else ++ di_read_lock_child(dentry, AuLock_IR); + -+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_bstart = bindex; -+} ++ bindex = au_ibstart(inode); ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ h_sb = h_path.mnt->mnt_sb; ++ if (!au_test_fs_bad_iattr(h_sb) && udba_none) ++ goto out_fill; /* success */ + -+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_bend = bindex; -+} ++ h_path.dentry = NULL; ++ if (au_dbstart(dentry) == bindex) ++ h_path.dentry = dget(au_h_dptr(dentry, bindex)); ++ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { ++ h_path.dentry = au_plink_lkup(inode, bindex); ++ if (IS_ERR(h_path.dentry)) ++ goto out_fill; /* pretending success */ ++ } ++ /* illegally overlapped or something */ ++ if (unlikely(!h_path.dentry)) ++ goto out_fill; /* pretending success */ + -+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_vdir = vdir; -+} ++ positive = !!h_path.dentry->d_inode; ++ if (positive) ++ err = vfs_getattr(&h_path, st); ++ dput(h_path.dentry); ++ if (!err) { ++ if (positive) ++ au_refresh_iattr(inode, st, ++ h_path.dentry->d_inode->i_nlink); ++ goto out_fill; /* success */ ++ } ++ AuTraceErr(err); ++ goto out_unlock; + -+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_hinode + bindex; ++out_fill: ++ generic_fillattr(inode, st); ++out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++out: ++ AuTraceErr(err); ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+static inline struct dentry *au_pinned_parent(struct au_pin *pin) -+{ -+ if (pin) -+ return pin->parent; -+ return NULL; -+} -+ -+static inline struct inode *au_pinned_h_dir(struct au_pin *pin) ++static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, ++ int bufsiz) +{ -+ if (pin && pin->hdir) -+ return pin->hdir->hi_inode; -+ return NULL; -+} ++ int err; ++ struct super_block *sb; ++ struct dentry *h_dentry; + -+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) -+{ -+ if (pin) -+ return pin->hdir; -+ return NULL; -+} ++ err = -EINVAL; ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (unlikely(!h_dentry->d_inode->i_op->readlink)) ++ goto out; + -+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) -+{ -+ if (pin) -+ pin->dentry = dentry; -+} ++ err = security_inode_readlink(h_dentry); ++ if (unlikely(err)) ++ goto out; + -+static inline void au_pin_set_parent_lflag(struct au_pin *pin, -+ unsigned char lflag) -+{ -+ if (pin) { -+ if (lflag) -+ au_fset_pin(pin->flags, DI_LOCKED); -+ else -+ au_fclr_pin(pin->flags, DI_LOCKED); ++ sb = dentry->d_sb; ++ if (!au_test_ro(sb, bindex, dentry->d_inode)) { ++ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); ++ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); + } ++ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); ++ ++out: ++ return err; +} + -+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) ++static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ -+ if (pin) { -+ dput(pin->parent); -+ pin->parent = dget(parent); -+ } -+} ++ int err; + -+/* ---------------------------------------------------------------------- */ ++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_hashed_positive(dentry); ++ if (!err) ++ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); ++ aufs_read_unlock(dentry, AuLock_IR); + -+struct au_branch; -+#ifdef CONFIG_AUFS_HNOTIFY -+struct au_hnotify_op { -+ void (*ctl)(struct au_hinode *hinode, int do_set); -+ int (*alloc)(struct au_hinode *hinode); ++out: ++ return err; ++} + -+ /* -+ * if it returns true, the the caller should free hinode->hi_notify, -+ * otherwise ->free() frees it. -+ */ -+ int (*free)(struct au_hinode *hinode, -+ struct au_hnotify *hn) __must_check; ++static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ int err; ++ mm_segment_t old_fs; ++ union { ++ char *k; ++ char __user *u; ++ } buf; + -+ void (*fin)(void); -+ int (*init)(void); ++ err = -ENOMEM; ++ buf.k = (void *)__get_free_page(GFP_NOFS); ++ if (unlikely(!buf.k)) ++ goto out; + -+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); -+ void (*fin_br)(struct au_branch *br); -+ int (*init_br)(struct au_branch *br, int perm); -+}; ++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_name; + -+/* hnotify.c */ -+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); -+void au_hn_free(struct au_hinode *hinode); -+void au_hn_ctl(struct au_hinode *hinode, int do_set); -+void au_hn_reset(struct inode *inode, unsigned int flags); -+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, -+ struct qstr *h_child_qstr, struct inode *h_child_inode); -+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); -+int au_hnotify_init_br(struct au_branch *br, int perm); -+void au_hnotify_fin_br(struct au_branch *br); -+int __init au_hnotify_init(void); -+void au_hnotify_fin(void); ++ err = au_d_hashed_positive(dentry); ++ if (!err) { ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX); ++ set_fs(old_fs); ++ } ++ aufs_read_unlock(dentry, AuLock_IR); + -+/* hfsnotify.c */ -+extern const struct au_hnotify_op au_hnotify_op; ++ if (err >= 0) { ++ buf.k[err] = 0; ++ /* will be freed by put_link */ ++ nd_set_link(nd, buf.k); ++ return NULL; /* success */ ++ } + -+static inline -+void au_hn_init(struct au_hinode *hinode) -+{ -+ hinode->hi_notify = NULL; ++out_name: ++ free_page((unsigned long)buf.k); ++out: ++ AuTraceErr(err); ++ return ERR_PTR(err); +} + -+static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++static void aufs_put_link(struct dentry *dentry __maybe_unused, ++ struct nameidata *nd, void *cookie __maybe_unused) +{ -+ return hinode->hi_notify; -+} ++ char *p; + -+#else -+static inline -+int au_hn_alloc(struct au_hinode *hinode __maybe_unused, -+ struct inode *inode __maybe_unused) -+{ -+ return -EOPNOTSUPP; ++ p = nd_get_link(nd); ++ if (!IS_ERR_OR_NULL(p)) ++ free_page((unsigned long)p); +} + -+static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags) +{ -+ return NULL; ++ int err; ++ struct super_block *sb; ++ struct inode *h_inode; ++ ++ sb = inode->i_sb; ++ /* mmap_sem might be acquired already, cf. aufs_mmap() */ ++ lockdep_off(); ++ si_read_lock(sb, AuLock_FLUSH); ++ ii_write_lock_child(inode); ++ lockdep_on(); ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ err = vfsub_update_time(h_inode, ts, flags); ++ lockdep_off(); ++ ii_write_unlock(inode); ++ si_read_unlock(sb); ++ lockdep_on(); ++ return err; +} + -+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) -+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, -+ int do_set __maybe_unused) -+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, -+ unsigned int flags __maybe_unused) -+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, -+ struct au_branch *br __maybe_unused, -+ int perm __maybe_unused) -+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, -+ int perm __maybe_unused) -+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused) -+AuStubInt0(__init au_hnotify_init, void) -+AuStubVoid(au_hnotify_fin, void) -+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) -+#endif /* CONFIG_AUFS_HNOTIFY */ ++/* ---------------------------------------------------------------------- */ + -+static inline void au_hn_suspend(struct au_hinode *hdir) -+{ -+ au_hn_ctl(hdir, /*do_set*/0); -+} ++struct inode_operations aufs_symlink_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, + -+static inline void au_hn_resume(struct au_hinode *hdir) -+{ -+ au_hn_ctl(hdir, /*do_set*/1); -+} ++ .readlink = aufs_readlink, ++ .follow_link = aufs_follow_link, ++ .put_link = aufs_put_link, + -+static inline void au_hn_imtx_lock(struct au_hinode *hdir) -+{ -+ mutex_lock(&hdir->hi_inode->i_mutex); -+ au_hn_suspend(hdir); -+} ++ /* .update_time = aufs_update_time */ ++}; + -+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir, -+ unsigned int sc __maybe_unused) -+{ -+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); -+ au_hn_suspend(hdir); -+} ++struct inode_operations aufs_dir_iop = { ++ .create = aufs_create, ++ .lookup = aufs_lookup, ++ .link = aufs_link, ++ .unlink = aufs_unlink, ++ .symlink = aufs_symlink, ++ .mkdir = aufs_mkdir, ++ .rmdir = aufs_rmdir, ++ .mknod = aufs_mknod, ++ .rename = aufs_rename, + -+static inline void au_hn_imtx_unlock(struct au_hinode *hdir) -+{ -+ au_hn_resume(hdir); -+ mutex_unlock(&hdir->hi_inode->i_mutex); -+} ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_INODE_H__ */ ++ .update_time = aufs_update_time ++ /* no support for atomic_open() */ ++}; ++ ++struct inode_operations aufs_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ ++ .update_time = aufs_update_time ++}; --- /dev/null -+++ linux-3.x-rcN/fs/aufs/ioctl.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,196 @@ ++++ linux-3.9/fs/aufs/i_op_add.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,722 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -14540,486 +14876,696 @@ + */ + +/* -+ * ioctl -+ * plink-management and readdir in userspace. -+ * assist the pathconf(3) wrapper library. ++ * inode operations (add entry) ++ */ ++ ++#include "aufs.h" ++ ++/* ++ * final procedure of adding a new entry, except link(2). ++ * remove whiteout, instantiate, copyup the parent dir's times and size ++ * and update version. ++ * if it failed, re-create the removed whiteout. ++ */ ++static int epilog(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct dentry *dentry) ++{ ++ int err, rerr; ++ aufs_bindex_t bwh; ++ struct path h_path; ++ struct inode *inode, *h_dir; ++ struct dentry *wh; ++ ++ bwh = -1; ++ if (wh_dentry) { ++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(h_dir); ++ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); ++ bwh = au_dbwh(dentry); ++ h_path.dentry = wh_dentry; ++ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ inode = au_new_inode(dentry, /*must_new*/1); ++ if (!IS_ERR(inode)) { ++ d_instantiate(dentry, inode); ++ dir = dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(dir); ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++ return 0; /* success */ ++ } ++ ++ err = PTR_ERR(inode); ++ if (!wh_dentry) ++ goto out; ++ ++ /* revert */ ++ /* dir inode is locked */ ++ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); ++ rerr = PTR_ERR(wh); ++ if (IS_ERR(wh)) { ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } else ++ dput(wh); ++ ++out: ++ return err; ++} ++ ++static int au_d_may_add(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(d_unhashed(dentry))) ++ err = -ENOENT; ++ if (unlikely(dentry->d_inode)) ++ err = -EEXIST; ++ return err; ++} ++ ++/* ++ * simple tests for the adding inode operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) ++{ ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ ++ err = -ENAMETOOLONG; ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (!dentry->d_inode) { ++ err = -EEXIST; ++ if (unlikely(h_inode)) ++ goto out; ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } ++ ++ err = 0; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ err = -EIO; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * initial procedure of adding a new entry. ++ * prepare writable branch and the parent dir, lock it, ++ * and lookup whiteout for the new entry. + */ -+ -+#include "aufs.h" -+ -+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) ++static struct dentry* ++lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, ++ struct dentry *src_dentry, struct au_pin *pin, ++ struct au_wr_dir_args *wr_dir_args) +{ -+ int err, fd; -+ aufs_bindex_t wbi, bindex, bend; -+ struct file *h_file; ++ struct dentry *wh_dentry, *h_parent; + struct super_block *sb; -+ struct dentry *root; + struct au_branch *br; -+ struct aufs_wbr_fd wbrfd = { -+ .oflags = au_dir_roflags, -+ .brid = -1 -+ }; -+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY -+ | O_NOATIME | O_CLOEXEC; -+ -+ AuDebugOn(wbrfd.oflags & ~valid); -+ -+ if (arg) { -+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ goto out; -+ } ++ int err; ++ unsigned int udba; ++ aufs_bindex_t bcpup; + -+ err = -EINVAL; -+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); -+ wbrfd.oflags |= au_dir_roflags; -+ AuDbg("0%o\n", wbrfd.oflags); -+ if (unlikely(wbrfd.oflags & ~valid)) -+ goto out; -+ } ++ AuDbg("%.*s\n", AuDLNPair(dentry)); + -+ fd = get_unused_fd(); -+ err = fd; -+ if (unlikely(fd < 0)) ++ err = au_wr_dir(dentry, src_dentry, wr_dir_args); ++ bcpup = err; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) + goto out; + -+ h_file = ERR_PTR(-EINVAL); -+ wbi = 0; -+ br = NULL; -+ sb = path->dentry->d_sb; -+ root = sb->s_root; -+ aufs_read_lock(root, AuLock_IR); -+ bend = au_sbend(sb); -+ if (wbrfd.brid >= 0) { -+ wbi = au_br_index(sb, wbrfd.brid); -+ if (unlikely(wbi < 0 || wbi > bend)) -+ goto out_unlock; -+ } ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; + -+ h_file = ERR_PTR(-ENOENT); -+ br = au_sbr(sb, wbi); -+ if (!au_br_writable(br->br_perm)) { -+ if (arg) -+ goto out_unlock; ++ h_parent = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) ++ err = au_may_add(dentry, bcpup, h_parent, ++ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); ++ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; + -+ bindex = wbi + 1; -+ wbi = -1; -+ for (; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (au_br_writable(br->br_perm)) { -+ wbi = bindex; -+ br = au_sbr(sb, wbi); -+ break; -+ } -+ } ++ br = au_sbr(sb, bcpup); ++ if (dt) { ++ struct path tmp = { ++ .dentry = h_parent, ++ .mnt = au_br_mnt(br) ++ }; ++ au_dtime_store(dt, au_pinned_parent(pin), &tmp); + } -+ AuDbg("wbi %d\n", wbi); -+ if (wbi >= 0) -+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL); + -+out_unlock: -+ aufs_read_unlock(root, AuLock_IR); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out_fd; ++ wh_dentry = NULL; ++ if (bcpup != au_dbwh(dentry)) ++ goto out; /* success */ + -+ atomic_dec(&br->br_count); /* cf. au_h_open() */ -+ fd_install(fd, h_file); -+ err = fd; -+ goto out; /* success */ ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); + -+out_fd: -+ put_unused_fd(fd); ++out_unpin: ++ if (IS_ERR(wh_dentry)) ++ au_unpin(pin); +out: -+ AuTraceErr(err); -+ return err; ++ return wh_dentry; +} + +/* ---------------------------------------------------------------------- */ + -+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) ++enum { Mknod, Symlink, Creat }; ++struct simple_arg { ++ int type; ++ union { ++ struct { ++ umode_t mode; ++ bool want_excl; ++ } c; ++ struct { ++ const char *symname; ++ } s; ++ struct { ++ umode_t mode; ++ dev_t dev; ++ } m; ++ } u; ++}; ++ ++static int add_simple(struct inode *dir, struct dentry *dentry, ++ struct simple_arg *arg) +{ -+ long err; ++ int err; ++ aufs_bindex_t bstart; ++ unsigned char created; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent; ++ struct inode *h_dir; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY ++ }; + -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_ioctl(file, cmd, arg); -+ break; ++ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ IMustLock(dir); + -+ case AUFS_CTL_WBR_FD: -+ err = au_wbr_fd(&file->f_path, (void __user *)arg); -+ break; ++ parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; + -+ case AUFS_CTL_IBUSY: -+ err = au_ibusy_ioctl(file, arg); ++ bstart = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_dir = au_pinned_h_dir(&pin); ++ switch (arg->type) { ++ case Creat: ++ err = vfsub_create(h_dir, &h_path, arg->u.c.mode, ++ arg->u.c.want_excl); + break; -+ -+ default: -+ /* do not call the lower */ -+ AuDbg("0x%x\n", cmd); -+ err = -ENOTTY; -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_WBR_FD: -+ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ case Symlink: ++ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname); ++ break; ++ case Mknod: ++ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev); + break; -+ + default: -+ /* do not call the lower */ -+ AuDbg("0x%x\n", cmd); -+ err = -ENOTTY; ++ BUG(); + } ++ created = !err; ++ if (!err) ++ err = epilog(dir, bstart, wh_dentry, dentry); + -+ AuTraceErr(err); -+ return err; -+} -+ -+#ifdef CONFIG_COMPAT -+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_compat_ioctl(file, cmd, arg); -+ break; ++ /* revert */ ++ if (unlikely(created && err && h_path.dentry->d_inode)) { ++ int rerr; ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ if (rerr) { ++ AuIOErr("%.*s revert failure(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++ } + -+ case AUFS_CTL_IBUSY: -+ err = au_ibusy_compat_ioctl(file, arg); -+ break; ++ au_unpin(&pin); ++ dput(wh_dentry); + -+ default: -+ err = aufs_ioctl_dir(file, cmd, arg); ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); + } -+ -+ AuTraceErr(err); ++ aufs_read_unlock(dentry, AuLock_DW); ++out: + return err; +} + -+#if 0 /* unused yet */ -+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, -+ unsigned long arg) ++int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t dev) +{ -+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); ++ struct simple_arg arg = { ++ .type = Mknod, ++ .u.m = { ++ .mode = mode, ++ .dev = dev ++ } ++ }; ++ return add_simple(dir, dentry, &arg); +} -+#endif -+#endif ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/i_op_add.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,713 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * inode operations (add entry) -+ */ + -+#include "aufs.h" ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) ++{ ++ struct simple_arg arg = { ++ .type = Symlink, ++ .u.s.symname = symname ++ }; ++ return add_simple(dir, dentry, &arg); ++} + -+/* -+ * final procedure of adding a new entry, except link(2). -+ * remove whiteout, instantiate, copyup the parent dir's times and size -+ * and update version. -+ * if it failed, re-create the removed whiteout. -+ */ -+static int epilog(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct dentry *dentry) ++int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ bool want_excl) +{ -+ int err, rerr; -+ aufs_bindex_t bwh; -+ struct path h_path; -+ struct inode *inode, *h_dir; -+ struct dentry *wh; ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = { ++ .mode = mode, ++ .want_excl = want_excl ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} + -+ bwh = -1; -+ if (wh_dentry) { -+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(h_dir); -+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); -+ bwh = au_dbwh(dentry); -+ h_path.dentry = wh_dentry; -+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); -+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out; -+ } ++/* ---------------------------------------------------------------------- */ + -+ inode = au_new_inode(dentry, /*must_new*/1); -+ if (!IS_ERR(inode)) { -+ d_instantiate(dentry, inode); -+ dir = dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(dir); -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ dir->i_version++; -+ return 0; /* success */ -+ } ++struct au_link_args { ++ aufs_bindex_t bdst, bsrc; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *src_parent, *parent; ++}; + -+ err = PTR_ERR(inode); -+ if (!wh_dentry) ++static int au_cpup_before_link(struct dentry *src_dentry, ++ struct au_link_args *a) ++{ ++ int err; ++ struct dentry *h_src_dentry; ++ struct file *h_file; ++ ++ di_read_lock_parent(a->src_parent, AuLock_IR); ++ err = au_test_and_cpup_dirs(src_dentry, a->bdst); ++ if (unlikely(err)) + goto out; + -+ /* revert */ -+ /* dir inode is locked */ -+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); -+ rerr = PTR_ERR(wh); -+ if (IS_ERR(wh)) { -+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ } else -+ dput(wh); ++ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); ++ err = au_pin(&a->pin, src_dentry, a->bdst, ++ au_opt_udba(src_dentry->d_sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out; ++ h_file = au_h_open_pre(src_dentry, a->bsrc); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ err = au_sio_cpup_simple(src_dentry, a->bdst, -1, ++ AuCpup_DTIME /* | AuCpup_KEEPLINO */, ++ &a->pin); ++ au_h_open_post(src_dentry, a->bsrc, h_file); ++ } ++ au_unpin(&a->pin); + +out: ++ di_read_unlock(a->src_parent, AuLock_IR); + return err; +} + -+static int au_d_may_add(struct dentry *dentry) -+{ -+ int err; -+ -+ err = 0; -+ if (unlikely(d_unhashed(dentry))) -+ err = -ENOENT; -+ if (unlikely(dentry->d_inode)) -+ err = -EEXIST; -+ return err; -+} -+ -+/* -+ * simple tests for the adding inode operations. -+ * following the checks in vfs, plus the parent-child relationship. -+ */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir) ++static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry, ++ struct au_link_args *a) +{ + int err; -+ umode_t h_mode; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ -+ err = -ENAMETOOLONG; -+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ goto out; ++ unsigned char plink; ++ aufs_bindex_t bend; ++ struct dentry *h_src_dentry; ++ struct inode *h_inode, *inode; ++ struct super_block *sb; ++ struct file *h_file; + -+ h_dentry = au_h_dptr(dentry, bindex); -+ h_inode = h_dentry->d_inode; -+ if (!dentry->d_inode) { -+ err = -EEXIST; -+ if (unlikely(h_inode)) -+ goto out; ++ plink = 0; ++ h_inode = NULL; ++ sb = src_dentry->d_sb; ++ inode = src_dentry->d_inode; ++ if (au_ibstart(inode) <= a->bdst) ++ h_inode = au_h_iptr(inode, a->bdst); ++ if (!h_inode || !h_inode->i_nlink) { ++ /* copyup src_dentry as the name of dentry. */ ++ bend = au_dbend(dentry); ++ if (bend < a->bsrc) ++ au_set_dbend(dentry, a->bsrc); ++ au_set_h_dptr(dentry, a->bsrc, ++ dget(au_h_dptr(src_dentry, a->bsrc))); ++ dget(a->h_path.dentry); ++ au_set_h_dptr(dentry, a->bdst, NULL); ++ dentry->d_inode = src_dentry->d_inode; /* tmp */ ++ h_file = au_h_open_pre(dentry, a->bsrc); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ err = au_sio_cpup_simple(dentry, a->bdst, -1, ++ AuCpup_KEEPLINO, &a->pin); ++ au_h_open_post(dentry, a->bsrc, h_file); ++ if (!err) { ++ dput(a->h_path.dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ } else ++ au_set_h_dptr(dentry, a->bdst, ++ a->h_path.dentry); ++ } ++ dentry->d_inode = NULL; /* restore */ ++ au_set_h_dptr(dentry, a->bsrc, NULL); ++ au_set_dbend(dentry, bend); + } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ -+ h_mode = h_inode->i_mode; -+ if (!isdir) { -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(h_mode))) ++ /* the inode of src_dentry already exists on a.bdst branch */ ++ h_src_dentry = d_find_alias(h_inode); ++ if (!h_src_dentry && au_plink_test(inode)) { ++ plink = 1; ++ h_src_dentry = au_plink_lkup(inode, a->bdst); ++ err = PTR_ERR(h_src_dentry); ++ if (IS_ERR(h_src_dentry)) + goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; ++ ++ if (unlikely(!h_src_dentry->d_inode)) { ++ dput(h_src_dentry); ++ h_src_dentry = NULL; ++ } ++ ++ } ++ if (h_src_dentry) { ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ AuIOErr("no dentry found for hi%lu on b%d\n", ++ h_inode->i_ino, a->bdst); ++ err = -EIO; + } + } + -+ err = 0; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ err = -EIO; ++ if (!err && !plink) ++ au_plink_append(inode, a->bdst, a->h_path.dentry); + +out: + AuTraceErr(err); + return err; +} + -+/* -+ * initial procedure of adding a new entry. -+ * prepare writable branch and the parent dir, lock it, -+ * and lookup whiteout for the new entry. -+ */ -+static struct dentry* -+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, -+ struct dentry *src_dentry, struct au_pin *pin, -+ struct au_wr_dir_args *wr_dir_args) ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) +{ -+ struct dentry *wh_dentry, *h_parent; ++ int err, rerr; ++ struct au_dtime dt; ++ struct au_link_args *a; ++ struct dentry *wh_dentry, *h_src_dentry; ++ struct inode *inode; + struct super_block *sb; -+ struct au_branch *br; -+ int err; -+ unsigned int udba; -+ aufs_bindex_t bcpup; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; + -+ AuDbg("%.*s\n", AuDLNPair(dentry)); ++ IMustLock(dir); ++ inode = src_dentry->d_inode; ++ IMustLock(inode); + -+ err = au_wr_dir(dentry, src_dentry, wr_dir_args); -+ bcpup = err; -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err < 0)) ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) + goto out; + -+ sb = dentry->d_sb; -+ udba = au_opt_udba(sb); -+ err = au_pin(pin, dentry, bcpup, udba, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ wh_dentry = ERR_PTR(err); ++ a->parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_and_write_lock2(dentry, src_dentry, ++ AuLock_NOPLM | AuLock_GEN); + if (unlikely(err)) -+ goto out; ++ goto out_kfree; ++ err = au_d_hashed_positive(src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; + -+ h_parent = au_pinned_h_parent(pin); -+ if (udba != AuOpt_UDBA_NONE -+ && au_dbstart(dentry) == bcpup) -+ err = au_may_add(dentry, bcpup, h_parent, -+ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); -+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ err = -ENAMETOOLONG; -+ wh_dentry = ERR_PTR(err); ++ a->src_parent = dget_parent(src_dentry); ++ wr_dir_args.force_btgt = au_ibstart(inode); ++ ++ di_write_lock_parent(a->parent); ++ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ a->bdst = au_dbstart(dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); ++ a->bsrc = au_ibstart(inode); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ if (!h_src_dentry) { ++ a->bsrc = au_dbstart(src_dentry); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ AuDebugOn(!h_src_dentry); ++ } else if (IS_ERR(h_src_dentry)) ++ goto out_parent; ++ ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) ++ err = au_cpup_or_link(src_dentry, dentry, a); ++ else ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ /* ++ * copyup src_dentry to the branch we process, ++ * and then link(2) to it. ++ */ ++ dput(h_src_dentry); ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { ++ au_unpin(&a->pin); ++ di_write_unlock(a->parent); ++ err = au_cpup_before_link(src_dentry, a); ++ di_write_lock_parent(a->parent); ++ if (!err) ++ err = au_pin(&a->pin, dentry, a->bdst, ++ au_opt_udba(sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_wh; ++ } ++ if (!err) { ++ h_src_dentry = au_h_dptr(src_dentry, a->bdst); ++ err = -ENOENT; ++ if (h_src_dentry && h_src_dentry->d_inode) ++ err = vfsub_link(h_src_dentry, ++ au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ } ++ } + if (unlikely(err)) + goto out_unpin; + -+ br = au_sbr(sb, bcpup); -+ if (dt) { -+ struct path tmp = { -+ .dentry = h_parent, -+ .mnt = br->br_mnt -+ }; -+ au_dtime_store(dt, au_pinned_parent(pin), &tmp); ++ if (wh_dentry) { ++ a->h_path.dentry = wh_dentry; ++ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out_revert; + } + -+ wh_dentry = NULL; -+ if (bcpup != au_dbwh(dentry)) -+ goto out; /* success */ -+ -+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++ dir->i_version++; ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ inc_nlink(inode); ++ inode->i_ctime = dir->i_ctime; ++ d_instantiate(dentry, au_igrab(inode)); ++ if (d_unhashed(a->h_path.dentry)) ++ /* some filesystem calls d_drop() */ ++ d_drop(dentry); ++ goto out_unpin; /* success */ + ++out_revert: ++ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0); ++ if (unlikely(rerr)) { ++ AuIOErr("%.*s reverting failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); +out_unpin: -+ if (IS_ERR(wh_dentry)) -+ au_unpin(pin); ++ au_unpin(&a->pin); ++out_wh: ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(a->parent); ++ dput(a->src_parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++out_kfree: ++ kfree(a); +out: -+ return wh_dentry; ++ AuTraceErr(err); ++ return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+enum { Mknod, Symlink, Creat }; -+struct simple_arg { -+ int type; -+ union { -+ struct { -+ umode_t mode; -+ bool want_excl; -+ } c; -+ struct { -+ const char *symname; -+ } s; -+ struct { -+ umode_t mode; -+ dev_t dev; -+ } m; -+ } u; -+}; -+ -+static int add_simple(struct inode *dir, struct dentry *dentry, -+ struct simple_arg *arg) ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ -+ int err; -+ aufs_bindex_t bstart; -+ unsigned char created; -+ struct au_dtime dt; -+ struct au_pin pin; ++ int err, rerr; ++ aufs_bindex_t bindex; ++ unsigned char diropq; + struct path h_path; -+ struct dentry *wh_dentry, *parent; -+ struct inode *h_dir; ++ struct dentry *wh_dentry, *parent, *opq_dentry; ++ struct mutex *h_mtx; ++ struct super_block *sb; ++ struct { ++ struct au_pin pin; ++ struct au_dtime dt; ++ } *a; /* reduce the stack usage */ + struct au_wr_dir_args wr_dir_args = { + .force_btgt = -1, -+ .flags = AuWrDir_ADD_ENTRY ++ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR + }; + -+ AuDbg("%.*s\n", AuDLNPair(dentry)); + IMustLock(dir); + -+ parent = dentry->d_parent; /* dir inode is locked */ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ + err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); + if (unlikely(err)) -+ goto out; ++ goto out_free; + err = au_d_may_add(dentry); + if (unlikely(err)) + goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ + di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, -+ &wr_dir_args); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, ++ &a->pin, &wr_dir_args); + err = PTR_ERR(wh_dentry); + if (IS_ERR(wh_dentry)) + goto out_parent; + -+ bstart = au_dbstart(dentry); -+ h_path.dentry = au_h_dptr(dentry, bstart); -+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); -+ h_dir = au_pinned_h_dir(&pin); -+ switch (arg->type) { -+ case Creat: -+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode, -+ arg->u.c.want_excl); -+ break; -+ case Symlink: -+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname); -+ break; -+ case Mknod: -+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev); -+ break; -+ default: -+ BUG(); ++ sb = dentry->d_sb; ++ bindex = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ /* make the dir opaque */ ++ diropq = 0; ++ h_mtx = &h_path.dentry->d_inode->i_mutex; ++ if (wh_dentry ++ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ opq_dentry = au_diropq_create(dentry, bindex); ++ mutex_unlock(h_mtx); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out_dir; ++ dput(opq_dentry); ++ diropq = 1; ++ } ++ ++ err = epilog(dir, bindex, wh_dentry, dentry); ++ if (!err) { ++ inc_nlink(dir); ++ goto out_unpin; /* success */ + } -+ created = !err; -+ if (!err) -+ err = epilog(dir, bstart, wh_dentry, dentry); + + /* revert */ -+ if (unlikely(created && err && h_path.dentry->d_inode)) { -+ int rerr; -+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ if (diropq) { ++ AuLabel(revert opq); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bindex); ++ mutex_unlock(h_mtx); + if (rerr) { -+ AuIOErr("%.*s revert failure(%d, %d)\n", ++ AuIOErr("%.*s reverting diropq failed(%d, %d)\n", + AuDLNPair(dentry), err, rerr); + err = -EIO; + } -+ au_dtime_revert(&dt); + } + -+ au_unpin(&pin); ++out_dir: ++ AuLabel(revert dir); ++ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); ++ if (rerr) { ++ AuIOErr("%.*s reverting dir failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&a->dt); ++out_unpin: ++ au_unpin(&a->pin); + dput(wh_dentry); -+ +out_parent: + di_write_unlock(parent); +out_unlock: @@ -15028,1447 +15574,1550 @@ + d_drop(dentry); + } + aufs_read_unlock(dentry, AuLock_DW); ++out_free: ++ kfree(a); +out: + return err; +} +--- /dev/null ++++ linux-3.9/fs/aufs/i_op_del.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,477 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+ dev_t dev) -+{ -+ struct simple_arg arg = { -+ .type = Mknod, -+ .u.m = { -+ .mode = mode, -+ .dev = dev -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} ++/* ++ * inode operations (del entry) ++ */ + -+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) -+{ -+ struct simple_arg arg = { -+ .type = Symlink, -+ .u.s.symname = symname -+ }; -+ return add_simple(dir, dentry, &arg); -+} ++#include "aufs.h" + -+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, -+ bool want_excl) ++/* ++ * decide if a new whiteout for @dentry is necessary or not. ++ * when it is necessary, prepare the parent dir for the upper branch whose ++ * branch index is @bcpup for creation. the actual creation of the whiteout will ++ * be done by caller. ++ * return value: ++ * 0: wh is unnecessary ++ * plus: wh is necessary ++ * minus: error ++ */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) +{ -+ struct simple_arg arg = { -+ .type = Creat, -+ .u.c = { -+ .mode = mode, -+ .want_excl = want_excl -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_link_args { -+ aufs_bindex_t bdst, bsrc; -+ struct au_pin pin; -+ struct path h_path; -+ struct dentry *src_parent, *parent; -+}; ++ int need_wh, err; ++ aufs_bindex_t bstart; ++ struct super_block *sb; + -+static int au_cpup_before_link(struct dentry *src_dentry, -+ struct au_link_args *a) -+{ -+ int err; -+ struct dentry *h_src_dentry; -+ struct mutex *h_mtx; -+ struct file *h_file; ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ if (*bcpup < 0) { ++ *bcpup = bstart; ++ if (au_test_ro(sb, bstart, dentry->d_inode)) { ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ *bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else ++ AuDebugOn(bstart < *bcpup ++ || au_test_ro(sb, *bcpup, dentry->d_inode)); ++ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); + -+ di_read_lock_parent(a->src_parent, AuLock_IR); -+ err = au_test_and_cpup_dirs(src_dentry, a->bdst); -+ if (unlikely(err)) -+ goto out; ++ if (*bcpup != bstart) { ++ err = au_cpup_dirs(dentry, *bcpup); ++ if (unlikely(err)) ++ goto out; ++ need_wh = 1; ++ } else { ++ struct au_dinfo *dinfo, *tmp; + -+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); -+ h_mtx = &h_src_dentry->d_inode->i_mutex; -+ err = au_pin(&a->pin, src_dentry, a->bdst, -+ au_opt_udba(src_dentry->d_sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ h_file = au_h_open_pre(src_dentry, a->bsrc); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ h_file = NULL; -+ } else -+ err = au_sio_cpup_simple(src_dentry, a->bdst, -1, -+ AuCpup_DTIME /* | AuCpup_KEEPLINO */); -+ mutex_unlock(h_mtx); -+ au_h_open_post(src_dentry, a->bsrc, h_file); -+ au_unpin(&a->pin); ++ need_wh = -ENOMEM; ++ dinfo = au_di(dentry); ++ tmp = au_di_alloc(sb, AuLsc_DI_TMP); ++ if (tmp) { ++ au_di_cp(tmp, dinfo); ++ au_di_swap(tmp, dinfo); ++ /* returns the number of positive dentries */ ++ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0); ++ au_di_swap(tmp, dinfo); ++ au_rw_write_unlock(&tmp->di_rwsem); ++ au_di_free(tmp); ++ } ++ } ++ AuDbg("need_wh %d\n", need_wh); ++ err = need_wh; + +out: -+ di_read_unlock(a->src_parent, AuLock_IR); + return err; +} + -+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a) ++/* ++ * simple tests for the del-entry operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) +{ + int err; -+ unsigned char plink; -+ struct inode *h_inode, *inode; -+ struct dentry *h_src_dentry; -+ struct super_block *sb; -+ struct file *h_file; -+ -+ plink = 0; -+ h_inode = NULL; -+ sb = src_dentry->d_sb; -+ inode = src_dentry->d_inode; -+ if (au_ibstart(inode) <= a->bdst) -+ h_inode = au_h_iptr(inode, a->bdst); -+ if (!h_inode || !h_inode->i_nlink) { -+ /* copyup src_dentry as the name of dentry. */ -+ au_set_dbstart(src_dentry, a->bdst); -+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry)); -+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode; -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ h_file = au_h_open_pre(src_dentry, a->bsrc); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ h_file = NULL; -+ } else -+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -+ -1, AuCpup_KEEPLINO, -+ a->parent); -+ mutex_unlock(&h_inode->i_mutex); -+ au_h_open_post(src_dentry, a->bsrc, h_file); -+ au_set_h_dptr(src_dentry, a->bdst, NULL); -+ au_set_dbstart(src_dentry, a->bsrc); -+ } else { -+ /* the inode of src_dentry already exists on a.bdst branch */ -+ h_src_dentry = d_find_alias(h_inode); -+ if (!h_src_dentry && au_plink_test(inode)) { -+ plink = 1; -+ h_src_dentry = au_plink_lkup(inode, a->bdst); -+ err = PTR_ERR(h_src_dentry); -+ if (IS_ERR(h_src_dentry)) -+ goto out; ++ umode_t h_mode; ++ struct dentry *h_dentry, *h_latest; ++ struct inode *h_inode; + -+ if (unlikely(!h_src_dentry->d_inode)) { -+ dput(h_src_dentry); -+ h_src_dentry = NULL; -+ } ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (dentry->d_inode) { ++ err = -ENOENT; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; + ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; + } -+ if (h_src_dentry) { -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path); -+ dput(h_src_dentry); -+ } else { -+ AuIOErr("no dentry found for hi%lu on b%d\n", -+ h_inode->i_ino, a->bdst); -+ err = -EIO; -+ } ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(h_inode)) ++ goto out; + } + -+ if (!err && !plink) -+ au_plink_append(inode, a->bdst, a->h_path.dentry); ++ err = -ENOENT; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ goto out; ++ err = 0; ++ ++ /* ++ * rmdir a dir may break the consistency on some filesystem. ++ * let's try heavy test. ++ */ ++ err = -EACCES; ++ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE))) ++ goto out; ++ ++ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent, ++ au_sbr(dentry->d_sb, bindex)); ++ err = -EIO; ++ if (IS_ERR(h_latest)) ++ goto out; ++ if (h_latest == h_dentry) ++ err = 0; ++ dput(h_latest); + +out: -+ AuTraceErr(err); + return err; +} + -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry) ++/* ++ * decide the branch where we operate for @dentry. the branch index will be set ++ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent ++ * dir for reverting. ++ * when a new whiteout is necessary, create it. ++ */ ++static struct dentry* ++lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, ++ struct au_dtime *dt, struct au_pin *pin) +{ -+ int err, rerr; -+ struct au_dtime dt; -+ struct au_link_args *a; -+ struct dentry *wh_dentry, *h_src_dentry; -+ struct inode *inode; ++ struct dentry *wh_dentry; + struct super_block *sb; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; -+ -+ IMustLock(dir); -+ inode = src_dentry->d_inode; -+ IMustLock(inode); ++ struct path h_path; ++ int err, need_wh; ++ unsigned int udba; ++ aufs_bindex_t bcpup; + -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) ++ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); ++ wh_dentry = ERR_PTR(need_wh); ++ if (unlikely(need_wh < 0)) + goto out; + -+ a->parent = dentry->d_parent; /* dir inode is locked */ -+ err = aufs_read_and_write_lock2(dentry, src_dentry, -+ AuLock_NOPLM | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_kfree; -+ err = au_d_hashed_positive(src_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ err = au_d_may_add(dentry); ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ bcpup = *rbcpup; ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); + if (unlikely(err)) -+ goto out_unlock; ++ goto out; + -+ a->src_parent = dget_parent(src_dentry); -+ wr_dir_args.force_btgt = au_ibstart(inode); ++ h_path.dentry = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) { ++ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ } + -+ di_write_lock_parent(a->parent); -+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, -+ &wr_dir_args); -+ err = PTR_ERR(wh_dentry); ++ h_path.mnt = au_sbr_mnt(sb, bcpup); ++ au_dtime_store(dt, au_pinned_parent(pin), &h_path); ++ wh_dentry = NULL; ++ if (!need_wh) ++ goto out; /* success, no need to create whiteout */ ++ ++ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); + if (IS_ERR(wh_dentry)) -+ goto out_parent; ++ goto out_unpin; ++ ++ /* returns with the parent is locked and wh_dentry is dget-ed */ ++ goto out; /* success */ ++ ++out_unpin: ++ au_unpin(pin); ++out: ++ return wh_dentry; ++} ++ ++/* ++ * when removing a dir, rename it to a unique temporary whiteout-ed name first ++ * in order to be revertible and save time for removing many child whiteouts ++ * under the dir. ++ * returns 1 when there are too many child whiteout and caller should remove ++ * them asynchronously. returns 0 when the number of children is enough small to ++ * remove now or the branch fs is a remote fs. ++ * otherwise return an error. ++ */ ++static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, ++ struct au_nhash *whlist, struct inode *dir) ++{ ++ int rmdir_later, err, dirwh; ++ struct dentry *h_dentry; ++ struct super_block *sb; + -+ err = 0; + sb = dentry->d_sb; -+ a->bdst = au_dbstart(dentry); -+ a->h_path.dentry = au_h_dptr(dentry, a->bdst); -+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); -+ a->bsrc = au_ibstart(inode); -+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); -+ if (!h_src_dentry) { -+ a->bsrc = au_dbstart(src_dentry); -+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); -+ AuDebugOn(!h_src_dentry); -+ } else if (IS_ERR(h_src_dentry)) -+ goto out_parent; ++ SiMustAnyLock(sb); ++ h_dentry = au_h_dptr(dentry, bindex); ++ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); ++ if (unlikely(err)) ++ goto out; + -+ if (au_opt_test(au_mntflags(sb), PLINK)) { -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) -+ err = au_cpup_or_link(src_dentry, a); -+ else -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path); -+ dput(h_src_dentry); -+ } else { -+ /* -+ * copyup src_dentry to the branch we process, -+ * and then link(2) to it. -+ */ -+ dput(h_src_dentry); -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { -+ au_unpin(&a->pin); -+ di_write_unlock(a->parent); -+ err = au_cpup_before_link(src_dentry, a); -+ di_write_lock_parent(a->parent); -+ if (!err) -+ err = au_pin(&a->pin, dentry, a->bdst, -+ au_opt_udba(sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_wh; -+ } -+ if (!err) { -+ h_src_dentry = au_h_dptr(src_dentry, a->bdst); -+ err = -ENOENT; -+ if (h_src_dentry && h_src_dentry->d_inode) -+ err = vfsub_link(h_src_dentry, -+ au_pinned_h_dir(&a->pin), -+ &a->h_path); -+ } ++ /* stop monitoring */ ++ au_hn_free(au_hi(dentry->d_inode, bindex)); ++ ++ if (!au_test_fs_remote(h_dentry->d_sb)) { ++ dirwh = au_sbi(sb)->si_dirwh; ++ rmdir_later = (dirwh <= 1); ++ if (!rmdir_later) ++ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, ++ dirwh); ++ if (rmdir_later) ++ return rmdir_later; + } -+ if (unlikely(err)) -+ goto out_unpin; + -+ if (wh_dentry) { -+ a->h_path.dentry = wh_dentry; -+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out_revert; ++ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); ++ if (unlikely(err)) { ++ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n", ++ AuDLNPair(h_dentry), bindex, err); ++ err = 0; + } + -+ dir->i_version++; -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ inc_nlink(inode); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * final procedure for deleting a entry. ++ * maintain dentry and iattr. ++ */ ++static void epilog(struct inode *dir, struct dentry *dentry, ++ aufs_bindex_t bindex) ++{ ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ d_drop(dentry); + inode->i_ctime = dir->i_ctime; -+ d_instantiate(dentry, au_igrab(inode)); -+ if (d_unhashed(a->h_path.dentry)) -+ /* some filesystem calls d_drop() */ -+ d_drop(dentry); -+ goto out_unpin; /* success */ + -+out_revert: -+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0); -+ if (unlikely(rerr)) { -+ AuIOErr("%.*s reverting failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&dt); -+out_unpin: -+ au_unpin(&a->pin); -+out_wh: -+ dput(wh_dentry); -+out_parent: -+ di_write_unlock(a->parent); -+ dput(a->src_parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); ++ if (au_ibstart(dir) == bindex) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++} ++ ++/* ++ * when an error happened, remove the created whiteout and revert everything. ++ */ ++static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex, ++ aufs_bindex_t bwh, struct dentry *wh_dentry, ++ struct dentry *dentry, struct au_dtime *dt) ++{ ++ int rerr; ++ struct path h_path = { ++ .dentry = wh_dentry, ++ .mnt = au_sbr_mnt(dir->i_sb, bindex) ++ }; ++ ++ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry); ++ if (!rerr) { ++ au_set_dbwh(dentry, bwh); ++ au_dtime_revert(dt); ++ return 0; + } -+ aufs_read_and_write_unlock2(dentry, src_dentry); -+out_kfree: -+ kfree(a); -+out: -+ return err; ++ ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ return -EIO; +} + -+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_unlink(struct inode *dir, struct dentry *dentry) +{ -+ int err, rerr; -+ aufs_bindex_t bindex; -+ unsigned char diropq; ++ int err; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; + struct path h_path; -+ struct dentry *wh_dentry, *parent, *opq_dentry; -+ struct mutex *h_mtx; -+ struct super_block *sb; -+ struct { -+ struct au_pin pin; -+ struct au_dtime dt; -+ } *a; /* reduce the stack usage */ -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR -+ }; ++ struct inode *inode, *h_dir; ++ struct dentry *parent, *wh_dentry; + + IMustLock(dir); + -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ + err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); + if (unlikely(err)) -+ goto out_free; -+ err = au_d_may_add(dentry); ++ goto out; ++ err = au_d_hashed_positive(dentry); + if (unlikely(err)) + goto out_unlock; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(inode->i_mode))) ++ goto out_unlock; /* possible? */ + ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; + parent = dentry->d_parent; /* dir inode is locked */ + di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, -+ &a->pin, &wr_dir_args); ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin); + err = PTR_ERR(wh_dentry); + if (IS_ERR(wh_dentry)) + goto out_parent; + -+ sb = dentry->d_sb; -+ bindex = au_dbstart(dentry); -+ h_path.dentry = au_h_dptr(dentry, bindex); -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ /* make the dir opaque */ -+ diropq = 0; -+ h_mtx = &h_path.dentry->d_inode->i_mutex; -+ if (wh_dentry -+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ opq_dentry = au_diropq_create(dentry, bindex); -+ mutex_unlock(h_mtx); -+ err = PTR_ERR(opq_dentry); -+ if (IS_ERR(opq_dentry)) -+ goto out_dir; -+ dput(opq_dentry); -+ diropq = 1; ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ dget(h_path.dentry); ++ if (bindex == bstart) { ++ h_dir = au_pinned_h_dir(&pin); ++ err = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ } else { ++ /* dir inode is locked */ ++ h_dir = wh_dentry->d_parent->d_inode; ++ IMustLock(h_dir); ++ err = 0; + } + -+ err = epilog(dir, bindex, wh_dentry, dentry); + if (!err) { -+ inc_nlink(dir); ++ vfsub_drop_nlink(inode); ++ epilog(dir, dentry, bindex); ++ ++ /* update target timestamps */ ++ if (bindex == bstart) { ++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ ++ inode->i_ctime = h_path.dentry->d_inode->i_ctime; ++ } else ++ /* todo: this timestamp may be reverted later */ ++ inode->i_ctime = h_dir->i_ctime; + goto out_unpin; /* success */ + } + + /* revert */ -+ if (diropq) { -+ AuLabel(revert opq); -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(dentry, bindex); -+ mutex_unlock(h_mtx); -+ if (rerr) { -+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ } -+ } ++ if (wh_dentry) { ++ int rerr; + -+out_dir: -+ AuLabel(revert dir); -+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); -+ if (rerr) { -+ AuIOErr("%.*s reverting dir failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; ++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; + } -+ au_dtime_revert(&a->dt); ++ +out_unpin: -+ au_unpin(&a->pin); ++ au_unpin(&pin); + dput(wh_dentry); ++ dput(h_path.dentry); +out_parent: + di_write_unlock(parent); +out_unlock: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } + aufs_read_unlock(dentry, AuLock_DW); -+out_free: -+ kfree(a); +out: + return err; +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/i_op.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,1030 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * inode operations (except add/del/rename) -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "aufs.h" + -+static int h_permission(struct inode *h_inode, int mask, -+ struct vfsmount *h_mnt, int brperm) ++int aufs_rmdir(struct inode *dir, struct dentry *dentry) +{ -+ int err; -+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); -+ -+ err = -EACCES; -+ if ((write_mask && IS_IMMUTABLE(h_inode)) -+ || ((mask & MAY_EXEC) -+ && S_ISREG(h_inode->i_mode) -+ && ((h_mnt->mnt_flags & MNT_NOEXEC) -+ || !(h_inode->i_mode & S_IXUGO)))) -+ goto out; -+ -+ /* -+ * - skip the lower fs test in the case of write to ro branch. -+ * - nfs dir permission write check is optimized, but a policy for -+ * link/rename requires a real check. -+ */ -+ if ((write_mask && !au_br_writable(brperm)) -+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) -+ && write_mask && !(mask & MAY_READ)) -+ || !h_inode->i_op->permission) { -+ /* AuLabel(generic_permission); */ -+ err = generic_permission(h_inode, mask); -+ } else { -+ /* AuLabel(h_inode->permission); */ -+ err = h_inode->i_op->permission(h_inode, mask); -+ AuTraceErr(err); -+ } -+ -+ if (!err) -+ err = devcgroup_inode_permission(h_inode, mask); -+ if (!err) -+ err = security_inode_permission(h_inode, mask); -+ -+#if 0 -+ if (!err) { -+ /* todo: do we need to call ima_path_check()? */ -+ struct path h_path = { -+ .dentry = -+ .mnt = h_mnt -+ }; -+ err = ima_path_check(&h_path, -+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC), -+ IMA_COUNT_LEAVE); -+ } -+#endif ++ int err, rmdir_later; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct inode *inode; ++ struct dentry *parent, *wh_dentry, *h_dentry; ++ struct au_whtmp_rmdir *args; + -+out: -+ return err; -+} ++ IMustLock(dir); + -+static int aufs_permission(struct inode *inode, int mask) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ const unsigned char isdir = !!S_ISDIR(inode->i_mode), -+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct au_branch *br; ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN); ++ if (unlikely(err)) ++ goto out; ++ err = au_alive_dir(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ err = -ENOTDIR; ++ if (unlikely(!S_ISDIR(inode->i_mode))) ++ goto out_unlock; /* possible? */ + -+ /* todo: support rcu-walk? */ -+ if (mask & MAY_NOT_BLOCK) -+ return -ECHILD; ++ err = -ENOMEM; ++ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); ++ if (unlikely(!args)) ++ goto out_unlock; + -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ ii_read_lock_child(inode); -+#if 0 -+ err = au_iigen_test(inode, au_sigen(sb)); ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ err = au_test_empty(dentry, &args->whlist); + if (unlikely(err)) -+ goto out; -+#endif ++ goto out_parent; + -+ if (!isdir || write_mask) { -+ err = au_busy_or_stale(); -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ if (unlikely(!h_inode -+ || (h_inode->i_mode & S_IFMT) -+ != (inode->i_mode & S_IFMT))) -+ goto out; ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; + -+ err = 0; -+ bindex = au_ibstart(inode); -+ br = au_sbr(sb, bindex); -+ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm); -+ if (write_mask -+ && !err -+ && !special_file(h_inode->i_mode)) { -+ /* test whether the upper writable branch exists */ -+ err = -EROFS; -+ for (; bindex >= 0; bindex--) -+ if (!au_br_rdonly(au_sbr(sb, bindex))) { -+ err = 0; -+ break; -+ } ++ h_dentry = au_h_dptr(dentry, bstart); ++ dget(h_dentry); ++ rmdir_later = 0; ++ if (bindex == bstart) { ++ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir); ++ if (err > 0) { ++ rmdir_later = err; ++ err = 0; + } -+ goto out; ++ } else { ++ /* stop monitoring */ ++ au_hn_free(au_hi(inode, bstart)); ++ ++ /* dir inode is locked */ ++ IMustLock(wh_dentry->d_parent->d_inode); ++ err = 0; + } + -+ /* non-write to dir */ -+ err = 0; -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (h_inode) { -+ err = au_busy_or_stale(); -+ if (unlikely(!S_ISDIR(h_inode->i_mode))) -+ break; ++ if (!err) { ++ vfsub_dead_dir(inode); ++ au_set_dbdiropq(dentry, -1); ++ epilog(dir, dentry, bindex); + -+ br = au_sbr(sb, bindex); -+ err = h_permission(h_inode, mask, br->br_mnt, -+ br->br_perm); ++ if (rmdir_later) { ++ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); ++ args = NULL; + } ++ ++ goto out_unpin; /* success */ ++ } ++ ++ /* revert */ ++ AuLabel(revert); ++ if (wh_dentry) { ++ int rerr; ++ ++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; + } + ++out_unpin: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_dentry); ++out_parent: ++ di_write_unlock(parent); ++ if (args) ++ au_whtmp_rmdir_free(args); ++out_unlock: ++ aufs_read_unlock(dentry, AuLock_DW); +out: -+ ii_read_unlock(inode); -+ si_read_unlock(sb); ++ AuTraceErr(err); + return err; +} +--- /dev/null ++++ linux-3.9/fs/aufs/i_op_ren.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,1053 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+/* ---------------------------------------------------------------------- */ ++/* ++ * inode operation (rename entry) ++ * todo: this is crazy monster ++ */ + -+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct dentry *ret, *parent; -+ struct inode *inode; -+ struct super_block *sb; -+ int err, npositive; ++#include "aufs.h" + -+ IMustLock(dir); ++enum { AuSRC, AuDST, AuSrcDst }; ++enum { AuPARENT, AuCHILD, AuParentChild }; + -+ /* todo: support rcu-walk? */ -+ ret = ERR_PTR(-ECHILD); -+ if (flags & LOOKUP_RCU) -+ goto out; ++#define AuRen_ISDIR 1 ++#define AuRen_ISSAMEDIR (1 << 1) ++#define AuRen_WHSRC (1 << 2) ++#define AuRen_WHDST (1 << 3) ++#define AuRen_MNT_WRITE (1 << 4) ++#define AuRen_DT_DSTDIR (1 << 5) ++#define AuRen_DIROPQ (1 << 6) ++#define AuRen_CPUP (1 << 7) ++#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) ++#define au_fset_ren(flags, name) \ ++ do { (flags) |= AuRen_##name; } while (0) ++#define au_fclr_ren(flags, name) \ ++ do { (flags) &= ~AuRen_##name; } while (0) + -+ ret = ERR_PTR(-ENAMETOOLONG); -+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ goto out; ++struct au_ren_args { ++ struct { ++ struct dentry *dentry, *h_dentry, *parent, *h_parent, ++ *wh_dentry; ++ struct inode *dir, *inode; ++ struct au_hinode *hdir; ++ struct au_dtime dt[AuParentChild]; ++ aufs_bindex_t bstart; ++ } sd[AuSrcDst]; + -+ sb = dir->i_sb; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ ret = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; ++#define src_dentry sd[AuSRC].dentry ++#define src_dir sd[AuSRC].dir ++#define src_inode sd[AuSRC].inode ++#define src_h_dentry sd[AuSRC].h_dentry ++#define src_parent sd[AuSRC].parent ++#define src_h_parent sd[AuSRC].h_parent ++#define src_wh_dentry sd[AuSRC].wh_dentry ++#define src_hdir sd[AuSRC].hdir ++#define src_h_dir sd[AuSRC].hdir->hi_inode ++#define src_dt sd[AuSRC].dt ++#define src_bstart sd[AuSRC].bstart + -+ err = au_di_init(dentry); -+ ret = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_si; ++#define dst_dentry sd[AuDST].dentry ++#define dst_dir sd[AuDST].dir ++#define dst_inode sd[AuDST].inode ++#define dst_h_dentry sd[AuDST].h_dentry ++#define dst_parent sd[AuDST].parent ++#define dst_h_parent sd[AuDST].h_parent ++#define dst_wh_dentry sd[AuDST].wh_dentry ++#define dst_hdir sd[AuDST].hdir ++#define dst_h_dir sd[AuDST].hdir->hi_inode ++#define dst_dt sd[AuDST].dt ++#define dst_bstart sd[AuDST].bstart + -+ inode = NULL; -+ npositive = 0; /* suppress a warning */ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_alive_dir(parent); -+ if (!err) -+ err = au_digen_test(parent, au_sigen(sb)); -+ if (!err) { -+ npositive = au_lkup_dentry(dentry, au_dbstart(parent), -+ /*type*/0); -+ err = npositive; -+ } -+ di_read_unlock(parent, AuLock_IR); -+ ret = ERR_PTR(err); -+ if (unlikely(err < 0)) -+ goto out_unlock; ++ struct dentry *h_trap; ++ struct au_branch *br; ++ struct au_hinode *src_hinode; ++ struct path h_path; ++ struct au_nhash whlist; ++ aufs_bindex_t btgt, src_bwh, src_bdiropq; + -+ if (npositive) { -+ inode = au_new_inode(dentry, /*must_new*/0); -+ ret = (void *)inode; -+ } -+ if (IS_ERR(inode)) { -+ inode = NULL; -+ goto out_unlock; -+ } ++ unsigned int flags; + -+ ret = d_splice_alias(inode, dentry); -+#if 0 -+ if (unlikely(d_need_lookup(dentry))) { -+ spin_lock(&dentry->d_lock); -+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP; -+ spin_unlock(&dentry->d_lock); -+ } else -+#endif -+ if (unlikely(IS_ERR(ret) && inode)) { -+ ii_write_unlock(inode); -+ iput(inode); -+ inode = NULL; -+ } ++ struct au_whtmp_rmdir *thargs; ++ struct dentry *h_dst; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * functions for reverting. ++ * when an error happened in a single rename systemcall, we should revert ++ * everything as if nothing happend. ++ * we don't need to revert the copied-up/down the parent dir since they are ++ * harmless. ++ */ ++ ++#define RevertFailure(fmt, ...) do { \ ++ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ ++ ##__VA_ARGS__, err, rerr); \ ++ err = -EIO; \ ++} while (0) ++ ++static void au_ren_rev_diropq(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(a->src_dentry, a->btgt); ++ au_hn_imtx_unlock(a->src_hinode); ++ au_set_dbdiropq(a->src_dentry, a->src_bdiropq); ++ if (rerr) ++ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry)); ++} + -+out_unlock: -+ di_write_unlock(dentry); -+ if (inode) { -+ /* verbose coding for lock class name */ -+ if (unlikely(S_ISLNK(inode->i_mode))) -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcSymlink_DIINFO); -+ else if (unlikely(S_ISDIR(inode->i_mode))) -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcDir_DIINFO); -+ else /* likely */ -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcNonDir_DIINFO); ++static void au_ren_rev_rename(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name, ++ a->src_h_parent); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("lkup one %.*s", AuDLNPair(a->src_dentry)); ++ return; + } -+out_si: -+ si_read_unlock(sb); -+out: -+ return ret; -+} + -+/* ---------------------------------------------------------------------- */ ++ rerr = vfsub_rename(a->dst_h_dir, ++ au_h_dptr(a->src_dentry, a->btgt), ++ a->src_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ ++ if (rerr) ++ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry)); ++} + -+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent, -+ const unsigned char add_entry, aufs_bindex_t bcpup, -+ aufs_bindex_t bstart) ++static void au_ren_rev_cpup(int err, struct au_ren_args *a) +{ -+ int err; -+ struct dentry *h_parent; -+ struct inode *h_dir; ++ int rerr; + -+ if (add_entry) -+ IMustLock(parent->d_inode); -+ else -+ di_write_lock_parent(parent); ++ a->h_path.dentry = a->dst_h_dentry; ++ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0); ++ au_set_h_dptr(a->src_dentry, a->btgt, NULL); ++ au_set_dbstart(a->src_dentry, a->src_bstart); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry)); ++} + -+ err = 0; -+ if (!au_h_dptr(parent, bcpup)) { -+ if (bstart < bcpup) -+ err = au_cpdown_dirs(dentry, bcpup); -+ else -+ err = au_cpup_dirs(dentry, bcpup); -+ } -+ if (!err && add_entry) { -+ h_parent = au_h_dptr(parent, bcpup); -+ h_dir = h_parent->d_inode; -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); -+ err = au_lkup_neg(dentry, bcpup); -+ /* todo: no unlock here */ -+ mutex_unlock(&h_dir->i_mutex); ++static void au_ren_rev_whtmp(int err, struct au_ren_args *a) ++{ ++ int rerr; + -+ AuDbg("bcpup %d\n", bcpup); -+ if (!err) { -+ if (!dentry->d_inode) -+ au_set_h_dptr(dentry, bstart, NULL); -+ au_update_dbrange(dentry, /*do_put_zero*/0); -+ } ++ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name, ++ a->dst_h_parent); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("lkup one %.*s", AuDLNPair(a->dst_dentry)); ++ return; ++ } ++ if (a->h_path.dentry->d_inode) { ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ return; + } + -+ if (!add_entry) -+ di_write_unlock(parent); -+ if (!err) -+ err = bcpup; /* success */ ++ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ if (!rerr) ++ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); ++ else ++ RevertFailure("rename %.*s", AuDLNPair(a->h_dst)); ++} + -+ AuTraceErr(err); -+ return err; ++static void au_ren_rev_whsrc(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ a->h_path.dentry = a->src_wh_dentry; ++ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); ++ au_set_dbwh(a->src_dentry, a->src_bwh); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry)); +} ++#undef RevertFailure ++ ++/* ---------------------------------------------------------------------- */ + +/* -+ * decide the branch and the parent dir where we will create a new entry. -+ * returns new bindex or an error. -+ * copyup the parent dir if needed. ++ * when we have to copyup the renaming entry, do it with the rename-target name ++ * in order to minimize the cost (the later actual rename is unnecessary). ++ * otherwise rename it on the target branch. + */ -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args) ++static int au_ren_or_cpup(struct au_ren_args *a) +{ + int err; -+ aufs_bindex_t bcpup, bstart, src_bstart; -+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags, -+ ADD_ENTRY); -+ struct super_block *sb; -+ struct dentry *parent; -+ struct au_sbinfo *sbinfo; -+ -+ sb = dentry->d_sb; -+ sbinfo = au_sbi(sb); -+ parent = dget_parent(dentry); -+ bstart = au_dbstart(dentry); -+ bcpup = bstart; -+ if (args->force_btgt < 0) { -+ if (src_dentry) { -+ src_bstart = au_dbstart(src_dentry); -+ if (src_bstart < bstart) -+ bcpup = src_bstart; -+ } else if (add_entry) { -+ err = AuWbrCreate(sbinfo, dentry, -+ au_ftest_wrdir(args->flags, ISDIR)); -+ bcpup = err; -+ } ++ struct dentry *d; + -+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { -+ if (add_entry) -+ err = AuWbrCopyup(sbinfo, dentry); -+ else { -+ if (!IS_ROOT(dentry)) { -+ di_read_lock_parent(parent, !AuLock_IR); -+ err = AuWbrCopyup(sbinfo, dentry); -+ di_read_unlock(parent, !AuLock_IR); -+ } else -+ err = AuWbrCopyup(sbinfo, dentry); -+ } -+ bcpup = err; -+ if (unlikely(err < 0)) -+ goto out; -+ } ++ d = a->src_dentry; ++ if (au_dbstart(d) == a->btgt) { ++ a->h_path.dentry = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, DIROPQ) ++ && au_dbdiropq(d) == a->btgt) ++ au_fclr_ren(a->flags, DIROPQ); ++ AuDebugOn(au_dbstart(d) != a->btgt); ++ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), ++ a->dst_h_dir, &a->h_path); + } else { -+ bcpup = args->force_btgt; -+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); -+ } -+ -+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); -+ err = bcpup; -+ if (bcpup == bstart) -+ goto out; /* success */ ++#if 1 ++ BUG(); ++#else ++ struct file *h_file; + -+ /* copyup the new parent into the branch we process */ -+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); -+ if (err >= 0) { -+ if (!dentry->d_inode) { -+ au_set_h_dptr(dentry, bstart, NULL); -+ au_set_dbstart(dentry, bcpup); -+ au_set_dbend(dentry, bcpup); ++ au_fset_ren(a->flags, CPUP); ++ au_set_dbstart(d, a->btgt); ++ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry)); ++ h_file = au_h_open_pre(d, a->src_bstart); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1, ++ !AuCpup_DTIME, a->dst_parent); ++ au_h_open_post(d, a->src_bstart, h_file); + } -+ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup)); ++ if (!err) { ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_update_dbstart(d); ++ } else { ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_set_dbstart(d, a->src_bstart); ++ } ++#endif + } ++ if (!err && a->h_dst) ++ /* it will be set to dinfo later */ ++ dget(a->h_dst); + -+out: -+ dput(parent); + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+struct dentry *au_pinned_h_parent(struct au_pin *pin) -+{ -+ if (pin && pin->parent) -+ return au_h_dptr(pin->parent, pin->bindex); -+ return NULL; -+} -+ -+void au_unpin(struct au_pin *p) ++/* cf. aufs_rmdir() */ ++static int au_ren_del_whtmp(struct au_ren_args *a) +{ -+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE)) -+ vfsub_mnt_drop_write(p->h_mnt); -+ if (!p->hdir) -+ return; ++ int err; ++ struct inode *dir; + -+ au_hn_imtx_unlock(p->hdir); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ iput(p->hdir->hi_inode); -+ dput(p->parent); -+ p->parent = NULL; -+ p->hdir = NULL; -+ p->h_mnt = NULL; ++ dir = a->dst_dir; ++ SiMustAnyLock(dir->i_sb); ++ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt, ++ au_sbi(dir->i_sb)->si_dirwh) ++ || au_test_fs_remote(a->h_dst->d_sb)) { ++ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist); ++ if (unlikely(err)) ++ pr_warn("failed removing whtmp dir %.*s (%d), " ++ "ignored.\n", AuDLNPair(a->h_dst), err); ++ } else { ++ au_nhash_wh_free(&a->thargs->whlist); ++ a->thargs->whlist = a->whlist; ++ a->whlist.nh_num = 0; ++ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); ++ dput(a->h_dst); ++ a->thargs = NULL; ++ } ++ ++ return 0; +} + -+int au_do_pin(struct au_pin *p) ++/* make it 'opaque' dir. */ ++static int au_ren_diropq(struct au_ren_args *a) +{ + int err; -+ struct super_block *sb; -+ struct dentry *h_dentry, *h_parent; -+ struct au_branch *br; -+ struct inode *h_dir; ++ struct dentry *diropq; + + err = 0; -+ sb = p->dentry->d_sb; -+ br = au_sbr(sb, p->bindex); -+ if (IS_ROOT(p->dentry)) { -+ if (au_ftest_pin(p->flags, MNT_WRITE)) { -+ p->h_mnt = br->br_mnt; -+ err = vfsub_mnt_want_write(p->h_mnt); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_err; -+ } -+ } -+ goto out; -+ } -+ -+ h_dentry = NULL; -+ if (p->bindex <= au_dbend(p->dentry)) -+ h_dentry = au_h_dptr(p->dentry, p->bindex); ++ a->src_bdiropq = au_dbdiropq(a->src_dentry); ++ a->src_hinode = au_hi(a->src_inode, a->btgt); ++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ diropq = au_diropq_create(a->src_dentry, a->btgt); ++ au_hn_imtx_unlock(a->src_hinode); ++ if (IS_ERR(diropq)) ++ err = PTR_ERR(diropq); ++ dput(diropq); + -+ p->parent = dget_parent(p->dentry); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_lock(p->parent, AuLock_IR, p->lsc_di); ++ return err; ++} + -+ h_dir = NULL; -+ h_parent = au_h_dptr(p->parent, p->bindex); -+ p->hdir = au_hi(p->parent->d_inode, p->bindex); -+ if (p->hdir) -+ h_dir = p->hdir->hi_inode; ++static int do_rename(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d, *h_d; + -+ /* -+ * udba case, or -+ * if DI_LOCKED is not set, then p->parent may be different -+ * and h_parent can be NULL. -+ */ -+ if (unlikely(!p->hdir || !h_dir || !h_parent)) { -+ err = -EBUSY; -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ dput(p->parent); -+ p->parent = NULL; -+ goto out_err; ++ /* prepare workqueue args for asynchronous rmdir */ ++ h_d = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { ++ err = -ENOMEM; ++ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); ++ if (unlikely(!a->thargs)) ++ goto out; ++ a->h_dst = dget(h_d); + } + -+ au_igrab(h_dir); -+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi); ++ /* create whiteout for src_dentry */ ++ if (au_ftest_ren(a->flags, WHSRC)) { ++ a->src_bwh = au_dbwh(a->src_dentry); ++ AuDebugOn(a->src_bwh >= 0); ++ a->src_wh_dentry ++ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); ++ err = PTR_ERR(a->src_wh_dentry); ++ if (IS_ERR(a->src_wh_dentry)) ++ goto out_thargs; ++ } + -+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) { -+ err = -EBUSY; -+ goto out_unpin; ++ /* lookup whiteout for dentry */ ++ if (au_ftest_ren(a->flags, WHDST)) { ++ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, ++ a->br); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out_whsrc; ++ if (!h_d->d_inode) ++ dput(h_d); ++ else ++ a->dst_wh_dentry = h_d; + } -+ if (h_dentry) { -+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_unpin; -+ } ++ ++ /* rename dentry to tmpwh */ ++ if (a->thargs) { ++ err = au_whtmp_ren(a->dst_h_dentry, a->br); ++ if (unlikely(err)) ++ goto out_whdst; ++ ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ err = au_lkup_neg(d, a->btgt, /*wh*/0); ++ if (unlikely(err)) ++ goto out_whtmp; ++ a->dst_h_dentry = au_h_dptr(d, a->btgt); + } + -+ if (au_ftest_pin(p->flags, MNT_WRITE)) { -+ p->h_mnt = br->br_mnt; -+ err = vfsub_mnt_want_write(p->h_mnt); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_unpin; ++ /* cpup src */ ++ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) { ++#if 1 ++ BUG(); ++#else ++ struct file *h_file; ++ ++ h_file = au_h_open_pre(a->src_dentry, a->src_bstart); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1, ++ !AuCpup_DTIME); ++ au_h_open_post(a->src_dentry, a->src_bstart, h_file); + } ++ if (unlikely(err)) ++ goto out_whtmp; ++#endif + } -+ goto out; /* success */ -+ -+out_unpin: -+ au_unpin(p); -+out_err: -+ pr_err("err %d\n", err); -+ err = au_busy_or_stale(); -+out: -+ return err; -+} + -+void au_pin_init(struct au_pin *p, struct dentry *dentry, -+ aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags) -+{ -+ p->dentry = dentry; -+ p->udba = udba; -+ p->lsc_di = lsc_di; -+ p->lsc_hi = lsc_hi; -+ p->flags = flags; -+ p->bindex = bindex; ++ /* rename by vfs_rename or cpup */ ++ d = a->dst_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) ++ && (a->dst_wh_dentry ++ || au_dbdiropq(d) == a->btgt ++ /* hide the lower to keep xino */ ++ || a->btgt < au_dbend(d) ++ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) ++ au_fset_ren(a->flags, DIROPQ); ++ err = au_ren_or_cpup(a); ++ if (unlikely(err)) ++ /* leave the copied-up one */ ++ goto out_whtmp; + -+ p->parent = NULL; -+ p->hdir = NULL; -+ p->h_mnt = NULL; -+} ++ /* make dir opaque */ ++ if (au_ftest_ren(a->flags, DIROPQ)) { ++ err = au_ren_diropq(a); ++ if (unlikely(err)) ++ goto out_rename; ++ } + -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) -+{ -+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, -+ udba, flags); -+ return au_do_pin(pin); -+} ++ /* update target timestamps */ ++ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); ++ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); ++ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ ++ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; + -+/* ---------------------------------------------------------------------- */ ++ /* remove whiteout for dentry */ ++ if (a->dst_wh_dentry) { ++ a->h_path.dentry = a->dst_wh_dentry; ++ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, ++ a->dst_dentry); ++ if (unlikely(err)) ++ goto out_diropq; ++ } + -+/* -+ * ->setattr() and ->getattr() are called in various cases. -+ * chmod, stat: dentry is revalidated. -+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be -+ * unhashed. -+ * for ->setattr(), ia->ia_file is passed from ftruncate only. -+ */ -+/* todo: consolidate with do_refresh() and simple_reval_dpath() */ -+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen) -+{ -+ int err; -+ struct inode *inode; -+ struct dentry *parent; ++ /* remove whtmp */ ++ if (a->thargs) ++ au_ren_del_whtmp(a); /* ignore this error */ + + err = 0; -+ inode = dentry->d_inode; -+ if (au_digen_test(dentry, sigen)) { -+ parent = dget_parent(dentry); -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_refresh_dentry(dentry, parent); -+ di_read_unlock(parent, AuLock_IR); -+ dput(parent); -+ } -+ -+ AuTraceErr(err); -+ return err; -+} ++ goto out_success; + -+#define AuIcpup_DID_CPUP 1 -+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) -+#define au_fset_icpup(flags, name) \ -+ do { (flags) |= AuIcpup_##name; } while (0) -+#define au_fclr_icpup(flags, name) \ -+ do { (flags) &= ~AuIcpup_##name; } while (0) ++out_diropq: ++ if (au_ftest_ren(a->flags, DIROPQ)) ++ au_ren_rev_diropq(err, a); ++out_rename: ++ if (!au_ftest_ren(a->flags, CPUP)) ++ au_ren_rev_rename(err, a); ++ else ++ au_ren_rev_cpup(err, a); ++ dput(a->h_dst); ++out_whtmp: ++ if (a->thargs) ++ au_ren_rev_whtmp(err, a); ++out_whdst: ++ dput(a->dst_wh_dentry); ++ a->dst_wh_dentry = NULL; ++out_whsrc: ++ if (a->src_wh_dentry) ++ au_ren_rev_whsrc(err, a); ++out_success: ++ dput(a->src_wh_dentry); ++ dput(a->dst_wh_dentry); ++out_thargs: ++ if (a->thargs) { ++ dput(a->h_dst); ++ au_whtmp_rmdir_free(a->thargs); ++ a->thargs = NULL; ++ } ++out: ++ return err; ++} + -+struct au_icpup_args { -+ unsigned char flags; -+ unsigned char pin_flags; -+ aufs_bindex_t btgt; -+ unsigned int udba; -+ struct au_pin pin; -+ struct path h_path; -+ struct inode *h_inode; -+}; ++/* ---------------------------------------------------------------------- */ + -+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, -+ struct au_icpup_args *a) ++/* ++ * test if @dentry dir can be rename destination or not. ++ * success means, it is a logically empty dir. ++ */ ++static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) ++{ ++ return au_test_empty(dentry, whlist); ++} ++ ++/* ++ * test if @dentry dir can be rename source or not. ++ * if it can, return 0 and @children is filled. ++ * success means, ++ * - it is a logically empty dir. ++ * - or, it exists on writable branch and has no children including whiteouts ++ * on the lower branch. ++ */ ++static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) +{ + int err; -+ loff_t sz; -+ aufs_bindex_t bstart, ibstart; -+ struct dentry *hi_wh, *parent; -+ struct inode *inode; -+ struct file *h_file; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = 0 -+ }; ++ unsigned int rdhash; ++ aufs_bindex_t bstart; + + bstart = au_dbstart(dentry); -+ inode = dentry->d_inode; -+ if (S_ISDIR(inode->i_mode)) -+ au_fset_wrdir(wr_dir_args.flags, ISDIR); -+ /* plink or hi_wh() case */ -+ ibstart = au_ibstart(inode); -+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode)) -+ wr_dir_args.force_btgt = ibstart; -+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); -+ if (unlikely(err < 0)) -+ goto out; -+ a->btgt = err; -+ if (err != bstart) -+ au_fset_icpup(a->flags, DID_CPUP); ++ if (bstart != btgt) { ++ struct au_nhash whlist; + -+ err = 0; -+ a->pin_flags = AuPin_MNT_WRITE; -+ parent = NULL; -+ if (!IS_ROOT(dentry)) { -+ au_fset_pin(a->pin_flags, DI_LOCKED); -+ parent = dget_parent(dentry); -+ di_write_lock_parent(parent); ++ SiMustAnyLock(dentry->d_sb); ++ rdhash = au_sbi(dentry->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, ++ dentry)); ++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_test_empty(dentry, &whlist); ++ au_nhash_wh_free(&whlist); ++ goto out; + } + -+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags); -+ if (unlikely(err)) -+ goto out_parent; ++ if (bstart == au_dbtaildir(dentry)) ++ return 0; /* success */ + -+ a->h_path.dentry = au_h_dptr(dentry, bstart); -+ a->h_inode = a->h_path.dentry->d_inode; -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ sz = -1; -+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) -+ sz = ia->ia_size; ++ err = au_test_empty_lower(dentry); + -+ h_file = NULL; -+ hi_wh = NULL; -+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) { -+ hi_wh = au_hi_wh(inode, a->btgt); -+ if (!hi_wh) { -+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL); -+ if (unlikely(err)) -+ goto out_unlock; -+ hi_wh = au_hi_wh(inode, a->btgt); -+ /* todo: revalidate hi_wh? */ -+ } ++out: ++ if (err == -ENOTEMPTY) { ++ AuWarn1("renaming dir who has child(ren) on multiple branches," ++ " is not supported\n"); ++ err = -EXDEV; + } ++ return err; ++} + -+ if (parent) { -+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); -+ di_downgrade_lock(parent, AuLock_IR); -+ dput(parent); -+ parent = NULL; -+ } -+ if (!au_ftest_icpup(a->flags, DID_CPUP)) -+ goto out; /* success */ ++/* side effect: sets whlist and h_dentry */ ++static int au_ren_may_dir(struct au_ren_args *a) ++{ ++ int err; ++ unsigned int rdhash; ++ struct dentry *d; + -+ if (!d_unhashed(dentry)) { -+ h_file = au_h_open_pre(dentry, bstart); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ h_file = NULL; -+ } else -+ err = au_sio_cpup_simple(dentry, a->btgt, sz, -+ AuCpup_DTIME); -+ if (!err) -+ a->h_path.dentry = au_h_dptr(dentry, a->btgt); -+ } else if (!hi_wh) -+ a->h_path.dentry = au_h_dptr(dentry, a->btgt); -+ else -+ a->h_path.dentry = hi_wh; /* do not dget here */ ++ d = a->dst_dentry; ++ SiMustAnyLock(d->d_sb); + -+out_unlock: -+ mutex_unlock(&a->h_inode->i_mutex); -+ au_h_open_post(dentry, bstart, h_file); -+ a->h_inode = a->h_path.dentry->d_inode; -+ if (!err) { -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ goto out; /* success */ ++ err = 0; ++ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { ++ rdhash = au_sbi(d->d_sb)->si_rdhash; ++ if (!rdhash) ++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d)); ++ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ ++ au_set_dbstart(d, a->dst_bstart); ++ err = may_rename_dstdir(d, &a->whlist); ++ au_set_dbstart(d, a->btgt); + } ++ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (unlikely(err)) ++ goto out; + -+ au_unpin(&a->pin); -+out_parent: -+ if (parent) { -+ di_write_unlock(parent); -+ dput(parent); ++ d = a->src_dentry; ++ a->src_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ err = may_rename_srcdir(d, a->btgt); ++ if (unlikely(err)) { ++ au_nhash_wh_free(&a->whlist); ++ a->whlist.nh_num = 0; ++ } + } +out: + return err; +} + -+static int aufs_setattr(struct dentry *dentry, struct iattr *ia) -+{ -+ int err; -+ struct inode *inode; -+ struct super_block *sb; -+ struct file *file; -+ struct au_icpup_args *a; -+ -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) -+ ia->ia_valid &= ~ATTR_MODE; ++/* ---------------------------------------------------------------------- */ + -+ file = NULL; -+ sb = dentry->d_sb; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out_kfree; ++/* ++ * simple tests for rename. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++static int au_may_ren(struct au_ren_args *a) ++{ ++ int err, isdir; ++ struct inode *h_inode; + -+ if (ia->ia_valid & ATTR_FILE) { -+ /* currently ftruncate(2) only */ -+ AuDebugOn(!S_ISREG(inode->i_mode)); -+ file = ia->ia_file; -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (a->src_bstart == a->btgt) { ++ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, ++ au_ftest_ren(a->flags, ISDIR)); + if (unlikely(err)) -+ goto out_si; -+ ia->ia_file = au_hf_top(file); -+ a->udba = AuOpt_UDBA_NONE; -+ } else { -+ /* fchmod() doesn't pass ia_file */ -+ a->udba = au_opt_udba(sb); -+ di_write_lock_child(dentry); -+ /* no d_unlinked(), to set UDBA_NONE for root */ -+ if (d_unhashed(dentry)) -+ a->udba = AuOpt_UDBA_NONE; -+ if (a->udba != AuOpt_UDBA_NONE) { -+ AuDebugOn(IS_ROOT(dentry)); -+ err = au_reval_for_attr(dentry, au_sigen(sb)); -+ if (unlikely(err)) -+ goto out_dentry; -+ } ++ goto out; ++ err = -EINVAL; ++ if (unlikely(a->src_h_dentry == a->h_trap)) ++ goto out; + } + -+ err = au_pin_and_icpup(dentry, ia, a); -+ if (unlikely(err < 0)) -+ goto out_dentry; -+ if (au_ftest_icpup(a->flags, DID_CPUP)) { -+ ia->ia_file = NULL; -+ ia->ia_valid &= ~ATTR_FILE; -+ } ++ err = 0; ++ if (a->dst_bstart != a->btgt) ++ goto out; + -+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); -+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME)) -+ == (ATTR_MODE | ATTR_CTIME)) { -+ err = security_path_chmod(&a->h_path, ia->ia_mode); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID)) -+ && (ia->ia_valid & ATTR_CTIME)) { -+ err = security_path_chown(&a->h_path, vfsub_ia_uid(ia), -+ vfsub_ia_gid(ia)); ++ err = -ENOTEMPTY; ++ if (unlikely(a->dst_h_dentry == a->h_trap)) ++ goto out; ++ ++ err = -EIO; ++ h_inode = a->dst_h_dentry->d_inode; ++ isdir = !!au_ftest_ren(a->flags, ISDIR); ++ if (!a->dst_dentry->d_inode) { ++ if (unlikely(h_inode)) ++ goto out; ++ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ } else { ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); + if (unlikely(err)) -+ goto out_unlock; ++ goto out; + } + -+ if (ia->ia_valid & ATTR_SIZE) { -+ struct file *f; ++out: ++ if (unlikely(err == -ENOENT || err == -EEXIST)) ++ err = -EIO; ++ AuTraceErr(err); ++ return err; ++} + -+ if (ia->ia_size < i_size_read(inode)) -+ /* unmap only */ -+ truncate_setsize(inode, ia->ia_size); ++/* ---------------------------------------------------------------------- */ + -+ f = NULL; -+ if (ia->ia_valid & ATTR_FILE) -+ f = ia->ia_file; -+ mutex_unlock(&a->h_inode->i_mutex); -+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f); -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ } else -+ err = vfsub_notify_change(&a->h_path, ia); -+ if (!err) -+ au_cpup_attr_changeable(inode); ++/* ++ * locking order ++ * (VFS) ++ * - src_dir and dir by lock_rename() ++ * - inode if exitsts ++ * (aufs) ++ * - lock all ++ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, ++ * + si_read_lock ++ * + di_write_lock2_child() ++ * + di_write_lock_child() ++ * + ii_write_lock_child() ++ * + di_write_lock_child2() ++ * + ii_write_lock_child2() ++ * + src_parent and parent ++ * + di_write_lock_parent() ++ * + ii_write_lock_parent() ++ * + di_write_lock_parent2() ++ * + ii_write_lock_parent2() ++ * + lower src_dir and dir by vfsub_lock_rename() ++ * + verify the every relationships between child and parent. if any ++ * of them failed, unlock all and return -EBUSY. ++ */ ++static void au_ren_unlock(struct au_ren_args *a) ++{ ++ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++ if (au_ftest_ren(a->flags, MNT_WRITE)) ++ vfsub_mnt_drop_write(au_br_mnt(a->br)); ++} + -+out_unlock: -+ mutex_unlock(&a->h_inode->i_mutex); -+ au_unpin(&a->pin); ++static int au_ren_lock(struct au_ren_args *a) ++{ ++ int err; ++ unsigned int udba; ++ ++ err = 0; ++ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); ++ a->src_hdir = au_hi(a->src_dir, a->btgt); ++ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); ++ a->dst_hdir = au_hi(a->dst_dir, a->btgt); ++ ++ err = vfsub_mnt_want_write(au_br_mnt(a->br)); + if (unlikely(err)) -+ au_update_dbstart(dentry); -+out_dentry: -+ di_write_unlock(dentry); -+ if (file) { -+ fi_write_unlock(file); -+ ia->ia_file = file; -+ ia->ia_valid |= ATTR_FILE; -+ } -+out_si: -+ si_read_unlock(sb); -+out_kfree: -+ kfree(a); ++ goto out; ++ au_fset_ren(a->flags, MNT_WRITE); ++ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++ udba = au_opt_udba(a->src_dentry->d_sb); ++ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode ++ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode)) ++ err = au_busy_or_stale(); ++ if (!err && au_dbstart(a->src_dentry) == a->btgt) ++ err = au_h_verify(a->src_h_dentry, udba, ++ a->src_h_parent->d_inode, a->src_h_parent, ++ a->br); ++ if (!err && au_dbstart(a->dst_dentry) == a->btgt) ++ err = au_h_verify(a->dst_h_dentry, udba, ++ a->dst_h_parent->d_inode, a->dst_h_parent, ++ a->br); ++ if (!err) ++ goto out; /* success */ ++ ++ err = au_busy_or_stale(); ++ au_ren_unlock(a); ++ +out: -+ AuTraceErr(err); + return err; +} + -+static void au_refresh_iattr(struct inode *inode, struct kstat *st, -+ unsigned int nlink) -+{ -+ unsigned int n; ++/* ---------------------------------------------------------------------- */ + -+ inode->i_mode = st->mode; -+ i_uid_write(inode, st->uid); -+ i_gid_write(inode, st->gid); -+ inode->i_atime = st->atime; -+ inode->i_mtime = st->mtime; -+ inode->i_ctime = st->ctime; ++static void au_ren_refresh_dir(struct au_ren_args *a) ++{ ++ struct inode *dir; + -+ au_cpup_attr_nlink(inode, /*force*/0); -+ if (S_ISDIR(inode->i_mode)) { -+ n = inode->i_nlink; -+ n -= nlink; -+ n += st->nlink; -+ /* 0 can happen */ -+ set_nlink(inode, n); ++ dir = a->dst_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ /* is this updating defined in POSIX? */ ++ au_cpup_attr_timesizes(a->src_inode); ++ au_cpup_attr_nlink(dir, /*force*/1); + } + -+ spin_lock(&inode->i_lock); -+ inode->i_blocks = st->blocks; -+ i_size_write(inode, st->size); -+ spin_unlock(&inode->i_lock); ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); ++ ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ return; ++ ++ dir = a->src_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_cpup_attr_nlink(dir, /*force*/1); ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); +} + -+static int aufs_getattr(struct vfsmount *mnt __maybe_unused, -+ struct dentry *dentry, struct kstat *st) ++static void au_ren_refresh(struct au_ren_args *a) +{ -+ int err; -+ unsigned int mnt_flags; -+ aufs_bindex_t bindex; -+ unsigned char udba_none, positive; -+ struct super_block *sb, *h_sb; -+ struct inode *inode; -+ struct vfsmount *h_mnt; -+ struct dentry *h_dentry; ++ aufs_bindex_t bend, bindex; ++ struct dentry *d, *h_d; ++ struct inode *i, *h_i; ++ struct super_block *sb; + -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out; -+ mnt_flags = au_mntflags(sb); -+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); ++ d = a->dst_dentry; ++ d_drop(d); ++ if (a->h_dst) ++ /* already dget-ed by au_ren_or_cpup() */ ++ au_set_h_dptr(d, a->btgt, a->h_dst); + -+ /* support fstat(2) */ -+ if (!d_unlinked(dentry) && !udba_none) { -+ unsigned int sigen = au_sigen(sb); -+ err = au_digen_test(dentry, sigen); -+ if (!err) { -+ di_read_lock_child(dentry, AuLock_IR); -+ err = au_dbrange_test(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else { -+ AuDebugOn(IS_ROOT(dentry)); -+ di_write_lock_child(dentry); -+ err = au_dbrange_test(dentry); -+ if (!err) -+ err = au_reval_for_attr(dentry, sigen); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; ++ i = a->dst_inode; ++ if (i) { ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ vfsub_drop_nlink(i); ++ else { ++ vfsub_dead_dir(i); ++ au_cpup_attr_timesizes(i); + } -+ } else -+ di_read_lock_child(dentry, AuLock_IR); -+ -+ bindex = au_ibstart(inode); -+ h_mnt = au_sbr_mnt(sb, bindex); -+ h_sb = h_mnt->mnt_sb; -+ if (!au_test_fs_bad_iattr(h_sb) && udba_none) -+ goto out_fill; /* success */ -+ -+ h_dentry = NULL; -+ if (au_dbstart(dentry) == bindex) -+ h_dentry = dget(au_h_dptr(dentry, bindex)); -+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { -+ h_dentry = au_plink_lkup(inode, bindex); -+ if (IS_ERR(h_dentry)) -+ goto out_fill; /* pretending success */ ++ au_update_dbrange(d, /*do_put_zero*/1); ++ } else { ++ bend = a->btgt; ++ for (bindex = au_dbstart(d); bindex < bend; bindex++) ++ au_set_h_dptr(d, bindex, NULL); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) ++ au_set_h_dptr(d, bindex, NULL); ++ au_update_dbrange(d, /*do_put_zero*/0); + } -+ /* illegally overlapped or something */ -+ if (unlikely(!h_dentry)) -+ goto out_fill; /* pretending success */ + -+ positive = !!h_dentry->d_inode; -+ if (positive) -+ err = vfs_getattr(h_mnt, h_dentry, st); -+ dput(h_dentry); -+ if (!err) { -+ if (positive) -+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink); -+ goto out_fill; /* success */ ++ d = a->src_dentry; ++ au_set_dbwh(d, -1); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_d = au_h_dptr(d, bindex); ++ if (h_d) ++ au_set_h_dptr(d, bindex, NULL); + } -+ AuTraceErr(err); -+ goto out_unlock; ++ au_set_dbend(d, a->btgt); + -+out_fill: -+ generic_fillattr(inode, st); -+out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ si_read_unlock(sb); -+out: -+ AuTraceErr(err); -+ return err; ++ sb = d->d_sb; ++ i = a->src_inode; ++ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) ++ return; /* success */ ++ ++ bend = au_ibend(i); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(i, bindex); ++ if (h_i) { ++ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0); ++ /* ignore this error */ ++ au_set_h_iptr(i, bindex, NULL, 0); ++ } ++ } ++ au_set_ibend(i, a->btgt); +} + +/* ---------------------------------------------------------------------- */ + -+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, -+ int bufsiz) ++/* mainly for link(2) and rename(2) */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ aufs_bindex_t bdiropq, bwh; ++ struct dentry *parent; ++ struct au_branch *br; ++ ++ parent = dentry->d_parent; ++ IMustLock(parent->d_inode); /* dir is locked */ ++ ++ bdiropq = au_dbdiropq(parent); ++ bwh = au_dbwh(dentry); ++ br = au_sbr(dentry->d_sb, btgt); ++ if (au_br_rdonly(br) ++ || (0 <= bdiropq && bdiropq < btgt) ++ || (0 <= bwh && bwh < btgt)) ++ btgt = -1; ++ ++ AuDbg("btgt %d\n", btgt); ++ return btgt; ++} ++ ++/* sets src_bstart, dst_bstart and btgt */ ++static int au_ren_wbr(struct au_ren_args *a) +{ + int err; -+ struct super_block *sb; -+ struct dentry *h_dentry; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; + -+ err = -EINVAL; -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (unlikely(!h_dentry->d_inode->i_op->readlink)) -+ goto out; ++ a->src_bstart = au_dbstart(a->src_dentry); ++ a->dst_bstart = au_dbstart(a->dst_dentry); ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ wr_dir_args.force_btgt = a->src_bstart; ++ if (a->dst_inode && a->dst_bstart < a->src_bstart) ++ wr_dir_args.force_btgt = a->dst_bstart; ++ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); ++ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); ++ a->btgt = err; + -+ err = security_inode_readlink(h_dentry); -+ if (unlikely(err)) -+ goto out; ++ return err; ++} + -+ sb = dentry->d_sb; -+ if (!au_test_ro(sb, bindex, dentry->d_inode)) { -+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); -+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); ++static void au_ren_dt(struct au_ren_args *a) ++{ ++ a->h_path.dentry = a->src_h_parent; ++ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { ++ a->h_path.dentry = a->dst_h_parent; ++ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path); + } -+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); + -+out: -+ return err; ++ au_fclr_ren(a->flags, DT_DSTDIR); ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ return; ++ ++ a->h_path.dentry = a->src_h_dentry; ++ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); ++ if (a->dst_h_dentry->d_inode) { ++ au_fset_ren(a->flags, DT_DSTDIR); ++ a->h_path.dentry = a->dst_h_dentry; ++ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); ++ } +} + -+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) ++static void au_ren_rev_dt(int err, struct au_ren_args *a) +{ -+ int err; ++ struct dentry *h_d; ++ struct mutex *h_mtx; + -+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); -+ if (unlikely(err)) -+ goto out; -+ err = au_d_hashed_positive(dentry); -+ if (!err) -+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); -+ aufs_read_unlock(dentry, AuLock_IR); ++ au_dtime_revert(a->src_dt + AuPARENT); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) ++ au_dtime_revert(a->dst_dt + AuPARENT); + -+out: -+ return err; ++ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { ++ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->src_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ ++ if (au_ftest_ren(a->flags, DT_DSTDIR)) { ++ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->dst_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ } ++ } +} + -+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, ++ struct inode *_dst_dir, struct dentry *_dst_dentry) +{ -+ int err; -+ mm_segment_t old_fs; -+ union { -+ char *k; -+ char __user *u; -+ } buf; ++ int err, flags; ++ /* reduce stack space */ ++ struct au_ren_args *a; ++ ++ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry)); ++ IMustLock(_src_dir); ++ IMustLock(_dst_dir); + + err = -ENOMEM; -+ buf.k = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!buf.k)) ++ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) + goto out; + -+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); ++ a->src_dir = _src_dir; ++ a->src_dentry = _src_dentry; ++ a->src_inode = a->src_dentry->d_inode; ++ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */ ++ a->dst_dir = _dst_dir; ++ a->dst_dentry = _dst_dentry; ++ a->dst_inode = a->dst_dentry->d_inode; ++ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ ++ if (a->dst_inode) { ++ IMustLock(a->dst_inode); ++ au_igrab(a->dst_inode); ++ } ++ ++ err = -ENOTDIR; ++ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN; ++ if (S_ISDIR(a->src_inode->i_mode)) { ++ au_fset_ren(a->flags, ISDIR); ++ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) ++ goto out_free; ++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ AuLock_DIR | flags); ++ } else ++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ flags); + if (unlikely(err)) -+ goto out_name; ++ goto out_free; + -+ err = au_d_hashed_positive(dentry); -+ if (!err) { -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX); -+ set_fs(old_fs); -+ } -+ aufs_read_unlock(dentry, AuLock_IR); ++ err = au_d_hashed_positive(a->src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = -ENOENT; ++ if (a->dst_inode) { ++ /* ++ * If it is a dir, VFS unhash dst_dentry before this ++ * function. It means we cannot rely upon d_unhashed(). ++ */ ++ if (unlikely(!a->dst_inode->i_nlink)) ++ goto out_unlock; ++ if (!S_ISDIR(a->dst_inode->i_mode)) { ++ err = au_d_hashed_positive(a->dst_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else if (unlikely(IS_DEADDIR(a->dst_inode))) ++ goto out_unlock; ++ } else if (unlikely(d_unhashed(a->dst_dentry))) ++ goto out_unlock; + -+ if (err >= 0) { -+ buf.k[err] = 0; -+ /* will be freed by put_link */ -+ nd_set_link(nd, buf.k); -+ return NULL; /* success */ -+ } ++ /* ++ * is it possible? ++ * yes, it happend (in linux-3.3-rcN) but I don't know why. ++ * there may exist a problem somewhere else. ++ */ ++ err = -EINVAL; ++ if (unlikely(a->dst_parent->d_inode == a->src_dentry->d_inode)) ++ goto out_unlock; + -+out_name: -+ free_page((unsigned long)buf.k); -+out: -+ AuTraceErr(err); -+ return ERR_PTR(err); -+} ++ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ ++ di_write_lock_parent(a->dst_parent); + -+static void aufs_put_link(struct dentry *dentry __maybe_unused, -+ struct nameidata *nd, void *cookie __maybe_unused) -+{ -+ char *p; ++ /* which branch we process */ ++ err = au_ren_wbr(a); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); ++ a->h_path.mnt = au_br_mnt(a->br); + -+ p = nd_get_link(nd); -+ if (!IS_ERR_OR_NULL(p)) -+ free_page((unsigned long)p); -+} ++ /* are they available to be renamed */ ++ err = au_ren_may_dir(a); ++ if (unlikely(err)) ++ goto out_children; + -+/* ---------------------------------------------------------------------- */ ++ /* prepare the writable parent dir on the same branch */ ++ if (a->dst_bstart == a->btgt) { ++ au_fset_ren(a->flags, WHDST); ++ } else { ++ err = au_cpup_dirs(a->dst_dentry, a->btgt); ++ if (unlikely(err)) ++ goto out_children; ++ } ++ ++ if (a->src_dir != a->dst_dir) { ++ /* ++ * this temporary unlock is safe, ++ * because both dir->i_mutex are locked. ++ */ ++ di_write_unlock(a->dst_parent); ++ di_write_lock_parent(a->src_parent); ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ di_write_unlock(a->src_parent); ++ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); ++ au_fclr_ren(a->flags, ISSAMEDIR); ++ } else ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ if (unlikely(err < 0)) ++ goto out_children; ++ if (err) ++ au_fset_ren(a->flags, WHSRC); + -+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags) -+{ -+ int err; -+ struct super_block *sb; -+ struct inode *h_inode; ++ /* cpup src */ ++ if (a->src_bstart != a->btgt) { ++ struct file *h_file; ++ struct au_pin pin; + -+ sb = inode->i_sb; -+ /* mmap_sem might be acquired already, cf. aufs_mmap() */ -+ lockdep_off(); -+ si_read_lock(sb, AuLock_FLUSH); -+ ii_write_lock_child(inode); -+ lockdep_on(); -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ err = vfsub_update_time(h_inode, ts, flags); -+ lockdep_off(); -+ ii_write_unlock(inode); -+ si_read_unlock(sb); -+ lockdep_on(); -+ return err; -+} ++ err = au_pin(&pin, a->src_dentry, a->btgt, ++ au_opt_udba(a->src_dentry->d_sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_children; + -+/* ---------------------------------------------------------------------- */ ++ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart); ++ h_file = au_h_open_pre(a->src_dentry, a->src_bstart); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ h_file = NULL; ++ } else { ++ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1, ++ AuCpup_DTIME, &pin); ++ au_h_open_post(a->src_dentry, a->src_bstart, h_file); ++ } ++ au_unpin(&pin); ++ if (unlikely(err)) ++ goto out_children; ++ a->src_bstart = a->btgt; ++ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt); ++ au_fset_ren(a->flags, WHSRC); ++ } + -+struct inode_operations aufs_symlink_iop = { -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, ++ /* lock them all */ ++ err = au_ren_lock(a); ++ if (unlikely(err)) ++ /* leave the copied-up one */ ++ goto out_children; + -+ .readlink = aufs_readlink, -+ .follow_link = aufs_follow_link, -+ .put_link = aufs_put_link, ++ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) ++ err = au_may_ren(a); ++ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ if (unlikely(err)) ++ goto out_hdir; + -+ /* .update_time = aufs_update_time */ -+}; ++ /* store timestamps to be revertible */ ++ au_ren_dt(a); + -+struct inode_operations aufs_dir_iop = { -+ .create = aufs_create, -+ .lookup = aufs_lookup, -+ .link = aufs_link, -+ .unlink = aufs_unlink, -+ .symlink = aufs_symlink, -+ .mkdir = aufs_mkdir, -+ .rmdir = aufs_rmdir, -+ .mknod = aufs_mknod, -+ .rename = aufs_rename, ++ /* here we go */ ++ err = do_rename(a); ++ if (unlikely(err)) ++ goto out_dt; + -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, ++ /* update dir attributes */ ++ au_ren_refresh_dir(a); + -+ .update_time = aufs_update_time -+ /* no support for atomic_open() */ -+}; ++ /* dput/iput all lower dentries */ ++ au_ren_refresh(a); + -+struct inode_operations aufs_iop = { -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, ++ goto out_hdir; /* success */ + -+ .update_time = aufs_update_time -+}; ++out_dt: ++ au_ren_rev_dt(err, a); ++out_hdir: ++ au_ren_unlock(a); ++out_children: ++ au_nhash_wh_free(&a->whlist); ++ if (err && a->dst_inode && a->dst_bstart != a->btgt) { ++ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt); ++ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); ++ au_set_dbstart(a->dst_dentry, a->dst_bstart); ++ } ++out_parent: ++ if (!err) ++ d_move(a->src_dentry, a->dst_dentry); ++ else { ++ au_update_dbstart(a->dst_dentry); ++ if (!a->dst_inode) ++ d_drop(a->dst_dentry); ++ } ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ di_write_unlock(a->dst_parent); ++ else ++ di_write_unlock2(a->src_parent, a->dst_parent); ++out_unlock: ++ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); ++out_free: ++ iput(a->dst_inode); ++ if (a->thargs) ++ au_whtmp_rmdir_free(a->thargs); ++ kfree(a); ++out: ++ AuTraceErr(err); ++ return err; ++} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/i_op_del.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,477 @@ ++++ linux-3.9/fs/aufs/iinfo.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,276 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -16488,1702 +17137,1563 @@ + */ + +/* -+ * inode operations (del entry) ++ * inode private data + */ + +#include "aufs.h" + -+/* -+ * decide if a new whiteout for @dentry is necessary or not. -+ * when it is necessary, prepare the parent dir for the upper branch whose -+ * branch index is @bcpup for creation. the actual creation of the whiteout will -+ * be done by caller. -+ * return value: -+ * 0: wh is unnecessary -+ * plus: wh is necessary -+ * minus: error -+ */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) -+{ -+ int need_wh, err; -+ aufs_bindex_t bstart; -+ struct super_block *sb; -+ -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ if (*bcpup < 0) { -+ *bcpup = bstart; -+ if (au_test_ro(sb, bstart, dentry->d_inode)) { -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ *bcpup = err; -+ if (unlikely(err < 0)) -+ goto out; -+ } -+ } else -+ AuDebugOn(bstart < *bcpup -+ || au_test_ro(sb, *bcpup, dentry->d_inode)); -+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); -+ -+ if (*bcpup != bstart) { -+ err = au_cpup_dirs(dentry, *bcpup); -+ if (unlikely(err)) -+ goto out; -+ need_wh = 1; -+ } else { -+ struct au_dinfo *dinfo, *tmp; -+ -+ need_wh = -ENOMEM; -+ dinfo = au_di(dentry); -+ tmp = au_di_alloc(sb, AuLsc_DI_TMP); -+ if (tmp) { -+ au_di_cp(tmp, dinfo); -+ au_di_swap(tmp, dinfo); -+ /* returns the number of positive dentries */ -+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0); -+ au_di_swap(tmp, dinfo); -+ au_rw_write_unlock(&tmp->di_rwsem); -+ au_di_free(tmp); -+ } -+ } -+ AuDbg("need_wh %d\n", need_wh); -+ err = need_wh; -+ -+out: -+ return err; -+} -+ -+/* -+ * simple tests for the del-entry operations. -+ * following the checks in vfs, plus the parent-child relationship. -+ */ -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir) ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) +{ -+ int err; -+ umode_t h_mode; -+ struct dentry *h_dentry, *h_latest; + struct inode *h_inode; + -+ h_dentry = au_h_dptr(dentry, bindex); -+ h_inode = h_dentry->d_inode; -+ if (dentry->d_inode) { -+ err = -ENOENT; -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ -+ h_mode = h_inode->i_mode; -+ if (!isdir) { -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(h_mode))) -+ goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; -+ } -+ } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(h_inode)) -+ goto out; -+ } -+ -+ err = -ENOENT; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ goto out; -+ err = 0; ++ IiMustAnyLock(inode); + -+ /* -+ * rmdir a dir may break the consistency on some filesystem. -+ * let's try heavy test. -+ */ -+ err = -EACCES; -+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE))) -+ goto out; ++ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ return h_inode; ++} + -+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent, -+ au_sbr(dentry->d_sb, bindex)); -+ err = -EIO; -+ if (IS_ERR(h_latest)) -+ goto out; -+ if (h_latest == h_dentry) -+ err = 0; -+ dput(h_latest); ++/* todo: hard/soft set? */ ++void au_hiput(struct au_hinode *hinode) ++{ ++ au_hn_free(hinode); ++ dput(hinode->hi_whdentry); ++ iput(hinode->hi_inode); ++} + -+out: -+ return err; ++unsigned int au_hi_flags(struct inode *inode, int isdir) ++{ ++ unsigned int flags; ++ const unsigned int mnt_flags = au_mntflags(inode->i_sb); ++ ++ flags = 0; ++ if (au_opt_test(mnt_flags, XINO)) ++ au_fset_hi(flags, XINO); ++ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) ++ au_fset_hi(flags, HNOTIFY); ++ return flags; +} + -+/* -+ * decide the branch where we operate for @dentry. the branch index will be set -+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent -+ * dir for reverting. -+ * when a new whiteout is necessary, create it. -+ */ -+static struct dentry* -+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, -+ struct au_dtime *dt, struct au_pin *pin) ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags) +{ -+ struct dentry *wh_dentry; -+ struct super_block *sb; -+ struct path h_path; -+ int err, need_wh; -+ unsigned int udba; -+ aufs_bindex_t bcpup; ++ struct au_hinode *hinode; ++ struct inode *hi; ++ struct au_iinfo *iinfo = au_ii(inode); + -+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); -+ wh_dentry = ERR_PTR(need_wh); -+ if (unlikely(need_wh < 0)) -+ goto out; ++ IiMustWriteLock(inode); + -+ sb = dentry->d_sb; -+ udba = au_opt_udba(sb); -+ bcpup = *rbcpup; -+ err = au_pin(pin, dentry, bcpup, udba, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; ++ hinode = iinfo->ii_hinode + bindex; ++ hi = hinode->hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); + -+ h_path.dentry = au_pinned_h_parent(pin); -+ if (udba != AuOpt_UDBA_NONE -+ && au_dbstart(dentry) == bcpup) { -+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_unpin; ++ if (hi) ++ au_hiput(hinode); ++ hinode->hi_inode = h_inode; ++ if (h_inode) { ++ int err; ++ struct super_block *sb = inode->i_sb; ++ struct au_branch *br; ++ ++ AuDebugOn(inode->i_mode ++ && (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT)); ++ if (bindex == iinfo->ii_bstart) ++ au_cpup_igen(inode, h_inode); ++ br = au_sbr(sb, bindex); ++ hinode->hi_id = br->br_id; ++ if (au_ftest_hi(flags, XINO)) { ++ err = au_xino_write(sb, bindex, h_inode->i_ino, ++ inode->i_ino); ++ if (unlikely(err)) ++ AuIOErr1("failed au_xino_write() %d\n", err); ++ } ++ ++ if (au_ftest_hi(flags, HNOTIFY) ++ && au_br_hnotifyable(br->br_perm)) { ++ err = au_hn_alloc(hinode, inode); ++ if (unlikely(err)) ++ AuIOErr1("au_hn_alloc() %d\n", err); ++ } + } ++} + -+ h_path.mnt = au_sbr_mnt(sb, bcpup); -+ au_dtime_store(dt, au_pinned_parent(pin), &h_path); -+ wh_dentry = NULL; -+ if (!need_wh) -+ goto out; /* success, no need to create whiteout */ ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh) ++{ ++ struct au_hinode *hinode; + -+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_unpin; ++ IiMustWriteLock(inode); + -+ /* returns with the parent is locked and wh_dentry is dget-ed */ -+ goto out; /* success */ ++ hinode = au_ii(inode)->ii_hinode + bindex; ++ AuDebugOn(hinode->hi_whdentry); ++ hinode->hi_whdentry = h_wh; ++} + -+out_unpin: -+ au_unpin(pin); -+out: -+ return wh_dentry; ++void au_update_iigen(struct inode *inode, int half) ++{ ++ struct au_iinfo *iinfo; ++ struct au_iigen *iigen; ++ unsigned int sigen; ++ ++ sigen = au_sigen(inode->i_sb); ++ iinfo = au_ii(inode); ++ iigen = &iinfo->ii_generation; ++ spin_lock(&iinfo->ii_genspin); ++ iigen->ig_generation = sigen; ++ if (half) ++ au_ig_fset(iigen->ig_flags, HALF_REFRESHED); ++ else ++ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED); ++ spin_unlock(&iinfo->ii_genspin); +} + -+/* -+ * when removing a dir, rename it to a unique temporary whiteout-ed name first -+ * in order to be revertible and save time for removing many child whiteouts -+ * under the dir. -+ * returns 1 when there are too many child whiteout and caller should remove -+ * them asynchronously. returns 0 when the number of children is enough small to -+ * remove now or the branch fs is a remote fs. -+ * otherwise return an error. -+ */ -+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, -+ struct au_nhash *whlist, struct inode *dir) ++/* it may be called at remount time, too */ ++void au_update_ibrange(struct inode *inode, int do_put_zero) +{ -+ int rmdir_later, err, dirwh; -+ struct dentry *h_dentry; -+ struct super_block *sb; ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex, bend; + -+ sb = dentry->d_sb; -+ SiMustAnyLock(sb); -+ h_dentry = au_h_dptr(dentry, bindex); -+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); -+ if (unlikely(err)) -+ goto out; ++ iinfo = au_ii(inode); ++ if (!iinfo) ++ return; + -+ /* stop monitoring */ -+ au_hn_free(au_hi(dentry->d_inode, bindex)); ++ IiMustWriteLock(inode); + -+ if (!au_test_fs_remote(h_dentry->d_sb)) { -+ dirwh = au_sbi(sb)->si_dirwh; -+ rmdir_later = (dirwh <= 1); -+ if (!rmdir_later) -+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, -+ dirwh); -+ if (rmdir_later) -+ return rmdir_later; -+ } ++ if (do_put_zero && iinfo->ii_bstart >= 0) { ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++) { ++ struct inode *h_i; + -+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); -+ if (unlikely(err)) { -+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n", -+ AuDLNPair(h_dentry), bindex, err); -+ err = 0; ++ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; ++ if (h_i && !h_i->i_nlink) ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ } + } + -+out: -+ AuTraceErr(err); -+ return err; ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ bend = au_sbend(inode->i_sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (iinfo->ii_hinode[0 + bindex].hi_inode) { ++ iinfo->ii_bstart = bindex; ++ break; ++ } ++ if (iinfo->ii_bstart >= 0) ++ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--) ++ if (iinfo->ii_hinode[0 + bindex].hi_inode) { ++ iinfo->ii_bend = bindex; ++ break; ++ } ++ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend); +} + -+/* -+ * final procedure for deleting a entry. -+ * maintain dentry and iattr. -+ */ -+static void epilog(struct inode *dir, struct dentry *dentry, -+ aufs_bindex_t bindex) -+{ -+ struct inode *inode; ++/* ---------------------------------------------------------------------- */ + -+ inode = dentry->d_inode; -+ d_drop(dentry); -+ inode->i_ctime = dir->i_ctime; ++void au_icntnr_init_once(void *_c) ++{ ++ struct au_icntnr *c = _c; ++ struct au_iinfo *iinfo = &c->iinfo; ++ static struct lock_class_key aufs_ii; + -+ if (au_ibstart(dir) == bindex) -+ au_cpup_attr_timesizes(dir); -+ dir->i_version++; ++ spin_lock_init(&iinfo->ii_genspin); ++ au_rw_init(&iinfo->ii_rwsem); ++ au_rw_class(&iinfo->ii_rwsem, &aufs_ii); ++ inode_init_once(&c->vfs_inode); +} + -+/* -+ * when an error happened, remove the created whiteout and revert everything. -+ */ -+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex, -+ aufs_bindex_t bwh, struct dentry *wh_dentry, -+ struct dentry *dentry, struct au_dtime *dt) ++int au_iinfo_init(struct inode *inode) +{ -+ int rerr; -+ struct path h_path = { -+ .dentry = wh_dentry, -+ .mnt = au_sbr_mnt(dir->i_sb, bindex) -+ }; ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ int nbr, i; + -+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry); -+ if (!rerr) { -+ au_set_dbwh(dentry, bwh); -+ au_dtime_revert(dt); ++ sb = inode->i_sb; ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ nbr = au_sbend(sb) + 1; ++ if (unlikely(nbr <= 0)) ++ nbr = 1; ++ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); ++ if (iinfo->ii_hinode) { ++ au_ninodes_inc(sb); ++ for (i = 0; i < nbr; i++) ++ iinfo->ii_hinode[i].hi_id = -1; ++ ++ iinfo->ii_generation.ig_generation = au_sigen(sb); ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ iinfo->ii_vdir = NULL; + return 0; + } -+ -+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ return -EIO; ++ return -ENOMEM; +} + -+/* ---------------------------------------------------------------------- */ -+ -+int aufs_unlink(struct inode *dir, struct dentry *dentry) ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr) +{ -+ int err; -+ aufs_bindex_t bwh, bindex, bstart; -+ struct au_dtime dt; -+ struct au_pin pin; -+ struct path h_path; -+ struct inode *inode, *h_dir; -+ struct dentry *parent, *wh_dentry; ++ int err, sz; ++ struct au_hinode *hip; + -+ IMustLock(dir); ++ AuRwMustWriteLock(&iinfo->ii_rwsem); + -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); -+ if (unlikely(err)) -+ goto out; -+ err = au_d_hashed_positive(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(inode->i_mode))) -+ goto out_unlock; /* possible? */ ++ err = -ENOMEM; ++ sz = sizeof(*hip) * (iinfo->ii_bend + 1); ++ if (!sz) ++ sz = sizeof(*hip); ++ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); ++ if (hip) { ++ iinfo->ii_hinode = hip; ++ err = 0; ++ } + -+ bstart = au_dbstart(dentry); -+ bwh = au_dbwh(dentry); -+ bindex = -1; -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; ++ return err; ++} + -+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); -+ h_path.dentry = au_h_dptr(dentry, bstart); -+ dget(h_path.dentry); -+ if (bindex == bstart) { -+ h_dir = au_pinned_h_dir(&pin); -+ err = vfsub_unlink(h_dir, &h_path, /*force*/0); -+ } else { -+ /* dir inode is locked */ -+ h_dir = wh_dentry->d_parent->d_inode; -+ IMustLock(h_dir); -+ err = 0; ++void au_iinfo_fin(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct au_hinode *hi; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend; ++ const unsigned char unlinked = !inode->i_nlink; ++ ++ iinfo = au_ii(inode); ++ /* bad_inode case */ ++ if (!iinfo) ++ return; ++ ++ sb = inode->i_sb; ++ au_ninodes_dec(sb); ++ if (si_pid_test(sb)) ++ au_xino_delete_inode(inode, unlinked); ++ else { ++ /* ++ * it is safe to hide the dependency between sbinfo and ++ * sb->s_umount. ++ */ ++ lockdep_off(); ++ si_noflush_read_lock(sb); ++ au_xino_delete_inode(inode, unlinked); ++ si_read_unlock(sb); ++ lockdep_on(); + } + -+ if (!err) { -+ vfsub_drop_nlink(inode); -+ epilog(dir, dentry, bindex); ++ if (iinfo->ii_vdir) ++ au_vdir_free(iinfo->ii_vdir); + -+ /* update target timestamps */ -+ if (bindex == bstart) { -+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ -+ inode->i_ctime = h_path.dentry->d_inode->i_ctime; -+ } else -+ /* todo: this timestamp may be reverted later */ -+ inode->i_ctime = h_dir->i_ctime; -+ goto out_unpin; /* success */ ++ bindex = iinfo->ii_bstart; ++ if (bindex >= 0) { ++ hi = iinfo->ii_hinode + bindex; ++ bend = iinfo->ii_bend; ++ while (bindex++ <= bend) { ++ if (hi->hi_inode) ++ au_hiput(hi); ++ hi++; ++ } + } ++ kfree(iinfo->ii_hinode); ++ iinfo->ii_hinode = NULL; ++ AuRwDestroy(&iinfo->ii_rwsem); ++} +--- /dev/null ++++ linux-3.9/fs/aufs/inode.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,492 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ /* revert */ -+ if (wh_dentry) { -+ int rerr; ++/* ++ * inode functions ++ */ + -+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); -+ if (rerr) -+ err = rerr; ++#include "aufs.h" ++ ++struct inode *au_igrab(struct inode *inode) ++{ ++ if (inode) { ++ AuDebugOn(!atomic_read(&inode->i_count)); ++ ihold(inode); + } ++ return inode; ++} + -+out_unpin: -+ au_unpin(&pin); -+ dput(wh_dentry); -+ dput(h_path.dentry); -+out_parent: -+ di_write_unlock(parent); -+out_unlock: -+ aufs_read_unlock(dentry, AuLock_DW); -+out: -+ return err; ++static void au_refresh_hinode_attr(struct inode *inode, int do_version) ++{ ++ au_cpup_attr_all(inode, /*force*/0); ++ au_update_iigen(inode, /*half*/1); ++ if (do_version) ++ inode->i_version++; +} + -+int aufs_rmdir(struct inode *dir, struct dentry *dentry) ++static int au_ii_refresh(struct inode *inode, int *update) +{ -+ int err, rmdir_later; -+ aufs_bindex_t bwh, bindex, bstart; -+ struct au_dtime dt; -+ struct au_pin pin; -+ struct inode *inode; -+ struct dentry *parent, *wh_dentry, *h_dentry; -+ struct au_whtmp_rmdir *args; ++ int err, e; ++ umode_t type; ++ aufs_bindex_t bindex, new_bindex; ++ struct super_block *sb; ++ struct au_iinfo *iinfo; ++ struct au_hinode *p, *q, tmp; + -+ IMustLock(dir); ++ IiMustWriteLock(inode); + -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN); ++ *update = 0; ++ sb = inode->i_sb; ++ type = inode->i_mode & S_IFMT; ++ iinfo = au_ii(inode); ++ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); + if (unlikely(err)) + goto out; -+ err = au_alive_dir(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ err = -ENOTDIR; -+ if (unlikely(!S_ISDIR(inode->i_mode))) -+ goto out_unlock; /* possible? */ -+ -+ err = -ENOMEM; -+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); -+ if (unlikely(!args)) -+ goto out_unlock; -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ err = au_test_empty(dentry, &args->whlist); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ bstart = au_dbstart(dentry); -+ bwh = au_dbwh(dentry); -+ bindex = -1; -+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ h_dentry = au_h_dptr(dentry, bstart); -+ dget(h_dentry); -+ rmdir_later = 0; -+ if (bindex == bstart) { -+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir); -+ if (err > 0) { -+ rmdir_later = err; -+ err = 0; -+ } -+ } else { -+ /* stop monitoring */ -+ au_hn_free(au_hi(inode, bstart)); -+ -+ /* dir inode is locked */ -+ IMustLock(wh_dentry->d_parent->d_inode); -+ err = 0; -+ } -+ -+ if (!err) { -+ vfsub_dead_dir(inode); -+ au_set_dbdiropq(dentry, -1); -+ epilog(dir, dentry, bindex); + -+ if (rmdir_later) { -+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); -+ args = NULL; -+ } ++ AuDebugOn(iinfo->ii_bstart < 0); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++, p++) { ++ if (!p->hi_inode) ++ continue; + -+ goto out_unpin; /* success */ -+ } ++ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); ++ new_bindex = au_br_index(sb, p->hi_id); ++ if (new_bindex == bindex) ++ continue; + -+ /* revert */ -+ AuLabel(revert); -+ if (wh_dentry) { -+ int rerr; ++ if (new_bindex < 0) { ++ *update = 1; ++ au_hiput(p); ++ p->hi_inode = NULL; ++ continue; ++ } + -+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt); -+ if (rerr) -+ err = rerr; ++ if (new_bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = new_bindex; ++ if (iinfo->ii_bend < new_bindex) ++ iinfo->ii_bend = new_bindex; ++ /* swap two lower inode, and loop again */ ++ q = iinfo->ii_hinode + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hi_inode) { ++ bindex--; ++ p--; ++ } + } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; + -+out_unpin: -+ au_unpin(&pin); -+ dput(wh_dentry); -+ dput(h_dentry); -+out_parent: -+ di_write_unlock(parent); -+ if (args) -+ au_whtmp_rmdir_free(args); -+out_unlock: -+ aufs_read_unlock(dentry, AuLock_DW); +out: + AuTraceErr(err); + return err; +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/i_op_ren.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,1026 @@ -+/* -+ * Copyright (C) 2005-2013 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/* -+ * inode operation (rename entry) -+ * todo: this is crazy monster -+ */ + -+#include "aufs.h" ++int au_refresh_hinode_self(struct inode *inode) ++{ ++ int err, update; + -+enum { AuSRC, AuDST, AuSrcDst }; -+enum { AuPARENT, AuCHILD, AuParentChild }; ++ err = au_ii_refresh(inode, &update); ++ if (!err) ++ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); + -+#define AuRen_ISDIR 1 -+#define AuRen_ISSAMEDIR (1 << 1) -+#define AuRen_WHSRC (1 << 2) -+#define AuRen_WHDST (1 << 3) -+#define AuRen_MNT_WRITE (1 << 4) -+#define AuRen_DT_DSTDIR (1 << 5) -+#define AuRen_DIROPQ (1 << 6) -+#define AuRen_CPUP (1 << 7) -+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) -+#define au_fset_ren(flags, name) \ -+ do { (flags) |= AuRen_##name; } while (0) -+#define au_fclr_ren(flags, name) \ -+ do { (flags) &= ~AuRen_##name; } while (0) ++ AuTraceErr(err); ++ return err; ++} + -+struct au_ren_args { -+ struct { -+ struct dentry *dentry, *h_dentry, *parent, *h_parent, -+ *wh_dentry; -+ struct inode *dir, *inode; -+ struct au_hinode *hdir; -+ struct au_dtime dt[AuParentChild]; -+ aufs_bindex_t bstart; -+ } sd[AuSrcDst]; ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry) ++{ ++ int err, e, update; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bend; ++ unsigned char isdir; ++ struct au_hinode *p; ++ struct au_iinfo *iinfo; + -+#define src_dentry sd[AuSRC].dentry -+#define src_dir sd[AuSRC].dir -+#define src_inode sd[AuSRC].inode -+#define src_h_dentry sd[AuSRC].h_dentry -+#define src_parent sd[AuSRC].parent -+#define src_h_parent sd[AuSRC].h_parent -+#define src_wh_dentry sd[AuSRC].wh_dentry -+#define src_hdir sd[AuSRC].hdir -+#define src_h_dir sd[AuSRC].hdir->hi_inode -+#define src_dt sd[AuSRC].dt -+#define src_bstart sd[AuSRC].bstart ++ err = au_ii_refresh(inode, &update); ++ if (unlikely(err)) ++ goto out; + -+#define dst_dentry sd[AuDST].dentry -+#define dst_dir sd[AuDST].dir -+#define dst_inode sd[AuDST].inode -+#define dst_h_dentry sd[AuDST].h_dentry -+#define dst_parent sd[AuDST].parent -+#define dst_h_parent sd[AuDST].h_parent -+#define dst_wh_dentry sd[AuDST].wh_dentry -+#define dst_hdir sd[AuDST].hdir -+#define dst_h_dir sd[AuDST].hdir->hi_inode -+#define dst_dt sd[AuDST].dt -+#define dst_bstart sd[AuDST].bstart ++ update = 0; ++ iinfo = au_ii(inode); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ mode = (inode->i_mode & S_IFMT); ++ isdir = S_ISDIR(mode); ++ flags = au_hi_flags(inode, isdir); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ struct inode *h_i; ++ struct dentry *h_d; + -+ struct dentry *h_trap; -+ struct au_branch *br; -+ struct au_hinode *src_hinode; -+ struct path h_path; -+ struct au_nhash whlist; -+ aufs_bindex_t btgt, src_bwh, src_bdiropq; ++ h_d = au_h_dptr(dentry, bindex); ++ if (!h_d || !h_d->d_inode) ++ continue; + -+ unsigned int flags; ++ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT)); ++ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { ++ h_i = au_h_iptr(inode, bindex); ++ if (h_i) { ++ if (h_i == h_d->d_inode) ++ continue; ++ err = -EIO; ++ break; ++ } ++ } ++ if (bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = bindex; ++ if (iinfo->ii_bend < bindex) ++ iinfo->ii_bend = bindex; ++ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); ++ update = 1; ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ if (!err) ++ au_refresh_hinode_attr(inode, update && isdir); + -+ struct au_whtmp_rmdir *thargs; -+ struct dentry *h_dst; -+}; ++out: ++ AuTraceErr(err); ++ return err; ++} + -+/* ---------------------------------------------------------------------- */ ++static int set_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bstart, btail; ++ unsigned char isdir; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct au_iinfo *iinfo; + -+/* -+ * functions for reverting. -+ * when an error happened in a single rename systemcall, we should revert -+ * everything as if nothing happend. -+ * we don't need to revert the copied-up/down the parent dir since they are -+ * harmless. -+ */ ++ IiMustWriteLock(inode); + -+#define RevertFailure(fmt, ...) do { \ -+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ -+ ##__VA_ARGS__, err, rerr); \ -+ err = -EIO; \ -+} while (0) ++ err = 0; ++ isdir = 0; ++ bstart = au_dbstart(dentry); ++ h_inode = au_h_dptr(dentry, bstart)->d_inode; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ inode->i_fop = &aufs_file_fop; ++ err = au_dy_iaop(inode, bstart, h_inode); ++ if (unlikely(err)) ++ goto out; ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ btail = au_dbtaildir(dentry); ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ break; ++ case S_IFLNK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_symlink_iop; ++ break; ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ case S_IFSOCK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ au_init_special_fop(inode, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown file type 0%o\n", mode); ++ err = -EIO; ++ goto out; ++ } + -+static void au_ren_rev_diropq(int err, struct au_ren_args *a) -+{ -+ int rerr; ++ /* do not set hnotify for whiteouted dirs (SHWH mode) */ ++ flags = au_hi_flags(inode, isdir); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) ++ && au_ftest_hi(flags, HNOTIFY) ++ && dentry->d_name.len > AUFS_WH_PFX_LEN ++ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ++ au_fclr_hi(flags, HNOTIFY); ++ iinfo = au_ii(inode); ++ iinfo->ii_bstart = bstart; ++ iinfo->ii_bend = btail; ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) ++ au_set_h_iptr(inode, bindex, ++ au_igrab(h_dentry->d_inode), flags); ++ } ++ au_cpup_attr_all(inode, /*force*/1); + -+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(a->src_dentry, a->btgt); -+ au_hn_imtx_unlock(a->src_hinode); -+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq); -+ if (rerr) -+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry)); ++out: ++ return err; +} + -+static void au_ren_rev_rename(int err, struct au_ren_args *a) ++/* ++ * successful returns with iinfo write_locked ++ * minus: errno ++ * zero: success, matched ++ * plus: no error, but unmatched ++ */ ++static int reval_inode(struct inode *inode, struct dentry *dentry) +{ -+ int rerr; ++ int err; ++ unsigned int gen; ++ struct au_iigen iigen; ++ aufs_bindex_t bindex, bend; ++ struct inode *h_inode, *h_dinode; + -+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name, -+ a->src_h_parent); -+ rerr = PTR_ERR(a->h_path.dentry); -+ if (IS_ERR(a->h_path.dentry)) { -+ RevertFailure("lkup one %.*s", AuDLNPair(a->src_dentry)); -+ return; -+ } ++ /* ++ * before this function, if aufs got any iinfo lock, it must be only ++ * one, the parent dir. ++ * it can happen by UDBA and the obsoleted inode number. ++ */ ++ err = -EIO; ++ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ goto out; + -+ rerr = vfsub_rename(a->dst_h_dir, -+ au_h_dptr(a->src_dentry, a->btgt), -+ a->src_h_dir, &a->h_path); -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ -+ if (rerr) -+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry)); -+} ++ err = 1; ++ ii_write_lock_new_child(inode); ++ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (!h_inode || h_inode != h_dinode) ++ continue; + -+static void au_ren_rev_cpup(int err, struct au_ren_args *a) -+{ -+ int rerr; ++ err = 0; ++ gen = au_iigen(inode, &iigen); ++ if (gen == au_digen(dentry) ++ && !au_ig_ftest(iigen.ig_flags, HALF_REFRESHED)) ++ break; + -+ a->h_path.dentry = a->dst_h_dentry; -+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0); -+ au_set_h_dptr(a->src_dentry, a->btgt, NULL); -+ au_set_dbstart(a->src_dentry, a->src_bstart); -+ if (rerr) -+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry)); ++ /* fully refresh inode using dentry */ ++ err = au_refresh_hinode(inode, dentry); ++ if (!err) ++ au_update_iigen(inode, /*half*/0); ++ break; ++ } ++ ++ if (unlikely(err)) ++ ii_write_unlock(inode); ++out: ++ return err; +} + -+static void au_ren_rev_whtmp(int err, struct au_ren_args *a) ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino) +{ -+ int rerr; ++ int err; ++ struct mutex *mtx; + -+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name, -+ a->dst_h_parent); -+ rerr = PTR_ERR(a->h_path.dentry); -+ if (IS_ERR(a->h_path.dentry)) { -+ RevertFailure("lkup one %.*s", AuDLNPair(a->dst_dentry)); -+ return; ++ /* prevent hardlinked inode number from race condition */ ++ mtx = NULL; ++ if (d_type != DT_DIR) { ++ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx; ++ mutex_lock(mtx); + } -+ if (a->h_path.dentry->d_inode) { -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ return; ++ err = au_xino_read(sb, bindex, h_ino, ino); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!*ino) { ++ err = -EIO; ++ *ino = au_xino_new_ino(sb); ++ if (unlikely(!*ino)) ++ goto out; ++ err = au_xino_write(sb, bindex, h_ino, *ino); ++ if (unlikely(err)) ++ goto out; + } + -+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path); -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ if (!rerr) -+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); -+ else -+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst)); ++out: ++ if (mtx) ++ mutex_unlock(mtx); ++ return err; +} + -+static void au_ren_rev_whsrc(int err, struct au_ren_args *a) ++/* successful returns with iinfo write_locked */ ++/* todo: return with unlocked? */ ++struct inode *au_new_inode(struct dentry *dentry, int must_new) +{ -+ int rerr; ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ struct mutex *mtx; ++ ino_t h_ino, ino; ++ int err; ++ aufs_bindex_t bstart; + -+ a->h_path.dentry = a->src_wh_dentry; -+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); -+ au_set_dbwh(a->src_dentry, a->src_bwh); -+ if (rerr) -+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry)); -+} -+#undef RevertFailure ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ h_dentry = au_h_dptr(dentry, bstart); ++ h_inode = h_dentry->d_inode; ++ h_ino = h_inode->i_ino; + -+/* ---------------------------------------------------------------------- */ ++ /* ++ * stop 'race'-ing between hardlinks under different ++ * parents. ++ */ ++ mtx = NULL; ++ if (!S_ISDIR(h_inode->i_mode)) ++ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; + -+/* -+ * when we have to copyup the renaming entry, do it with the rename-target name -+ * in order to minimize the cost (the later actual rename is unnecessary). -+ * otherwise rename it on the target branch. -+ */ -+static int au_ren_or_cpup(struct au_ren_args *a) -+{ -+ int err; -+ struct dentry *d; ++new_ino: ++ if (mtx) ++ mutex_lock(mtx); ++ err = au_xino_read(sb, bstart, h_ino, &ino); ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; + -+ d = a->src_dentry; -+ if (au_dbstart(d) == a->btgt) { -+ a->h_path.dentry = a->dst_h_dentry; -+ if (au_ftest_ren(a->flags, DIROPQ) -+ && au_dbdiropq(d) == a->btgt) -+ au_fclr_ren(a->flags, DIROPQ); -+ AuDebugOn(au_dbstart(d) != a->btgt); -+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), -+ a->dst_h_dir, &a->h_path); -+ } else { -+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; -+ struct file *h_file; ++ if (!ino) { ++ ino = au_xino_new_ino(sb); ++ if (unlikely(!ino)) { ++ inode = ERR_PTR(-EIO); ++ goto out; ++ } ++ } + -+ au_fset_ren(a->flags, CPUP); -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_set_dbstart(d, a->btgt); -+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry)); -+ h_file = au_h_open_pre(d, a->src_bstart); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ h_file = NULL; -+ } else -+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1, -+ !AuCpup_DTIME, a->dst_parent); -+ mutex_unlock(h_mtx); -+ au_h_open_post(d, a->src_bstart, h_file); ++ AuDbg("i%lu\n", (unsigned long)ino); ++ inode = au_iget_locked(sb, ino); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ ++ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); ++ if (inode->i_state & I_NEW) { ++ /* verbose coding for lock class name */ ++ if (unlikely(S_ISLNK(h_inode->i_mode))) ++ au_rw_class(&au_ii(inode)->ii_rwsem, ++ au_lc_key + AuLcSymlink_IIINFO); ++ else if (unlikely(S_ISDIR(h_inode->i_mode))) ++ au_rw_class(&au_ii(inode)->ii_rwsem, ++ au_lc_key + AuLcDir_IIINFO); ++ else /* likely */ ++ au_rw_class(&au_ii(inode)->ii_rwsem, ++ au_lc_key + AuLcNonDir_IIINFO); ++ ++ ii_write_lock_new_child(inode); ++ err = set_inode(inode, dentry); + if (!err) { -+ d = a->dst_dentry; -+ au_set_h_dptr(d, a->btgt, NULL); -+ au_update_dbstart(d); -+ } else { -+ au_set_h_dptr(d, a->btgt, NULL); -+ au_set_dbstart(d, a->src_bstart); ++ unlock_new_inode(inode); ++ goto out; /* success */ ++ } ++ ++ /* ++ * iget_failed() calls iput(), but we need to call ++ * ii_write_unlock() after iget_failed(). so dirty hack for ++ * i_count. ++ */ ++ atomic_inc(&inode->i_count); ++ iget_failed(inode); ++ ii_write_unlock(inode); ++ au_xino_write(sb, bstart, h_ino, /*ino*/0); ++ /* ignore this error */ ++ goto out_iput; ++ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { ++ /* ++ * horrible race condition between lookup, readdir and copyup ++ * (or something). ++ */ ++ if (mtx) ++ mutex_unlock(mtx); ++ err = reval_inode(inode, dentry); ++ if (unlikely(err < 0)) { ++ mtx = NULL; ++ goto out_iput; + } ++ ++ if (!err) { ++ mtx = NULL; ++ goto out; /* success */ ++ } else if (mtx) ++ mutex_lock(mtx); + } -+ if (!err && a->h_dst) -+ /* it will be set to dinfo later */ -+ dget(a->h_dst); + -+ return err; ++ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) ++ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," ++ " b%d, %s, %.*s, hi%lu, i%lu.\n", ++ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry), ++ (unsigned long)h_ino, (unsigned long)ino); ++ ino = 0; ++ err = au_xino_write(sb, bstart, h_ino, /*ino*/0); ++ if (!err) { ++ iput(inode); ++ if (mtx) ++ mutex_unlock(mtx); ++ goto new_ino; ++ } ++ ++out_iput: ++ iput(inode); ++ inode = ERR_PTR(err); ++out: ++ if (mtx) ++ mutex_unlock(mtx); ++ return inode; +} + -+/* cf. aufs_rmdir() */ -+static int au_ren_del_whtmp(struct au_ren_args *a) ++/* ---------------------------------------------------------------------- */ ++ ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode) +{ + int err; -+ struct inode *dir; + -+ dir = a->dst_dir; -+ SiMustAnyLock(dir->i_sb); -+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt, -+ au_sbi(dir->i_sb)->si_dirwh) -+ || au_test_fs_remote(a->h_dst->d_sb)) { -+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist); -+ if (unlikely(err)) -+ pr_warn("failed removing whtmp dir %.*s (%d), " -+ "ignored.\n", AuDLNPair(a->h_dst), err); -+ } else { -+ au_nhash_wh_free(&a->thargs->whlist); -+ a->thargs->whlist = a->whlist; -+ a->whlist.nh_num = 0; -+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); -+ dput(a->h_dst); -+ a->thargs = NULL; ++ err = au_br_rdonly(au_sbr(sb, bindex)); ++ ++ /* pseudo-link after flushed may happen out of bounds */ ++ if (!err ++ && inode ++ && au_ibstart(inode) <= bindex ++ && bindex <= au_ibend(inode)) { ++ /* ++ * permission check is unnecessary since vfsub routine ++ * will be called later ++ */ ++ struct inode *hi = au_h_iptr(inode, bindex); ++ if (hi) ++ err = IS_IMMUTABLE(hi) ? -EROFS : 0; + } + -+ return 0; ++ return err; +} + -+/* make it 'opaque' dir. */ -+static int au_ren_diropq(struct au_ren_args *a) ++int au_test_h_perm(struct inode *h_inode, int mask) +{ -+ int err; -+ struct dentry *diropq; -+ -+ err = 0; -+ a->src_bdiropq = au_dbdiropq(a->src_dentry); -+ a->src_hinode = au_hi(a->src_inode, a->btgt); -+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); -+ diropq = au_diropq_create(a->src_dentry, a->btgt); -+ au_hn_imtx_unlock(a->src_hinode); -+ if (IS_ERR(diropq)) -+ err = PTR_ERR(diropq); -+ dput(diropq); -+ -+ return err; ++ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) ++ return 0; ++ return inode_permission(h_inode, mask); +} + -+static int do_rename(struct au_ren_args *a) ++int au_test_h_perm_sio(struct inode *h_inode, int mask) +{ -+ int err; -+ struct dentry *d, *h_d; ++ if (au_test_nfs(h_inode->i_sb) ++ && (mask & MAY_WRITE) ++ && S_ISDIR(h_inode->i_mode)) ++ mask |= MAY_READ; /* force permission check */ ++ return au_test_h_perm(h_inode, mask); ++} +--- /dev/null ++++ linux-3.9/fs/aufs/inode.h 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,600 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ /* prepare workqueue args for asynchronous rmdir */ -+ h_d = a->dst_h_dentry; -+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { -+ err = -ENOMEM; -+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); -+ if (unlikely(!a->thargs)) -+ goto out; -+ a->h_dst = dget(h_d); -+ } ++/* ++ * inode operations ++ */ + -+ /* create whiteout for src_dentry */ -+ if (au_ftest_ren(a->flags, WHSRC)) { -+ a->src_bwh = au_dbwh(a->src_dentry); -+ AuDebugOn(a->src_bwh >= 0); -+ a->src_wh_dentry -+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); -+ err = PTR_ERR(a->src_wh_dentry); -+ if (IS_ERR(a->src_wh_dentry)) -+ goto out_thargs; -+ } ++#ifndef __AUFS_INODE_H__ ++#define __AUFS_INODE_H__ + -+ /* lookup whiteout for dentry */ -+ if (au_ftest_ren(a->flags, WHDST)) { -+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, -+ a->br); -+ err = PTR_ERR(h_d); -+ if (IS_ERR(h_d)) -+ goto out_whsrc; -+ if (!h_d->d_inode) -+ dput(h_d); -+ else -+ a->dst_wh_dentry = h_d; -+ } ++#ifdef __KERNEL__ + -+ /* rename dentry to tmpwh */ -+ if (a->thargs) { -+ err = au_whtmp_ren(a->dst_h_dentry, a->br); -+ if (unlikely(err)) -+ goto out_whdst; ++#include ++#include "rwsem.h" + -+ d = a->dst_dentry; -+ au_set_h_dptr(d, a->btgt, NULL); -+ err = au_lkup_neg(d, a->btgt); -+ if (unlikely(err)) -+ goto out_whtmp; -+ a->dst_h_dentry = au_h_dptr(d, a->btgt); -+ } ++struct vfsmount; + -+ /* cpup src */ -+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) { -+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; -+ struct file *h_file; ++struct au_hnotify { ++#ifdef CONFIG_AUFS_HNOTIFY ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ /* never use fsnotify_add_vfsmount_mark() */ ++ struct fsnotify_mark hn_mark; ++#endif ++ struct inode *hn_aufs_inode; /* no get/put */ ++#endif ++} ____cacheline_aligned_in_smp; + -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart); -+ h_file = au_h_open_pre(a->src_dentry, a->src_bstart); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ h_file = NULL; -+ } else -+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1, -+ !AuCpup_DTIME); -+ mutex_unlock(h_mtx); -+ au_h_open_post(a->src_dentry, a->src_bstart, h_file); -+ if (unlikely(err)) -+ goto out_whtmp; -+ } ++struct au_hinode { ++ struct inode *hi_inode; ++ aufs_bindex_t hi_id; ++#ifdef CONFIG_AUFS_HNOTIFY ++ struct au_hnotify *hi_notify; ++#endif + -+ /* rename by vfs_rename or cpup */ -+ d = a->dst_dentry; -+ if (au_ftest_ren(a->flags, ISDIR) -+ && (a->dst_wh_dentry -+ || au_dbdiropq(d) == a->btgt -+ /* hide the lower to keep xino */ -+ || a->btgt < au_dbend(d) -+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) -+ au_fset_ren(a->flags, DIROPQ); -+ err = au_ren_or_cpup(a); -+ if (unlikely(err)) -+ /* leave the copied-up one */ -+ goto out_whtmp; ++ /* reference to the copied-up whiteout with get/put */ ++ struct dentry *hi_whdentry; ++}; + -+ /* make dir opaque */ -+ if (au_ftest_ren(a->flags, DIROPQ)) { -+ err = au_ren_diropq(a); -+ if (unlikely(err)) -+ goto out_rename; -+ } ++/* ig_flags */ ++#define AuIG_HALF_REFRESHED 1 ++#define au_ig_ftest(flags, name) ((flags) & AuIG_##name) ++#define au_ig_fset(flags, name) \ ++ do { (flags) |= AuIG_##name; } while (0) ++#define au_ig_fclr(flags, name) \ ++ do { (flags) &= ~AuIG_##name; } while (0) ++ ++struct au_iigen { ++ __u32 ig_generation, ig_flags; ++}; ++ ++struct au_vdir; ++struct au_iinfo { ++ spinlock_t ii_genspin; ++ struct au_iigen ii_generation; ++ struct super_block *ii_hsb1; /* no get/put */ ++ ++ struct au_rwsem ii_rwsem; ++ aufs_bindex_t ii_bstart, ii_bend; ++ __u32 ii_higen; ++ struct au_hinode *ii_hinode; ++ struct au_vdir *ii_vdir; ++}; ++ ++struct au_icntnr { ++ struct au_iinfo iinfo; ++ struct inode vfs_inode; ++} ____cacheline_aligned_in_smp; + -+ /* update target timestamps */ -+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); -+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); -+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ -+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; ++/* au_pin flags */ ++#define AuPin_DI_LOCKED 1 ++#define AuPin_MNT_WRITE (1 << 1) ++#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) ++#define au_fset_pin(flags, name) \ ++ do { (flags) |= AuPin_##name; } while (0) ++#define au_fclr_pin(flags, name) \ ++ do { (flags) &= ~AuPin_##name; } while (0) + -+ /* remove whiteout for dentry */ -+ if (a->dst_wh_dentry) { -+ a->h_path.dentry = a->dst_wh_dentry; -+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, -+ a->dst_dentry); -+ if (unlikely(err)) -+ goto out_diropq; -+ } ++struct au_pin { ++ /* input */ ++ struct dentry *dentry; ++ unsigned int udba; ++ unsigned char lsc_di, lsc_hi, flags; ++ aufs_bindex_t bindex; + -+ /* remove whtmp */ -+ if (a->thargs) -+ au_ren_del_whtmp(a); /* ignore this error */ ++ /* output */ ++ struct dentry *parent; ++ struct au_hinode *hdir; ++ struct vfsmount *h_mnt; + -+ err = 0; -+ goto out_success; ++ /* temporary unlock/relock for copyup */ ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct task_struct *task; ++}; + -+out_diropq: -+ if (au_ftest_ren(a->flags, DIROPQ)) -+ au_ren_rev_diropq(err, a); -+out_rename: -+ if (!au_ftest_ren(a->flags, CPUP)) -+ au_ren_rev_rename(err, a); -+ else -+ au_ren_rev_cpup(err, a); -+ dput(a->h_dst); -+out_whtmp: -+ if (a->thargs) -+ au_ren_rev_whtmp(err, a); -+out_whdst: -+ dput(a->dst_wh_dentry); -+ a->dst_wh_dentry = NULL; -+out_whsrc: -+ if (a->src_wh_dentry) -+ au_ren_rev_whsrc(err, a); -+out_success: -+ dput(a->src_wh_dentry); -+ dput(a->dst_wh_dentry); -+out_thargs: -+ if (a->thargs) { -+ dput(a->h_dst); -+ au_whtmp_rmdir_free(a->thargs); -+ a->thargs = NULL; -+ } -+out: -+ return err; -+} ++void au_pin_hdir_unlock(struct au_pin *p); ++int au_pin_hdir_relock(struct au_pin *p); ++void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task); ++void au_pin_hdir_acquire_nest(struct au_pin *p); ++void au_pin_hdir_release(struct au_pin *p); + +/* ---------------------------------------------------------------------- */ + -+/* -+ * test if @dentry dir can be rename destination or not. -+ * success means, it is a logically empty dir. -+ */ -+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) ++static inline struct au_iinfo *au_ii(struct inode *inode) +{ -+ return au_test_empty(dentry, whlist); -+} ++ struct au_iinfo *iinfo; + -+/* -+ * test if @dentry dir can be rename source or not. -+ * if it can, return 0 and @children is filled. -+ * success means, -+ * - it is a logically empty dir. -+ * - or, it exists on writable branch and has no children including whiteouts -+ * on the lower branch. -+ */ -+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) -+{ -+ int err; -+ unsigned int rdhash; -+ aufs_bindex_t bstart; ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ if (iinfo->ii_hinode) ++ return iinfo; ++ return NULL; /* debugging bad_inode case */ ++} + -+ bstart = au_dbstart(dentry); -+ if (bstart != btgt) { -+ struct au_nhash whlist; ++/* ---------------------------------------------------------------------- */ + -+ SiMustAnyLock(dentry->d_sb); -+ rdhash = au_sbi(dentry->d_sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, -+ dentry)); -+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_test_empty(dentry, &whlist); -+ au_nhash_wh_free(&whlist); -+ goto out; -+ } ++/* inode.c */ ++struct inode *au_igrab(struct inode *inode); ++int au_refresh_hinode_self(struct inode *inode); ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry); ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino); ++struct inode *au_new_inode(struct dentry *dentry, int must_new); ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode); ++int au_test_h_perm(struct inode *h_inode, int mask); ++int au_test_h_perm_sio(struct inode *h_inode, int mask); + -+ if (bstart == au_dbtaildir(dentry)) -+ return 0; /* success */ ++static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, ++ ino_t h_ino, unsigned int d_type, ino_t *ino) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ return au_ino(sb, bindex, h_ino, d_type, ino); ++#else ++ return 0; ++#endif ++} + -+ err = au_test_empty_lower(dentry); ++/* i_op.c */ ++extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; + -+out: -+ if (err == -ENOTEMPTY) { -+ AuWarn1("renaming dir who has child(ren) on multiple branches," -+ " is not supported\n"); -+ err = -EXDEV; -+ } -+ return err; -+} ++/* au_wr_dir flags */ ++#define AuWrDir_ADD_ENTRY 1 ++#define AuWrDir_TMP_WHENTRY (1 << 1) ++#define AuWrDir_ISDIR (1 << 2) ++#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) ++#define au_fset_wrdir(flags, name) \ ++ do { (flags) |= AuWrDir_##name; } while (0) ++#define au_fclr_wrdir(flags, name) \ ++ do { (flags) &= ~AuWrDir_##name; } while (0) + -+/* side effect: sets whlist and h_dentry */ -+static int au_ren_may_dir(struct au_ren_args *a) -+{ -+ int err; -+ unsigned int rdhash; -+ struct dentry *d; ++struct au_wr_dir_args { ++ aufs_bindex_t force_btgt; ++ unsigned char flags; ++}; ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args); + -+ d = a->dst_dentry; -+ SiMustAnyLock(d->d_sb); ++struct dentry *au_pinned_h_parent(struct au_pin *pin); ++void au_pin_init(struct au_pin *pin, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags); ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) __must_check; ++int au_do_pin(struct au_pin *pin) __must_check; ++void au_unpin(struct au_pin *pin); + -+ err = 0; -+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { -+ rdhash = au_sbi(d->d_sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d)); -+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; ++/* i_op_add.c */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t dev); ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); ++int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ bool want_excl); ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry); ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); + -+ au_set_dbstart(d, a->dst_bstart); -+ err = may_rename_dstdir(d, &a->whlist); -+ au_set_dbstart(d, a->btgt); -+ } -+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); -+ if (unlikely(err)) -+ goto out; ++/* i_op_del.c */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_unlink(struct inode *dir, struct dentry *dentry); ++int aufs_rmdir(struct inode *dir, struct dentry *dentry); + -+ d = a->src_dentry; -+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d)); -+ if (au_ftest_ren(a->flags, ISDIR)) { -+ err = may_rename_srcdir(d, a->btgt); -+ if (unlikely(err)) { -+ au_nhash_wh_free(&a->whlist); -+ a->whlist.nh_num = 0; -+ } -+ } -+out: -+ return err; -+} ++/* i_op_ren.c */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); ++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry); + -+/* ---------------------------------------------------------------------- */ ++/* iinfo.c */ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); ++void au_hiput(struct au_hinode *hinode); ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh); ++unsigned int au_hi_flags(struct inode *inode, int isdir); + -+/* -+ * simple tests for rename. -+ * following the checks in vfs, plus the parent-child relationship. -+ */ -+static int au_may_ren(struct au_ren_args *a) -+{ -+ int err, isdir; -+ struct inode *h_inode; ++/* hinode flags */ ++#define AuHi_XINO 1 ++#define AuHi_HNOTIFY (1 << 1) ++#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) ++#define au_fset_hi(flags, name) \ ++ do { (flags) |= AuHi_##name; } while (0) ++#define au_fclr_hi(flags, name) \ ++ do { (flags) &= ~AuHi_##name; } while (0) + -+ if (a->src_bstart == a->btgt) { -+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, -+ au_ftest_ren(a->flags, ISDIR)); -+ if (unlikely(err)) -+ goto out; -+ err = -EINVAL; -+ if (unlikely(a->src_h_dentry == a->h_trap)) -+ goto out; -+ } ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuHi_HNOTIFY ++#define AuHi_HNOTIFY 0 ++#endif + -+ err = 0; -+ if (a->dst_bstart != a->btgt) -+ goto out; ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags); + -+ err = -ENOTEMPTY; -+ if (unlikely(a->dst_h_dentry == a->h_trap)) -+ goto out; ++void au_update_iigen(struct inode *inode, int half); ++void au_update_ibrange(struct inode *inode, int do_put_zero); + -+ err = -EIO; -+ h_inode = a->dst_h_dentry->d_inode; -+ isdir = !!au_ftest_ren(a->flags, ISDIR); -+ if (!a->dst_dentry->d_inode) { -+ if (unlikely(h_inode)) -+ goto out; -+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, -+ isdir); -+ } else { -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, -+ isdir); -+ if (unlikely(err)) -+ goto out; -+ } ++void au_icntnr_init_once(void *_c); ++int au_iinfo_init(struct inode *inode); ++void au_iinfo_fin(struct inode *inode); ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr); + -+out: -+ if (unlikely(err == -ENOENT || err == -EEXIST)) -+ err = -EIO; -+ AuTraceErr(err); -+ return err; -+} ++#ifdef CONFIG_PROC_FS ++/* plink.c */ ++int au_plink_maint(struct super_block *sb, int flags); ++void au_plink_maint_leave(struct au_sbinfo *sbinfo); ++int au_plink_maint_enter(struct super_block *sb); ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb); ++#else ++AuStubVoid(au_plink_list, struct super_block *sb) ++#endif ++int au_plink_test(struct inode *inode); ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++void au_plink_put(struct super_block *sb, int verbose); ++void au_plink_clean(struct super_block *sb, int verbose); ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); ++#else ++AuStubInt0(au_plink_maint, struct super_block *sb, int flags); ++AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); ++AuStubInt0(au_plink_maint_enter, struct super_block *sb); ++AuStubVoid(au_plink_list, struct super_block *sb); ++AuStubInt0(au_plink_test, struct inode *inode); ++AuStub(struct dentry *, au_plink_lkup, return NULL, ++ struct inode *inode, aufs_bindex_t bindex); ++AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++AuStubVoid(au_plink_put, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); ++#endif /* CONFIG_PROC_FS */ + +/* ---------------------------------------------------------------------- */ + ++/* lock subclass for iinfo */ ++enum { ++ AuLsc_II_CHILD, /* child first */ ++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ ++ AuLsc_II_CHILD3, /* copyup dirs */ ++ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ ++ AuLsc_II_PARENT2, ++ AuLsc_II_PARENT3, /* copyup dirs */ ++ AuLsc_II_NEW_CHILD ++}; ++ +/* -+ * locking order -+ * (VFS) -+ * - src_dir and dir by lock_rename() -+ * - inode if exitsts -+ * (aufs) -+ * - lock all -+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, -+ * + si_read_lock -+ * + di_write_lock2_child() -+ * + di_write_lock_child() -+ * + ii_write_lock_child() -+ * + di_write_lock_child2() -+ * + ii_write_lock_child2() -+ * + src_parent and parent -+ * + di_write_lock_parent() -+ * + ii_write_lock_parent() -+ * + di_write_lock_parent2() -+ * + ii_write_lock_parent2() -+ * + lower src_dir and dir by vfsub_lock_rename() -+ * + verify the every relationships between child and parent. if any -+ * of them failed, unlock all and return -EBUSY. ++ * ii_read_lock_child, ii_write_lock_child, ++ * ii_read_lock_child2, ii_write_lock_child2, ++ * ii_read_lock_child3, ii_write_lock_child3, ++ * ii_read_lock_parent, ii_write_lock_parent, ++ * ii_read_lock_parent2, ii_write_lock_parent2, ++ * ii_read_lock_parent3, ii_write_lock_parent3, ++ * ii_read_lock_new_child, ii_write_lock_new_child, + */ -+static void au_ren_unlock(struct au_ren_args *a) -+{ -+ struct super_block *sb; ++#define AuReadLockFunc(name, lsc) \ ++static inline void ii_read_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} + -+ sb = a->dst_dentry->d_sb; -+ if (au_ftest_ren(a->flags, MNT_WRITE)) -+ vfsub_mnt_drop_write(a->br->br_mnt); -+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, -+ a->dst_h_parent, a->dst_hdir); ++#define AuWriteLockFunc(name, lsc) \ ++static inline void ii_write_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ +} + -+static int au_ren_lock(struct au_ren_args *a) -+{ -+ int err; -+ unsigned int udba; ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) + -+ err = 0; -+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); -+ a->src_hdir = au_hi(a->src_dir, a->btgt); -+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); -+ a->dst_hdir = au_hi(a->dst_dir, a->btgt); -+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, -+ a->dst_h_parent, a->dst_hdir); -+ udba = au_opt_udba(a->src_dentry->d_sb); -+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode -+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode)) -+ err = au_busy_or_stale(); -+ if (!err && au_dbstart(a->src_dentry) == a->btgt) -+ err = au_h_verify(a->src_h_dentry, udba, -+ a->src_h_parent->d_inode, a->src_h_parent, -+ a->br); -+ if (!err && au_dbstart(a->dst_dentry) == a->btgt) -+ err = au_h_verify(a->dst_h_dentry, udba, -+ a->dst_h_parent->d_inode, a->dst_h_parent, -+ a->br); -+ if (!err) { -+ err = vfsub_mnt_want_write(a->br->br_mnt); -+ if (unlikely(err)) -+ goto out_unlock; -+ au_fset_ren(a->flags, MNT_WRITE); -+ goto out; /* success */ -+ } ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++AuRWLockFuncs(new_child, NEW_CHILD); + -+ err = au_busy_or_stale(); ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs + -+out_unlock: -+ au_ren_unlock(a); -+out: -+ return err; -+} ++/* ++ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock ++ */ ++AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); ++ ++#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) ++#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) ++#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) + +/* ---------------------------------------------------------------------- */ + -+static void au_ren_refresh_dir(struct au_ren_args *a) ++static inline void au_icntnr_init(struct au_icntnr *c) +{ -+ struct inode *dir; -+ -+ dir = a->dst_dir; -+ dir->i_version++; -+ if (au_ftest_ren(a->flags, ISDIR)) { -+ /* is this updating defined in POSIX? */ -+ au_cpup_attr_timesizes(a->src_inode); -+ au_cpup_attr_nlink(dir, /*force*/1); -+ } ++#ifdef CONFIG_AUFS_DEBUG ++ c->vfs_inode.i_mode = 0; ++#endif ++} + -+ if (au_ibstart(dir) == a->btgt) -+ au_cpup_attr_timesizes(dir); ++static inline unsigned int au_iigen(struct inode *inode, struct au_iigen *iigen) ++{ ++ unsigned int gen; ++ struct au_iinfo *iinfo; + -+ if (au_ftest_ren(a->flags, ISSAMEDIR)) -+ return; ++ iinfo = au_ii(inode); ++ spin_lock(&iinfo->ii_genspin); ++ if (iigen) ++ *iigen = iinfo->ii_generation; ++ gen = iinfo->ii_generation.ig_generation; ++ spin_unlock(&iinfo->ii_genspin); + -+ dir = a->src_dir; -+ dir->i_version++; -+ if (au_ftest_ren(a->flags, ISDIR)) -+ au_cpup_attr_nlink(dir, /*force*/1); -+ if (au_ibstart(dir) == a->btgt) -+ au_cpup_attr_timesizes(dir); ++ return gen; +} + -+static void au_ren_refresh(struct au_ren_args *a) ++/* tiny test for inode number */ ++/* tmpfs generation is too rough */ ++static inline int au_test_higen(struct inode *inode, struct inode *h_inode) +{ -+ aufs_bindex_t bend, bindex; -+ struct dentry *d, *h_d; -+ struct inode *i, *h_i; -+ struct super_block *sb; ++ struct au_iinfo *iinfo; + -+ d = a->dst_dentry; -+ d_drop(d); -+ if (a->h_dst) -+ /* already dget-ed by au_ren_or_cpup() */ -+ au_set_h_dptr(d, a->btgt, a->h_dst); ++ iinfo = au_ii(inode); ++ AuRwMustAnyLock(&iinfo->ii_rwsem); ++ return !(iinfo->ii_hsb1 == h_inode->i_sb ++ && iinfo->ii_higen == h_inode->i_generation); ++} + -+ i = a->dst_inode; -+ if (i) { -+ if (!au_ftest_ren(a->flags, ISDIR)) -+ vfsub_drop_nlink(i); -+ else { -+ vfsub_dead_dir(i); -+ au_cpup_attr_timesizes(i); -+ } -+ au_update_dbrange(d, /*do_put_zero*/1); -+ } else { -+ bend = a->btgt; -+ for (bindex = au_dbstart(d); bindex < bend; bindex++) -+ au_set_h_dptr(d, bindex, NULL); -+ bend = au_dbend(d); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) -+ au_set_h_dptr(d, bindex, NULL); -+ au_update_dbrange(d, /*do_put_zero*/0); -+ } ++static inline void au_iigen_dec(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ ++ iinfo = au_ii(inode); ++ spin_lock(&iinfo->ii_genspin); ++ iinfo->ii_generation.ig_generation--; ++ spin_unlock(&iinfo->ii_genspin); ++} + -+ d = a->src_dentry; -+ au_set_dbwh(d, -1); -+ bend = au_dbend(d); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { -+ h_d = au_h_dptr(d, bindex); -+ if (h_d) -+ au_set_h_dptr(d, bindex, NULL); -+ } -+ au_set_dbend(d, a->btgt); ++static inline int au_iigen_test(struct inode *inode, unsigned int sigen) ++{ ++ int err; + -+ sb = d->d_sb; -+ i = a->src_inode; -+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) -+ return; /* success */ ++ err = 0; ++ if (unlikely(inode && au_iigen(inode, NULL) != sigen)) ++ err = -EIO; + -+ bend = au_ibend(i); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { -+ h_i = au_h_iptr(i, bindex); -+ if (h_i) { -+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0); -+ /* ignore this error */ -+ au_set_h_iptr(i, bindex, NULL, 0); -+ } -+ } -+ au_set_ibend(i, a->btgt); ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+/* mainly for link(2) and rename(2) */ -+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) ++static inline aufs_bindex_t au_ii_br_id(struct inode *inode, ++ aufs_bindex_t bindex) +{ -+ aufs_bindex_t bdiropq, bwh; -+ struct dentry *parent; -+ struct au_branch *br; ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; ++} + -+ parent = dentry->d_parent; -+ IMustLock(parent->d_inode); /* dir is locked */ ++static inline aufs_bindex_t au_ibstart(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bstart; ++} + -+ bdiropq = au_dbdiropq(parent); -+ bwh = au_dbwh(dentry); -+ br = au_sbr(dentry->d_sb, btgt); -+ if (au_br_rdonly(br) -+ || (0 <= bdiropq && bdiropq < btgt) -+ || (0 <= bwh && bwh < btgt)) -+ btgt = -1; ++static inline aufs_bindex_t au_ibend(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bend; ++} + -+ AuDbg("btgt %d\n", btgt); -+ return btgt; ++static inline struct au_vdir *au_ivdir(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_vdir; +} + -+/* sets src_bstart, dst_bstart and btgt */ -+static int au_ren_wbr(struct au_ren_args *a) ++static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) +{ -+ int err; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; ++} + -+ a->src_bstart = au_dbstart(a->src_dentry); -+ a->dst_bstart = au_dbstart(a->dst_dentry); -+ if (au_ftest_ren(a->flags, ISDIR)) -+ au_fset_wrdir(wr_dir_args.flags, ISDIR); -+ wr_dir_args.force_btgt = a->src_bstart; -+ if (a->dst_inode && a->dst_bstart < a->src_bstart) -+ wr_dir_args.force_btgt = a->dst_bstart; -+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); -+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); -+ a->btgt = err; ++static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bstart = bindex; ++} + -+ return err; ++static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bend = bindex; +} + -+static void au_ren_dt(struct au_ren_args *a) ++static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) +{ -+ a->h_path.dentry = a->src_h_parent; -+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); -+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { -+ a->h_path.dentry = a->dst_h_parent; -+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path); -+ } ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_vdir = vdir; ++} + -+ au_fclr_ren(a->flags, DT_DSTDIR); -+ if (!au_ftest_ren(a->flags, ISDIR)) -+ return; ++static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_hinode + bindex; ++} + -+ a->h_path.dentry = a->src_h_dentry; -+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); -+ if (a->dst_h_dentry->d_inode) { -+ au_fset_ren(a->flags, DT_DSTDIR); -+ a->h_path.dentry = a->dst_h_dentry; -+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); -+ } ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct dentry *au_pinned_parent(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->parent; ++ return NULL; +} + -+static void au_ren_rev_dt(int err, struct au_ren_args *a) ++static inline struct inode *au_pinned_h_dir(struct au_pin *pin) +{ -+ struct dentry *h_d; -+ struct mutex *h_mtx; ++ if (pin && pin->hdir) ++ return pin->hdir->hi_inode; ++ return NULL; ++} + -+ au_dtime_revert(a->src_dt + AuPARENT); -+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) -+ au_dtime_revert(a->dst_dt + AuPARENT); ++static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->hdir; ++ return NULL; ++} + -+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { -+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_dtime_revert(a->src_dt + AuCHILD); -+ mutex_unlock(h_mtx); ++static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) ++{ ++ if (pin) ++ pin->dentry = dentry; ++} + -+ if (au_ftest_ren(a->flags, DT_DSTDIR)) { -+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_dtime_revert(a->dst_dt + AuCHILD); -+ mutex_unlock(h_mtx); -+ } ++static inline void au_pin_set_parent_lflag(struct au_pin *pin, ++ unsigned char lflag) ++{ ++ if (pin) { ++ if (lflag) ++ au_fset_pin(pin->flags, DI_LOCKED); ++ else ++ au_fclr_pin(pin->flags, DI_LOCKED); ++ } ++} ++ ++static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) ++{ ++ if (pin) { ++ dput(pin->parent); ++ pin->parent = dget(parent); + } +} + +/* ---------------------------------------------------------------------- */ + -+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, -+ struct inode *_dst_dir, struct dentry *_dst_dentry) -+{ -+ int err, flags; -+ /* reduce stack space */ -+ struct au_ren_args *a; ++struct au_branch; ++#ifdef CONFIG_AUFS_HNOTIFY ++struct au_hnotify_op { ++ void (*ctl)(struct au_hinode *hinode, int do_set); ++ int (*alloc)(struct au_hinode *hinode); + -+ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry)); -+ IMustLock(_src_dir); -+ IMustLock(_dst_dir); ++ /* ++ * if it returns true, the the caller should free hinode->hi_notify, ++ * otherwise ->free() frees it. ++ */ ++ int (*free)(struct au_hinode *hinode, ++ struct au_hnotify *hn) __must_check; + -+ err = -ENOMEM; -+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; ++ void (*fin)(void); ++ int (*init)(void); + -+ a->src_dir = _src_dir; -+ a->src_dentry = _src_dentry; -+ a->src_inode = a->src_dentry->d_inode; -+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */ -+ a->dst_dir = _dst_dir; -+ a->dst_dentry = _dst_dentry; -+ a->dst_inode = a->dst_dentry->d_inode; -+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ -+ if (a->dst_inode) { -+ IMustLock(a->dst_inode); -+ au_igrab(a->dst_inode); -+ } ++ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); ++ void (*fin_br)(struct au_branch *br); ++ int (*init_br)(struct au_branch *br, int perm); ++}; + -+ err = -ENOTDIR; -+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN; -+ if (S_ISDIR(a->src_inode->i_mode)) { -+ au_fset_ren(a->flags, ISDIR); -+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) -+ goto out_free; -+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, -+ AuLock_DIR | flags); -+ } else -+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, -+ flags); -+ if (unlikely(err)) -+ goto out_free; ++/* hnotify.c */ ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); ++void au_hn_free(struct au_hinode *hinode); ++void au_hn_ctl(struct au_hinode *hinode, int do_set); ++void au_hn_reset(struct inode *inode, unsigned int flags); ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ struct qstr *h_child_qstr, struct inode *h_child_inode); ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); ++int au_hnotify_init_br(struct au_branch *br, int perm); ++void au_hnotify_fin_br(struct au_branch *br); ++int __init au_hnotify_init(void); ++void au_hnotify_fin(void); ++ ++/* hfsnotify.c */ ++extern const struct au_hnotify_op au_hnotify_op; + -+ err = au_d_hashed_positive(a->src_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ err = -ENOENT; -+ if (a->dst_inode) { -+ /* -+ * If it is a dir, VFS unhash dst_dentry before this -+ * function. It means we cannot rely upon d_unhashed(). -+ */ -+ if (unlikely(!a->dst_inode->i_nlink)) -+ goto out_unlock; -+ if (!S_ISDIR(a->dst_inode->i_mode)) { -+ err = au_d_hashed_positive(a->dst_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else if (unlikely(IS_DEADDIR(a->dst_inode))) -+ goto out_unlock; -+ } else if (unlikely(d_unhashed(a->dst_dentry))) -+ goto out_unlock; ++static inline ++void au_hn_init(struct au_hinode *hinode) ++{ ++ hinode->hi_notify = NULL; ++} + -+ /* -+ * is it possible? -+ * yes, it happend (in linux-3.3-rcN) but I don't know why. -+ * there may exist a problem somewhere else. -+ */ -+ err = -EINVAL; -+ if (unlikely(a->dst_parent->d_inode == a->src_dentry->d_inode)) -+ goto out_unlock; ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return hinode->hi_notify; ++} + -+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ -+ di_write_lock_parent(a->dst_parent); ++#else ++static inline ++int au_hn_alloc(struct au_hinode *hinode __maybe_unused, ++ struct inode *inode __maybe_unused) ++{ ++ return -EOPNOTSUPP; ++} + -+ /* which branch we process */ -+ err = au_ren_wbr(a); -+ if (unlikely(err < 0)) -+ goto out_parent; -+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); -+ a->h_path.mnt = a->br->br_mnt; ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return NULL; ++} + -+ /* are they available to be renamed */ -+ err = au_ren_may_dir(a); -+ if (unlikely(err)) -+ goto out_children; ++AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) ++AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, ++ int do_set __maybe_unused) ++AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, ++ unsigned int flags __maybe_unused) ++AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, ++ struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused) ++AuStubInt0(__init au_hnotify_init, void) ++AuStubVoid(au_hnotify_fin, void) ++AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ + -+ /* prepare the writable parent dir on the same branch */ -+ if (a->dst_bstart == a->btgt) { -+ au_fset_ren(a->flags, WHDST); -+ } else { -+ err = au_cpup_dirs(a->dst_dentry, a->btgt); -+ if (unlikely(err)) -+ goto out_children; -+ } ++static inline void au_hn_suspend(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/0); ++} + -+ if (a->src_dir != a->dst_dir) { -+ /* -+ * this temporary unlock is safe, -+ * because both dir->i_mutex are locked. -+ */ -+ di_write_unlock(a->dst_parent); -+ di_write_lock_parent(a->src_parent); -+ err = au_wr_dir_need_wh(a->src_dentry, -+ au_ftest_ren(a->flags, ISDIR), -+ &a->btgt); -+ di_write_unlock(a->src_parent); -+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); -+ au_fclr_ren(a->flags, ISSAMEDIR); -+ } else -+ err = au_wr_dir_need_wh(a->src_dentry, -+ au_ftest_ren(a->flags, ISDIR), -+ &a->btgt); -+ if (unlikely(err < 0)) -+ goto out_children; -+ if (err) -+ au_fset_ren(a->flags, WHSRC); ++static inline void au_hn_resume(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/1); ++} + -+ /* lock them all */ -+ err = au_ren_lock(a); -+ if (unlikely(err)) -+ goto out_children; ++static inline void au_hn_imtx_lock(struct au_hinode *hdir) ++{ ++ mutex_lock(&hdir->hi_inode->i_mutex); ++ au_hn_suspend(hdir); ++} + -+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) -+ err = au_may_ren(a); -+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ err = -ENAMETOOLONG; -+ if (unlikely(err)) -+ goto out_hdir; ++static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir, ++ unsigned int sc __maybe_unused) ++{ ++ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); ++ au_hn_suspend(hdir); ++} + -+ /* store timestamps to be revertible */ -+ au_ren_dt(a); ++static inline void au_hn_imtx_unlock(struct au_hinode *hdir) ++{ ++ au_hn_resume(hdir); ++ mutex_unlock(&hdir->hi_inode->i_mutex); ++} + -+ /* here we go */ -+ err = do_rename(a); -+ if (unlikely(err)) -+ goto out_dt; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_INODE_H__ */ +--- /dev/null ++++ linux-3.9/fs/aufs/ioctl.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,196 @@ ++/* ++ * Copyright (C) 2005-2013 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ /* update dir attributes */ -+ au_ren_refresh_dir(a); ++/* ++ * ioctl ++ * plink-management and readdir in userspace. ++ * assist the pathconf(3) wrapper library. ++ */ + -+ /* dput/iput all lower dentries */ -+ au_ren_refresh(a); ++#include "aufs.h" + -+ goto out_hdir; /* success */ ++static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) ++{ ++ int err, fd; ++ aufs_bindex_t wbi, bindex, bend; ++ struct file *h_file; ++ struct super_block *sb; ++ struct dentry *root; ++ struct au_branch *br; ++ struct aufs_wbr_fd wbrfd = { ++ .oflags = au_dir_roflags, ++ .brid = -1 ++ }; ++ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY ++ | O_NOATIME | O_CLOEXEC; + -+out_dt: -+ au_ren_rev_dt(err, a); -+out_hdir: -+ au_ren_unlock(a); -+out_children: -+ au_nhash_wh_free(&a->whlist); -+ if (err && a->dst_inode && a->dst_bstart != a->btgt) { -+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt); -+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); -+ au_set_dbstart(a->dst_dentry, a->dst_bstart); ++ AuDebugOn(wbrfd.oflags & ~valid); ++ ++ if (arg) { ++ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ goto out; ++ } ++ ++ err = -EINVAL; ++ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); ++ wbrfd.oflags |= au_dir_roflags; ++ AuDbg("0%o\n", wbrfd.oflags); ++ if (unlikely(wbrfd.oflags & ~valid)) ++ goto out; + } -+out_parent: -+ if (!err) -+ d_move(a->src_dentry, a->dst_dentry); -+ else { -+ au_update_dbstart(a->dst_dentry); -+ if (!a->dst_inode) -+ d_drop(a->dst_dentry); ++ ++ fd = get_unused_fd(); ++ err = fd; ++ if (unlikely(fd < 0)) ++ goto out; ++ ++ h_file = ERR_PTR(-EINVAL); ++ wbi = 0; ++ br = NULL; ++ sb = path->dentry->d_sb; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_IR); ++ bend = au_sbend(sb); ++ if (wbrfd.brid >= 0) { ++ wbi = au_br_index(sb, wbrfd.brid); ++ if (unlikely(wbi < 0 || wbi > bend)) ++ goto out_unlock; + } -+ if (au_ftest_ren(a->flags, ISSAMEDIR)) -+ di_write_unlock(a->dst_parent); -+ else -+ di_write_unlock2(a->src_parent, a->dst_parent); ++ ++ h_file = ERR_PTR(-ENOENT); ++ br = au_sbr(sb, wbi); ++ if (!au_br_writable(br->br_perm)) { ++ if (arg) ++ goto out_unlock; ++ ++ bindex = wbi + 1; ++ wbi = -1; ++ for (; bindex <= bend; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_writable(br->br_perm)) { ++ wbi = bindex; ++ br = au_sbr(sb, wbi); ++ break; ++ } ++ } ++ } ++ AuDbg("wbi %d\n", wbi); ++ if (wbi >= 0) ++ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL); ++ +out_unlock: -+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); -+out_free: -+ iput(a->dst_inode); -+ if (a->thargs) -+ au_whtmp_rmdir_free(a->thargs); -+ kfree(a); ++ aufs_read_unlock(root, AuLock_IR); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out_fd; ++ ++ atomic_dec(&br->br_count); /* cf. au_h_open() */ ++ fd_install(fd, h_file); ++ err = fd; ++ goto out; /* success */ ++ ++out_fd: ++ put_unused_fd(fd); +out: + AuTraceErr(err); + return err; +} ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/Kconfig 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,203 @@ -+config AUFS_FS -+ tristate "Aufs (Advanced multi layered unification filesystem) support" -+ depends on EXPERIMENTAL -+ help -+ Aufs is a stackable unification filesystem such as Unionfs, -+ which unifies several directories and provides a merged single -+ directory. -+ In the early days, aufs was entirely re-designed and -+ re-implemented Unionfs Version 1.x series. Introducing many -+ original ideas, approaches and improvements, it becomes totally -+ different from Unionfs while keeping the basic features. -+ -+if AUFS_FS -+choice -+ prompt "Maximum number of branches" -+ default AUFS_BRANCH_MAX_127 -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_127 -+ bool "127" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_511 -+ bool "511" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_1023 -+ bool "1023" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_32767 -+ bool "32767" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+endchoice + -+config AUFS_SBILIST -+ bool -+ depends on AUFS_MAGIC_SYSRQ || PROC_FS -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq or /proc, enabled automatically. ++/* ---------------------------------------------------------------------- */ + -+config AUFS_HNOTIFY -+ bool "Detect direct branch access (bypassing aufs)" -+ help -+ If you want to modify files on branches directly, eg. bypassing aufs, -+ and want aufs to detect the changes of them fully, then enable this -+ option and use 'udba=notify' mount option. -+ Currently there is only one available configuration, "fsnotify". -+ It will have a negative impact to the performance. -+ See detail in aufs.5. ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; + -+choice -+ prompt "method" if AUFS_HNOTIFY -+ default AUFS_HFSNOTIFY -+config AUFS_HFSNOTIFY -+ bool "fsnotify" -+ select FSNOTIFY -+endchoice ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ioctl(file, cmd, arg); ++ break; + -+config AUFS_EXPORT -+ bool "NFS-exportable aufs" -+ depends on EXPORTFS -+ help -+ If you want to export your mounted aufs via NFS, then enable this -+ option. There are several requirements for this configuration. -+ See detail in aufs.5. ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; + -+config AUFS_INO_T_64 -+ bool -+ depends on AUFS_EXPORT -+ depends on 64BIT && !(ALPHA || S390) -+ default y -+ help -+ Automatic configuration for internal use. -+ /* typedef unsigned long/int __kernel_ino_t */ -+ /* alpha and s390x are int */ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_ioctl(file, arg); ++ break; + -+config AUFS_RDU -+ bool "Readdir in userspace" -+ help -+ Aufs has two methods to provide a merged view for a directory, -+ by a user-space library and by kernel-space natively. The latter -+ is always enabled but sometimes large and slow. -+ If you enable this option, install the library in aufs2-util -+ package, and set some environment variables for your readdir(3), -+ then the work will be handled in user-space which generally -+ shows better performance in most cases. -+ See detail in aufs.5. ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } + -+config AUFS_PROC_MAP -+ bool "support for /proc/maps and lsof(1)" -+ depends on PROC_FS -+ help -+ When you issue mmap(2) in aufs, it is actually a direct mmap(2) -+ call to the file on the branch fs since the file in aufs is -+ purely virtual. And the file path printed in /proc/maps (and -+ others) will be the path on the branch fs. In most cases, it -+ does no harm. But some utilities like lsof(1) may confuse since -+ the utility or user may expect the file path in aufs to be -+ printed. -+ To address this issue, aufs provides a patch which introduces a -+ new member called vm_prfile into struct vm_are_struct. The patch -+ is meaningless without enabling this configuration since nobody -+ sets the new vm_prfile member. -+ If you don't apply the patch, then enabling this configuration -+ will cause a compile error. -+ This approach is fragile since if someone else make some changes -+ around vm_file, then vm_prfile may not work anymore. As a -+ workaround such case, aufs provides this configuration. If you -+ disable it, then lsof(1) may produce incorrect result but the -+ problem will be gone even if the aufs patch is applied (I hope). ++ AuTraceErr(err); ++ return err; ++} + -+config AUFS_SP_IATTR -+ bool "Respect the attributes (mtime/ctime mainly) of special files" -+ help -+ When you write something to a special file, some attributes of it -+ (mtime/ctime mainly) may be updated. Generally such updates are -+ less important (actually some device drivers and NFS ignore -+ it). But some applications (such like test program) requires -+ such updates. If you need these updates, then enable this -+ configuration which introduces some overhead. -+ Currently this configuration handles FIFO only. ++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; + -+config AUFS_SHWH -+ bool "Show whiteouts" -+ help -+ If you want to make the whiteouts in aufs visible, then enable -+ this option and specify 'shwh' mount option. Although it may -+ sounds like philosophy or something, but in technically it -+ simply shows the name of whiteout with keeping its behaviour. ++ switch (cmd) { ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; + -+config AUFS_BR_RAMFS -+ bool "Ramfs (initramfs/rootfs) as an aufs branch" -+ help -+ If you want to use ramfs as an aufs branch fs, then enable this -+ option. Generally tmpfs is recommended. -+ Aufs prohibited them to be a branch fs by default, because -+ initramfs becomes unusable after switch_root or something -+ generally. If you sets initramfs as an aufs branch and boot your -+ system by switch_root, you will meet a problem easily since the -+ files in initramfs may be inaccessible. -+ Unless you are going to use ramfs as an aufs branch fs without -+ switch_root or something, leave it N. ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } + -+config AUFS_BR_FUSE -+ bool "Fuse fs as an aufs branch" -+ depends on FUSE_FS -+ select AUFS_POLL -+ help -+ If you want to use fuse-based userspace filesystem as an aufs -+ branch fs, then enable this option. -+ It implements the internal poll(2) operation which is -+ implemented by fuse only (curretnly). ++ AuTraceErr(err); ++ return err; ++} + -+config AUFS_POLL -+ bool -+ help -+ Automatic configuration for internal use. ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err; + -+config AUFS_BR_HFSPLUS -+ bool "Hfsplus as an aufs branch" -+ depends on HFSPLUS_FS -+ default y -+ help -+ If you want to use hfsplus fs as an aufs branch fs, then enable -+ this option. This option introduces a small overhead at -+ copying-up a file on hfsplus. ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_compat_ioctl(file, cmd, arg); ++ break; + -+config AUFS_BDEV_LOOP -+ bool -+ depends on BLK_DEV_LOOP -+ default y -+ help -+ Automatic configuration for internal use. -+ Convert =[ym] into =y. ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_compat_ioctl(file, arg); ++ break; + -+config AUFS_DEBUG -+ bool "Debug aufs" -+ help -+ Enable this to compile aufs internal debug code. -+ It will have a negative impact to the performance. ++ default: ++ err = aufs_ioctl_dir(file, cmd, arg); ++ } + -+config AUFS_MAGIC_SYSRQ -+ bool -+ depends on AUFS_DEBUG && MAGIC_SYSRQ -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq, enabled automatically. -+endif ++ AuTraceErr(err); ++ return err; ++} ++ ++#if 0 /* unused yet */ ++long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); ++} ++#endif ++#endif --- /dev/null -+++ linux-3.x-rcN/fs/aufs/loop.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,133 @@ ++++ linux-3.9/fs/aufs/loop.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,135 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -18237,12 +18747,14 @@ +{ + int ret; + struct task_struct *tsk = current; ++ char c, comm[sizeof(tsk->comm)]; + + ret = 0; + if (tsk->flags & PF_KTHREAD) { -+ const char c = tsk->comm[4]; ++ get_task_comm(comm, tsk); ++ c = comm[4]; + ret = ('0' <= c && c <= '9' -+ && !strncmp(tsk->comm, "loop", 4)); ++ && !strncmp(comm, "loop", 4)); + } + + return ret; @@ -18318,7 +18830,7 @@ + kfree(au_warn_loopback_array); +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/loop.h 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/loop.h 2013-05-13 17:13:17.217133960 +0200 @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -18371,7 +18883,7 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_LOOP_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/magic.mk 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/magic.mk 2013-05-13 17:13:17.217133960 +0200 @@ -0,0 +1,54 @@ + +# defined in ${srctree}/fs/fuse/inode.c @@ -18428,53 +18940,8 @@ +ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b +endif --- /dev/null -+++ linux-3.x-rcN/fs/aufs/Makefile 2013-02-19 11:26:12.672456917 -0500 -@@ -0,0 +1,42 @@ -+ -+include ${src}/magic.mk -+ifeq (${CONFIG_AUFS_FS},m) -+include ${src}/conf.mk -+endif -+-include ${src}/priv_def.mk -+ -+# cf. include/linux/kernel.h -+# enable pr_debug -+ccflags-y += -DDEBUG -+# sparse requires the full pathname -+ifdef M -+ccflags-y += -include ${M}/../../include/linux/aufs_type.h -+else -+ccflags-y += -include ${srctree}/include/linux/aufs_type.h -+endif -+ -+obj-$(CONFIG_AUFS_FS) += aufs.o -+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ -+ wkq.o vfsub.o dcsub.o \ -+ cpup.o whout.o wbr_policy.o \ -+ dinfo.o dentry.o \ -+ dynop.o \ -+ finfo.o file.o f_op.o \ -+ dir.o vdir.o \ -+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ -+ ioctl.o -+ -+# all are boolean -+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o -+aufs-$(CONFIG_SYSFS) += sysfs.o -+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o -+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o -+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o -+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o -+aufs-$(CONFIG_AUFS_EXPORT) += export.o -+aufs-$(CONFIG_AUFS_POLL) += poll.o -+aufs-$(CONFIG_AUFS_RDU) += rdu.o -+aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o -+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o -+aufs-$(CONFIG_AUFS_DEBUG) += debug.o -+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o ---- /dev/null -+++ linux-3.x-rcN/fs/aufs/module.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,202 @@ ++++ linux-3.9/fs/aufs/module.c 2013-05-13 17:13:17.217133960 +0200 +@@ -0,0 +1,203 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -18580,6 +19047,7 @@ +MODULE_DESCRIPTION(AUFS_NAME + " -- Advanced multi layered unification filesystem"); +MODULE_VERSION(AUFS_VERSION); ++MODULE_ALIAS_FS(AUFS_NAME); + +/* this module parameter has no meaning when SYSFS is disabled */ +int sysaufs_brs = 1; @@ -18678,7 +19146,7 @@ +module_init(aufs_init); +module_exit(aufs_exit); --- /dev/null -+++ linux-3.x-rcN/fs/aufs/module.h 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/module.h 2013-05-13 17:13:17.217133960 +0200 @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -18786,8 +19254,8 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_MODULE_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/opts.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,1677 @@ ++++ linux-3.9/fs/aufs/opts.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,1697 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -18976,17 +19444,16 @@ + {0, NULL} +}; + -+static match_table_t brrattr = { ++static match_table_t brattr = { ++ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN}, + {AuBrRAttr_WH, AUFS_BRRATTR_WH}, -+ {0, NULL} -+}; -+ -+static match_table_t brwattr = { + {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH}, + {0, NULL} +}; + -+#define AuBrStr_LONGEST AUFS_BRPERM_RW "+" AUFS_BRWATTR_NLWH ++#define AuBrStr_LONGEST AUFS_BRPERM_RW \ ++ "+" AUFS_BRATTR_UNPIN \ ++ "+" AUFS_BRWATTR_NLWH + +static int br_attr_val(char *str, match_table_t table, substring_t args[]) +{ @@ -19017,7 +19484,7 @@ +static int noinline_for_stack br_perm_val(char *perm) +{ + int val; -+ char *p; ++ char *p, *q; + substring_t args[MAX_OPT_ARGS]; + + p = strchr(perm, '+'); @@ -19034,13 +19501,33 @@ + if (!p) + goto out; + -+ switch (val) { ++ p++; ++ while (1) { ++ q = strchr(p, '+'); ++ if (q) ++ *q = 0; ++ val |= br_attr_val(p, brattr, args); ++ if (q) { ++ *q = '+'; ++ p = q + 1; ++ } else ++ break; ++ } ++ switch (val & AuBrPerm_Mask) { + case AuBrPerm_RO: + case AuBrPerm_RR: -+ val |= br_attr_val(p + 1, brrattr, args); ++ if (unlikely(val & AuBrWAttr_NoLinkWH)) { ++ pr_warn("ignored branch attribute %s\n", ++ AUFS_BRWATTR_NLWH); ++ val &= ~AuBrWAttr_NoLinkWH; ++ } + break; + case AuBrPerm_RW: -+ val |= br_attr_val(p + 1, brwattr, args); ++ if (unlikely(val & AuBrRAttr_WH)) { ++ pr_warn("ignored branch attribute %s\n", ++ AUFS_BRRATTR_WH); ++ val &= ~AuBrRAttr_WH; ++ } + break; + } + @@ -19083,6 +19570,7 @@ + AuDebugOn(1); + } + ++ AppendAttr(AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN); + AppendAttr(AuBrRAttr_WH, AUFS_BRRATTR_WH); + AppendAttr(AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH); + @@ -20308,7 +20796,7 @@ + au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); + if (wbr) + wbr_wh_write_lock(wbr); -+ err = au_wh_init(au_h_dptr(root, bindex), br, sb); ++ err = au_wh_init(br, sb); + if (wbr) + wbr_wh_write_unlock(wbr); + au_hn_imtx_unlock(hdir); @@ -20466,7 +20954,7 @@ + return au_mntflags(sb) & AuOptMask_UDBA; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/opts.h 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/opts.h 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -20678,8 +21166,8 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_OPTS_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/plink.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,511 @@ ++++ linux-3.9/fs/aufs/plink.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,520 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -20801,19 +21289,12 @@ + +/* ---------------------------------------------------------------------- */ + -+struct pseudo_link { -+ union { -+ struct list_head list; -+ struct rcu_head rcu; -+ }; -+ struct inode *inode; -+}; -+ +#ifdef CONFIG_AUFS_DEBUG +void au_plink_list(struct super_block *sb) +{ ++ int i; + struct au_sbinfo *sbinfo; -+ struct list_head *plink_list; ++ struct hlist_head *plink_hlist; + struct pseudo_link *plink; + + SiMustAnyLock(sb); @@ -20822,20 +21303,22 @@ + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); + -+ plink_list = &sbinfo->si_plink.head; -+ rcu_read_lock(); -+ list_for_each_entry_rcu(plink, plink_list, list) -+ AuDbg("%lu\n", plink->inode->i_ino); -+ rcu_read_unlock(); ++ for (i = 0; i < AuPlink_NHASH; i++) { ++ plink_hlist = &sbinfo->si_plink[i].head; ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(plink, plink_hlist, hlist) ++ AuDbg("%lu\n", plink->inode->i_ino); ++ rcu_read_unlock(); ++ } +} +#endif + +/* is the inode pseudo-linked? */ +int au_plink_test(struct inode *inode) +{ -+ int found; ++ int found, i; + struct au_sbinfo *sbinfo; -+ struct list_head *plink_list; ++ struct hlist_head *plink_hlist; + struct pseudo_link *plink; + + sbinfo = au_sbi(inode->i_sb); @@ -20844,9 +21327,10 @@ + AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); + + found = 0; -+ plink_list = &sbinfo->si_plink.head; ++ i = au_plink_hash(inode->i_ino); ++ plink_hlist = &sbinfo->si_plink[i].head; + rcu_read_lock(); -+ list_for_each_entry_rcu(plink, plink_list, list) ++ hlist_for_each_entry_rcu(plink, plink_hlist, hlist) + if (plink->inode == inode) { + found = 1; + break; @@ -20942,7 +21426,7 @@ +{ + int err; + struct path h_path = { -+ .mnt = br->br_mnt ++ .mnt = au_br_mnt(br) + }; + struct inode *h_dir; + @@ -21025,7 +21509,7 @@ +static void do_put_plink(struct pseudo_link *plink, int do_del) +{ + if (do_del) -+ list_del(&plink->list); ++ hlist_del(&plink->hlist); + iput(plink->inode); + kfree(plink); +} @@ -21048,30 +21532,23 @@ +{ + struct super_block *sb; + struct au_sbinfo *sbinfo; -+ struct list_head *plink_list; ++ struct hlist_head *plink_hlist; + struct pseudo_link *plink, *tmp; -+ int found, err, cnt; ++ struct au_sphlhead *sphl; ++ int found, err, cnt, i; + + sb = inode->i_sb; + sbinfo = au_sbi(sb); + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); + -+ cnt = 0; -+ found = 0; -+ plink_list = &sbinfo->si_plink.head; -+ rcu_read_lock(); -+ list_for_each_entry_rcu(plink, plink_list, list) { -+ cnt++; -+ if (plink->inode == inode) { -+ found = 1; -+ break; -+ } -+ } -+ rcu_read_unlock(); ++ found = au_plink_test(inode); + if (found) + return; + ++ i = au_plink_hash(inode->i_ino); ++ sphl = sbinfo->si_plink + i; ++ plink_hlist = &sphl->head; + tmp = kmalloc(sizeof(*plink), GFP_NOFS); + if (tmp) + tmp->inode = au_igrab(inode); @@ -21080,20 +21557,22 @@ + goto out; + } + -+ spin_lock(&sbinfo->si_plink.spin); -+ list_for_each_entry(plink, plink_list, list) { ++ spin_lock(&sphl->spin); ++ hlist_for_each_entry(plink, plink_hlist, hlist) { + if (plink->inode == inode) { + found = 1; + break; + } + } + if (!found) -+ list_add_rcu(&tmp->list, plink_list); -+ spin_unlock(&sbinfo->si_plink.spin); ++ hlist_add_head_rcu(&tmp->hlist, plink_hlist); ++ spin_unlock(&sphl->spin); + if (!found) { -+ cnt++; -+ WARN_ONCE(cnt > AUFS_PLINK_WARN, -+ "unexpectedly many pseudo links, %d\n", cnt); ++ cnt = au_sphl_count(sphl); ++#define msg "unexpectedly unblanced or too many pseudo-links" ++ if (cnt > AUFS_PLINK_WARN) ++ AuWarn1(msg ", %d\n", cnt); ++#undef msg + err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex)); + } else { + do_put_plink(tmp, 0); @@ -21104,7 +21583,7 @@ + if (unlikely(err)) { + pr_warn("err %d, damaged pseudo link.\n", err); + if (tmp) { -+ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink); ++ au_sphl_del_rcu(&tmp->hlist, sphl); + call_rcu(&tmp->rcu, do_put_plink_rcu); + } + } @@ -21113,9 +21592,11 @@ +/* free all plinks */ +void au_plink_put(struct super_block *sb, int verbose) +{ ++ int i, warned; + struct au_sbinfo *sbinfo; -+ struct list_head *plink_list; -+ struct pseudo_link *plink, *tmp; ++ struct hlist_head *plink_hlist; ++ struct hlist_node *tmp; ++ struct pseudo_link *plink; + + SiMustWriteLock(sb); + @@ -21123,12 +21604,18 @@ + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); + -+ plink_list = &sbinfo->si_plink.head; + /* no spin_lock since sbinfo is write-locked */ -+ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed"); -+ list_for_each_entry_safe(plink, tmp, plink_list, list) -+ do_put_plink(plink, 0); -+ INIT_LIST_HEAD(plink_list); ++ warned = 0; ++ for (i = 0; i < AuPlink_NHASH; i++) { ++ plink_hlist = &sbinfo->si_plink[i].head; ++ if (!warned && verbose && !hlist_empty(plink_hlist)) { ++ pr_warn("pseudo-link is not flushed"); ++ warned = 1; ++ } ++ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) ++ do_put_plink(plink, 0); ++ INIT_HLIST_HEAD(plink_hlist); ++ } +} + +void au_plink_clean(struct super_block *sb, int verbose) @@ -21142,15 +21629,44 @@ + aufs_write_unlock(root); +} + ++static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id) ++{ ++ int do_put; ++ aufs_bindex_t bstart, bend, bindex; ++ ++ do_put = 0; ++ bstart = au_ibstart(inode); ++ bend = au_ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (!au_h_iptr(inode, bindex) ++ || au_ii_br_id(inode, bindex) != br_id) ++ continue; ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ do_put = 1; ++ break; ++ } ++ if (do_put) ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex)) { ++ do_put = 0; ++ break; ++ } ++ } else ++ do_put = 1; ++ ++ return do_put; ++} ++ +/* free the plinks on a branch specified by @br_id */ +void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id) +{ + struct au_sbinfo *sbinfo; -+ struct list_head *plink_list; -+ struct pseudo_link *plink, *tmp; ++ struct hlist_head *plink_hlist; ++ struct hlist_node *tmp; ++ struct pseudo_link *plink; + struct inode *inode; -+ aufs_bindex_t bstart, bend, bindex; -+ unsigned char do_put; ++ int i, do_put; + + SiMustWriteLock(sb); + @@ -21158,41 +21674,22 @@ + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); + -+ plink_list = &sbinfo->si_plink.head; + /* no spin_lock since sbinfo is write-locked */ -+ list_for_each_entry_safe(plink, tmp, plink_list, list) { -+ do_put = 0; -+ inode = au_igrab(plink->inode); -+ ii_write_lock_child(inode); -+ bstart = au_ibstart(inode); -+ bend = au_ibend(inode); -+ if (bstart >= 0) { -+ for (bindex = bstart; bindex <= bend; bindex++) { -+ if (!au_h_iptr(inode, bindex) -+ || au_ii_br_id(inode, bindex) != br_id) -+ continue; -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ do_put = 1; -+ break; -+ } -+ } else -+ do_put_plink(plink, 1); -+ -+ if (do_put) { -+ for (bindex = bstart; bindex <= bend; bindex++) -+ if (au_h_iptr(inode, bindex)) { -+ do_put = 0; -+ break; -+ } ++ for (i = 0; i < AuPlink_NHASH; i++) { ++ plink_hlist = &sbinfo->si_plink[i].head; ++ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) { ++ inode = au_igrab(plink->inode); ++ ii_write_lock_child(inode); ++ do_put = au_plink_do_half_refresh(inode, br_id); + if (do_put) + do_put_plink(plink, 1); ++ ii_write_unlock(inode); ++ iput(inode); + } -+ ii_write_unlock(inode); -+ iput(inode); + } +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/poll.c 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/poll.c 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -21251,7 +21748,7 @@ + return mask; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/procfs.c 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/procfs.c 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2010-2013 Junjiro R. Okajima @@ -21424,7 +21921,7 @@ + return err; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/rdu.c 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/rdu.c 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,384 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -21811,7 +22308,7 @@ +} +#endif --- /dev/null -+++ linux-3.x-rcN/fs/aufs/rwsem.h 2013-02-19 11:26:12.679123764 -0500 ++++ linux-3.9/fs/aufs/rwsem.h 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -22002,8 +22499,8 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_RWSEM_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/sbinfo.c 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,343 @@ ++++ linux-3.9/fs/aufs/sbinfo.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,346 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -22033,11 +22530,13 @@ + */ +void au_si_free(struct kobject *kobj) +{ ++ int i; + struct au_sbinfo *sbinfo; + char *locked __maybe_unused; /* debug only */ + + sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); -+ AuDebugOn(!list_empty(&sbinfo->si_plink.head)); ++ for (i = 0; i < AuPlink_NHASH; i++) ++ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head)); + AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len)); + + au_rw_write_lock(&sbinfo->si_rwsem); @@ -22059,7 +22558,7 @@ + +int au_si_alloc(struct super_block *sb) +{ -+ int err; ++ int err, i; + struct au_sbinfo *sbinfo; + static struct lock_class_key aufs_si; + @@ -22112,7 +22611,8 @@ + sbinfo->si_rdhash = AUFS_RDHASH_DEF; + sbinfo->si_dirwh = AUFS_DIRWH_DEF; + -+ au_spl_init(&sbinfo->si_plink); ++ for (i = 0; i < AuPlink_NHASH; i++) ++ au_sphl_init(sbinfo->si_plink + i); + init_waitqueue_head(&sbinfo->si_plink_wq); + spin_lock_init(&sbinfo->si_plink_maint_lock); + @@ -22348,8 +22848,8 @@ + spin_unlock(&sbinfo->au_si_pid.tree_lock); +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/spl.h 2013-02-19 11:26:12.679123764 -0500 -@@ -0,0 +1,62 @@ ++++ linux-3.9/fs/aufs/spl.h 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,112 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -22410,11 +22910,61 @@ + spin_unlock(&spl->spin); +} + ++/* ---------------------------------------------------------------------- */ ++ ++struct au_sphlhead { ++ spinlock_t spin; ++ struct hlist_head head; ++}; ++ ++static inline void au_sphl_init(struct au_sphlhead *sphl) ++{ ++ spin_lock_init(&sphl->spin); ++ INIT_HLIST_HEAD(&sphl->head); ++} ++ ++static inline void au_sphl_add(struct hlist_node *hlist, ++ struct au_sphlhead *sphl) ++{ ++ spin_lock(&sphl->spin); ++ hlist_add_head(hlist, &sphl->head); ++ spin_unlock(&sphl->spin); ++} ++ ++static inline void au_sphl_del(struct hlist_node *hlist, ++ struct au_sphlhead *sphl) ++{ ++ spin_lock(&sphl->spin); ++ hlist_del(hlist); ++ spin_unlock(&sphl->spin); ++} ++ ++static inline void au_sphl_del_rcu(struct hlist_node *hlist, ++ struct au_sphlhead *sphl) ++{ ++ spin_lock(&sphl->spin); ++ hlist_del_rcu(hlist); ++ spin_unlock(&sphl->spin); ++} ++ ++static inline unsigned long au_sphl_count(struct au_sphlhead *sphl) ++{ ++ unsigned long cnt; ++ struct hlist_node *pos; ++ ++ cnt = 0; ++ spin_lock(&sphl->spin); ++ hlist_for_each(pos, &sphl->head) ++ cnt++; ++ spin_unlock(&sphl->spin); ++ return cnt; ++} ++ +#endif /* __KERNEL__ */ +#endif /* __AUFS_SPL_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/super.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,993 @@ ++++ linux-3.9/fs/aufs/super.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,992 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -22521,7 +23071,7 @@ + hdp = au_di(sb->s_root)->di_hdentry; + for (bindex = 0; !err && bindex <= bend; bindex++) { + br = au_sbr(sb, bindex); -+ path.mnt = br->br_mnt; ++ path.mnt = au_br_mnt(br); + path.dentry = hdp[bindex].hd_dentry; + err = au_seq_path(seq, &path); + if (err > 0) { @@ -23400,17 +23950,16 @@ + +struct file_system_type aufs_fs_type = { + .name = AUFS_FSTYPE, -+ .fs_flags = -+ FS_RENAME_DOES_D_MOVE /* a race between rename and others */ -+ | FS_REVAL_DOT, /* for NFS branch and udba */ ++ /* a race between rename and others */ ++ .fs_flags = FS_RENAME_DOES_D_MOVE, + .mount = aufs_mount, + .kill_sb = aufs_kill_sb, + /* no need to __module_get() and module_put(). */ + .owner = THIS_MODULE, +}; --- /dev/null -+++ linux-3.x-rcN/fs/aufs/super.h 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,546 @@ ++++ linux-3.9/fs/aufs/super.h 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,555 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -23468,6 +24017,20 @@ + unsigned long long mfsrr_watermark; +}; + ++struct pseudo_link { ++ union { ++ struct hlist_node hlist; ++ struct rcu_head rcu; ++ }; ++ struct inode *inode; ++}; ++ ++#define AuPlink_NHASH 100 ++static inline int au_plink_hash(ino_t ino) ++{ ++ return ino % AuPlink_NHASH; ++} ++ +struct au_branch; +struct au_sbinfo { + /* nowait tasks in the system-wide workqueue */ @@ -23558,7 +24121,7 @@ + /* int si_rendir; */ + + /* pseudo_link list */ -+ struct au_splhead si_plink; ++ struct au_sphlhead si_plink[AuPlink_NHASH]; + wait_queue_head_t si_plink_wq; + spinlock_t si_plink_maint_lock; + pid_t si_plink_maint_pid; @@ -23571,7 +24134,9 @@ + */ + struct kobject si_kobj; +#ifdef CONFIG_DEBUG_FS -+ struct dentry *si_dbgaufs, *si_dbgaufs_xib; ++ struct dentry *si_dbgaufs; ++ struct dentry *si_dbgaufs_plink; ++ struct dentry *si_dbgaufs_xib; +#ifdef CONFIG_AUFS_EXPORT + struct dentry *si_dbgaufs_xigen; +#endif @@ -23679,16 +24244,8 @@ +/* ---------------------------------------------------------------------- */ + +#ifdef CONFIG_AUFS_EXPORT ++int au_test_nfsd(void); +void au_export_init(struct super_block *sb); -+ -+static inline int au_test_nfsd(void) -+{ -+ struct task_struct *tsk = current; -+ -+ return (tsk->flags & PF_KTHREAD) -+ && !strcmp(tsk->comm, "nfsd"); -+} -+ +void au_xigen_inc(struct inode *inode); +int au_xigen_new(struct inode *inode); +int au_xigen_set(struct super_block *sb, struct file *base); @@ -23701,8 +24258,8 @@ + return -ESTALE; +} +#else -+AuStubVoid(au_export_init, struct super_block *sb) +AuStubInt0(au_test_nfsd, void) ++AuStubVoid(au_export_init, struct super_block *sb) +AuStubVoid(au_xigen_inc, struct inode *inode) +AuStubInt0(au_xigen_new, struct inode *inode) +AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base) @@ -23770,6 +24327,7 @@ + /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ +#ifdef CONFIG_DEBUG_FS + sbinfo->si_dbgaufs = NULL; ++ sbinfo->si_dbgaufs_plink = NULL; + sbinfo->si_dbgaufs_xib = NULL; +#ifdef CONFIG_AUFS_EXPORT + sbinfo->si_dbgaufs_xigen = NULL; @@ -23958,7 +24516,7 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_SUPER_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/sysaufs.c 2013-02-19 11:26:12.682457188 -0500 ++++ linux-3.9/fs/aufs/sysaufs.c 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -24066,7 +24624,7 @@ + return err; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/sysaufs.h 2013-02-19 11:26:12.682457188 -0500 ++++ linux-3.9/fs/aufs/sysaufs.h 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -24173,7 +24731,7 @@ +#endif /* __KERNEL__ */ +#endif /* __SYSAUFS_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/sysfs.c 2013-02-19 11:26:12.682457188 -0500 ++++ linux-3.9/fs/aufs/sysfs.c 2013-05-13 17:13:17.220468929 +0200 @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -24269,7 +24827,7 @@ + root = sb->s_root; + di_read_lock_parent(root, !AuLock_IR); + br = au_sbr(sb, bindex); -+ path.mnt = br->br_mnt; ++ path.mnt = au_br_mnt(br); + path.dentry = au_h_dptr(root, bindex); + au_seq_path(seq, &path); + di_read_unlock(root, !AuLock_IR); @@ -24433,8 +24991,8 @@ + } +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/sysrq.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,148 @@ ++++ linux-3.9/fs/aufs/sysrq.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,151 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -24472,16 +25030,18 @@ + plevel = au_plevel; + au_plevel = KERN_WARNING; + -+ sbinfo = au_sbi(sb); + /* since we define pr_fmt, call printk directly */ ++#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str) ++ ++ sbinfo = au_sbi(sb); + printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo)); -+ printk(KERN_WARNING AUFS_NAME ": superblock\n"); ++ pr("superblock\n"); + au_dpri_sb(sb); + +#if 0 -+ printk(KERN_WARNING AUFS_NAME ": root dentry\n"); ++ pr("root dentry\n"); + au_dpri_dentry(sb->s_root); -+ printk(KERN_WARNING AUFS_NAME ": root inode\n"); ++ pr("root inode\n"); + au_dpri_inode(sb->s_root->d_inode); +#endif + @@ -24509,7 +25069,7 @@ +#if 1 + { + struct inode *i; -+ printk(KERN_WARNING AUFS_NAME ": isolated inode\n"); ++ pr("isolated inode\n"); + spin_lock(&inode_sb_list_lock); + list_for_each_entry(i, &sb->s_inodes, i_sb_list) { + spin_lock(&i->i_lock); @@ -24520,17 +25080,18 @@ + spin_unlock(&inode_sb_list_lock); + } +#endif -+ printk(KERN_WARNING AUFS_NAME ": files\n"); ++ pr("files\n"); + lg_global_lock(&files_lglock); + do_file_list_for_each_entry(sb, file) { + umode_t mode; -+ mode = file->f_dentry->d_inode->i_mode; ++ mode = file_inode(file)->i_mode; + if (!special_file(mode) || au_special_file(mode)) + au_dpri_file(file); + } while_file_list_for_each_entry; + lg_global_unlock(&files_lglock); -+ printk(KERN_WARNING AUFS_NAME ": done\n"); ++ pr("done\n"); + ++#undef pr + au_plevel = plevel; +} + @@ -24584,8 +25145,8 @@ + pr_err("err %d (ignored)\n", err); +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/vdir.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,885 @@ ++++ linux-3.9/fs/aufs/vdir.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,878 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -24693,24 +25254,20 @@ + +static void au_nhash_wh_do_free(struct hlist_head *head) +{ -+ struct au_vdir_wh *tpos; -+ struct hlist_node *pos, *node; ++ struct au_vdir_wh *pos; ++ struct hlist_node *node; + -+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) { -+ /* hlist_del(pos); */ -+ kfree(tpos); -+ } ++ hlist_for_each_entry_safe(pos, node, head, wh_hash) ++ kfree(pos); +} + +static void au_nhash_de_do_free(struct hlist_head *head) +{ -+ struct au_vdir_dehstr *tpos; -+ struct hlist_node *pos, *node; ++ struct au_vdir_dehstr *pos; ++ struct hlist_node *node; + -+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) { -+ /* hlist_del(pos); */ -+ au_cache_free_vdir_dehstr(tpos); -+ } ++ hlist_for_each_entry_safe(pos, node, head, hash) ++ au_cache_free_vdir_dehstr(pos); +} + +static void au_nhash_do_free(struct au_nhash *nhash, @@ -24749,15 +25306,14 @@ + int num; + unsigned int u, n; + struct hlist_head *head; -+ struct au_vdir_wh *tpos; -+ struct hlist_node *pos; ++ struct au_vdir_wh *pos; + + num = 0; + n = whlist->nh_num; + head = whlist->nh_head; + for (u = 0; u < n; u++, head++) -+ hlist_for_each_entry(tpos, pos, head, wh_hash) -+ if (tpos->wh_bindex == btgt && ++num > limit) ++ hlist_for_each_entry(pos, head, wh_hash) ++ if (pos->wh_bindex == btgt && ++num > limit) + return 1; + return 0; +} @@ -24789,13 +25345,12 @@ +int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen) +{ + struct hlist_head *head; -+ struct au_vdir_wh *tpos; -+ struct hlist_node *pos; ++ struct au_vdir_wh *pos; + struct au_vdir_destr *str; + + head = au_name_hash(whlist, name, nlen); -+ hlist_for_each_entry(tpos, pos, head, wh_hash) { -+ str = &tpos->wh_str; ++ hlist_for_each_entry(pos, head, wh_hash) { ++ str = &pos->wh_str; + AuDbg("%.*s\n", str->len, str->name); + if (au_nhash_test_name(str, name, nlen)) + return 1; @@ -24807,13 +25362,12 @@ +static int test_known(struct au_nhash *delist, char *name, int nlen) +{ + struct hlist_head *head; -+ struct au_vdir_dehstr *tpos; -+ struct hlist_node *pos; ++ struct au_vdir_dehstr *pos; + struct au_vdir_destr *str; + + head = au_name_hash(delist, name, nlen); -+ hlist_for_each_entry(tpos, pos, head, hash) { -+ str = tpos->str; ++ hlist_for_each_entry(pos, head, hash) { ++ str = pos->str; + AuDbg("%.*s\n", str->len, str->name); + if (au_nhash_test_name(str, name, nlen)) + return 1; @@ -25102,8 +25656,8 @@ + int err; + unsigned int nh, u; + struct hlist_head *head; -+ struct au_vdir_wh *tpos; -+ struct hlist_node *pos, *n; ++ struct au_vdir_wh *pos; ++ struct hlist_node *n; + char *p, *o; + struct au_vdir_destr *destr; + @@ -25120,11 +25674,11 @@ + p += AUFS_WH_PFX_LEN; + for (u = 0; u < nh; u++) { + head = whlist->nh_head + u; -+ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) { -+ destr = &tpos->wh_str; ++ hlist_for_each_entry_safe(pos, n, head, wh_hash) { ++ destr = &pos->wh_str; + memcpy(p, destr->name, destr->len); + err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN, -+ tpos->wh_ino, tpos->wh_type, delist); ++ pos->wh_ino, pos->wh_type, delist); + if (unlikely(err)) + break; + } @@ -25220,7 +25774,7 @@ + struct au_vdir *vdir, *allocated; + + err = 0; -+ inode = file->f_dentry->d_inode; ++ inode = file_inode(file); + IMustLock(inode); + SiMustAnyLock(inode->i_sb); + @@ -25344,7 +25898,7 @@ + } else + return 0; /* success */ + -+ inode = file->f_dentry->d_inode; ++ inode = file_inode(file); + err = copy_vdir(vdir_cache, au_ivdir(inode)); + if (!err) { + file->f_version = inode->i_version; @@ -25472,8 +26026,8 @@ + return 0; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/vfsub.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,777 @@ ++++ linux-3.9/fs/aufs/vfsub.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,769 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -25517,7 +26071,7 @@ + h_sb = h_path->dentry->d_sb; + *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb)); + if (*did) -+ err = vfs_getattr(h_path->mnt, h_path->dentry, &st); ++ err = vfs_getattr(h_path, &st); + + return err; +} @@ -26016,23 +26570,18 @@ +{ + int err; + struct inode *h_inode; ++ struct super_block *h_sb; + -+ h_inode = h_path->dentry->d_inode; + if (!h_file) { -+ err = vfsub_mnt_want_write(h_path->mnt); -+ if (err) -+ goto out; -+ err = inode_permission(h_inode, MAY_WRITE); -+ if (err) -+ goto out_mnt; -+ err = get_write_access(h_inode); -+ if (err) -+ goto out_mnt; -+ err = break_lease(h_inode, O_WRONLY); -+ if (err) -+ goto out_inode; ++ err = vfsub_truncate(h_path, length); ++ goto out; + } + ++ h_inode = h_path->dentry->d_inode; ++ h_sb = h_inode->i_sb; ++ lockdep_off(); ++ sb_start_write(h_sb); ++ lockdep_on(); + err = locks_verify_truncate(h_inode, h_file, length); + if (!err) + err = security_path_truncate(h_path); @@ -26041,13 +26590,10 @@ + err = do_truncate(h_path->dentry, length, attr, h_file); + lockdep_on(); + } ++ lockdep_off(); ++ sb_end_write(h_sb); ++ lockdep_on(); + -+out_inode: -+ if (!h_file) -+ put_write_access(h_inode); -+out_mnt: -+ if (!h_file) -+ vfsub_mnt_drop_write(h_path->mnt); +out: + return err; +} @@ -26252,8 +26798,8 @@ + return err; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/vfsub.h 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,284 @@ ++++ linux-3.9/fs/aufs/vfsub.h 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,294 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -26289,6 +26835,7 @@ +/* copied from linux/fs/internal.h */ +/* todo: BAD approach!! */ +extern struct lglock vfsmount_lock; ++extern void __mnt_drop_write(struct vfsmount *); +extern spinlock_t inode_sb_list_lock; + +/* copied from linux/fs/file_table.c */ @@ -26358,19 +26905,6 @@ + +/* ---------------------------------------------------------------------- */ + -+/* cf. i_[ug]id_read() in linux/include/fs.h */ -+static inline uid_t vfsub_ia_uid(struct iattr *ia) -+{ -+ return from_kuid(&init_user_ns, ia->ia_uid); -+} -+ -+static inline gid_t vfsub_ia_gid(struct iattr *ia) -+{ -+ return from_kgid(&init_user_ns, ia->ia_gid); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ +int vfsub_update_h_iattr(struct path *h_path, int *did); +struct file *vfsub_dentry_open(struct path *path, int flags); +struct file *vfsub_filp_open(const char *path, int oflags, int mode); @@ -26411,6 +26945,13 @@ + lockdep_on(); +} + ++static inline void vfsub_mnt_drop_write_file(struct file *file) ++{ ++ lockdep_off(); ++ mnt_drop_write_file(file); ++ lockdep_on(); ++} ++ +/* ---------------------------------------------------------------------- */ + +struct au_hinode; @@ -26444,6 +26985,11 @@ +int vfsub_flush(struct file *file, fl_owner_t id); +int vfsub_readdir(struct file *file, filldir_t filldir, void *arg); + ++static inline loff_t vfsub_f_size_read(struct file *file) ++{ ++ return i_size_read(file_inode(file)); ++} ++ +static inline unsigned int vfsub_file_flags(struct file *file) +{ + unsigned int flags; @@ -26484,6 +27030,16 @@ + unsigned int flags); +long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags); ++ ++static inline long vfsub_truncate(struct path *path, loff_t length) ++{ ++ long err; ++ lockdep_off(); ++ err = vfs_truncate(path, length); ++ lockdep_on(); ++ return err; ++} ++ +int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, + struct file *h_file); +int vfsub_fsync(struct file *file, struct path *path, int datasync); @@ -26539,8 +27095,8 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_VFSUB_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/wbr_policy.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,700 @@ ++++ linux-3.9/fs/aufs/wbr_policy.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,701 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -26580,7 +27136,7 @@ + ia.ia_uid = h_isrc->i_uid; + ia.ia_gid = h_isrc->i_gid; + sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); -+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc); ++ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc->i_flags); + err = vfsub_sio_notify_change(h_path, &ia); + + /* is this nfs only? */ @@ -26640,7 +27196,7 @@ + + err = 0; + if (h_path.dentry->d_inode) { -+ h_path.mnt = br->br_mnt; ++ h_path.mnt = au_br_mnt(br); + err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path, + dentry); + } @@ -26651,6 +27207,7 @@ +} + +static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst, ++ struct au_pin *pin, + struct dentry *h_parent, void *arg) +{ + int err, rerr; @@ -26668,7 +27225,7 @@ + AuDebugOn(h_dir != au_h_iptr(dir, bdst)); + IMustLock(h_dir); + -+ err = au_lkup_neg(dentry, bdst); ++ err = au_lkup_neg(dentry, bdst, /*wh*/0); + if (unlikely(err < 0)) + goto out; + h_path.dentry = au_h_dptr(dentry, bdst); @@ -26974,7 +27531,7 @@ + continue; + + /* sb->s_root for NFS is unreliable */ -+ h_path.mnt = br->br_mnt; ++ h_path.mnt = au_br_mnt(br); + h_path.dentry = h_path.mnt->mnt_root; + err = vfs_statfs(&h_path, st); + if (unlikely(err)) { @@ -27242,8 +27799,8 @@ + } +}; --- /dev/null -+++ linux-3.x-rcN/fs/aufs/whout.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,1042 @@ ++++ linux-3.9/fs/aufs/whout.c 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,1022 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -27420,7 +27977,7 @@ +{ + int err; + struct path h_path = { -+ .mnt = br->br_mnt ++ .mnt = au_br_mnt(br) + }; + struct inode *h_dir; + struct dentry *h_parent; @@ -27479,7 +28036,7 @@ +{ + int err; + struct path h_path = { -+ .mnt = br->br_mnt ++ .mnt = au_br_mnt(br) + }; + + err = 0; @@ -27509,14 +28066,10 @@ + if (!whpath->dentry->d_inode) + return; + -+ err = vfsub_mnt_want_write(whpath->mnt); -+ if (!err) { -+ if (isdir) -+ err = vfsub_rmdir(h_dir, whpath); -+ else -+ err = vfsub_unlink(h_dir, whpath, /*force*/0); -+ vfsub_mnt_drop_write(whpath->mnt); -+ } ++ if (isdir) ++ err = vfsub_rmdir(h_dir, whpath); ++ else ++ err = vfsub_unlink(h_dir, whpath, /*force*/0); + if (unlikely(err)) + pr_warn("failed removing %.*s (%d), ignored.\n", + AuDLNPair(whpath->dentry), err); @@ -27545,11 +28098,7 @@ + + if (au_test_nfs(path->dentry->d_sb)) + mode |= S_IXUGO; -+ err = vfsub_mnt_want_write(path->mnt); -+ if (!err) { -+ err = vfsub_mkdir(h_dir, path, mode); -+ vfsub_mnt_drop_write(path->mnt); -+ } ++ err = vfsub_mkdir(h_dir, path, mode); + } else if (S_ISDIR(path->dentry->d_inode->i_mode)) + err = 0; + else @@ -27643,13 +28192,8 @@ + err = -EEXIST; + h_dir = h_root->d_inode; + if (!base[AuBrWh_BASE].dentry->d_inode) { -+ err = vfsub_mnt_want_write(h_path->mnt); -+ if (!err) { -+ h_path->dentry = base[AuBrWh_BASE].dentry; -+ err = vfsub_create(h_dir, h_path, WH_MASK, -+ /*want_excl*/true); -+ vfsub_mnt_drop_write(h_path->mnt); -+ } ++ h_path->dentry = base[AuBrWh_BASE].dentry; ++ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true); + } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode)) + err = 0; + else @@ -27681,16 +28225,14 @@ +/* + * initialize the whiteout base file/dir for @br. + */ -+int au_wh_init(struct dentry *h_root, struct au_branch *br, -+ struct super_block *sb) ++int au_wh_init(struct au_branch *br, struct super_block *sb) +{ + int err, i; + const unsigned char do_plink + = !!au_opt_test(au_mntflags(sb), PLINK); -+ struct path path = { -+ .mnt = br->br_mnt -+ }; + struct inode *h_dir; ++ struct path path = br->br_path; ++ struct dentry *h_root = path.dentry; + struct au_wbr *wbr = br->br_wbr; + static const struct qstr base_name[] = { + [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME, @@ -27804,19 +28346,16 @@ + dir = a->sb->s_root->d_inode; + hdir = au_hi(dir, bindex); + h_root = au_h_dptr(a->sb->s_root, bindex); ++ AuDebugOn(h_root != au_br_dentry(a->br)); + + au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); + wbr_wh_write_lock(wbr); + err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode, + h_root, a->br); + if (!err) { -+ err = vfsub_mnt_want_write(a->br->br_mnt); -+ if (!err) { -+ h_path.dentry = wbr->wbr_whbase; -+ h_path.mnt = a->br->br_mnt; -+ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0); -+ vfsub_mnt_drop_write(a->br->br_mnt); -+ } ++ h_path.dentry = wbr->wbr_whbase; ++ h_path.mnt = au_br_mnt(a->br); ++ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0); + } else { + pr_warn("%.*s is moved, ignored\n", + AuDLNPair(wbr->wbr_whbase)); @@ -27825,7 +28364,7 @@ + dput(wbr->wbr_whbase); + wbr->wbr_whbase = NULL; + if (!err) -+ err = au_wh_init(h_root, a->br, a->sb); ++ err = au_wh_init(a->br, a->sb); + wbr_wh_write_unlock(wbr); + au_hn_imtx_unlock(hdir); + di_read_unlock(a->sb->s_root, AuLock_IR); @@ -27896,7 +28435,7 @@ + IMustLock(h_dir); + + br = au_sbr(sb, bindex); -+ h_path.mnt = br->br_mnt; ++ h_path.mnt = au_br_mnt(br); + wbr = br->br_wbr; + wbr_wh_read_lock(wbr); + if (wbr->wbr_whbase) { @@ -27945,7 +28484,7 @@ + } else { + struct path tmp = { + .dentry = opq_dentry, -+ .mnt = br->br_mnt ++ .mnt = au_br_mnt(br) + }; + err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp); + if (!err) @@ -28056,8 +28595,7 @@ + struct qstr wh_name; + char *p; + struct hlist_head *head; -+ struct au_vdir_wh *tpos; -+ struct hlist_node *pos; ++ struct au_vdir_wh *pos; + struct au_vdir_destr *str; + + err = -ENOMEM; @@ -28072,11 +28610,11 @@ + n = whlist->nh_num; + head = whlist->nh_head; + for (ul = 0; !err && ul < n; ul++, head++) { -+ hlist_for_each_entry(tpos, pos, head, wh_hash) { -+ if (tpos->wh_bindex != bindex) ++ hlist_for_each_entry(pos, head, wh_hash) { ++ if (pos->wh_bindex != bindex) + continue; + -+ str = &tpos->wh_str; ++ str = &pos->wh_str; + if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { + memcpy(p, str->name, str->len); + wh_name.len = AUFS_WH_PFX_LEN + str->len; @@ -28197,7 +28735,7 @@ + + if (!err) { + h_tmp.dentry = wh_dentry; -+ h_tmp.mnt = br->br_mnt; ++ h_tmp.mnt = au_br_mnt(br); + err = vfsub_rmdir(h_dir, &h_tmp); + } + @@ -28241,21 +28779,20 @@ + h_parent = dget_parent(a->wh_dentry); + h_dir = h_parent->d_inode; + hdir = au_hi(a->dir, bindex); ++ err = vfsub_mnt_want_write(au_br_mnt(a->br)); ++ if (unlikely(err)) ++ goto out_mnt; + au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); + err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, + a->br); -+ if (!err) { -+ err = vfsub_mnt_want_write(a->br->br_mnt); -+ if (!err) { -+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, -+ &a->whlist); -+ vfsub_mnt_drop_write(a->br->br_mnt); -+ } -+ } ++ if (!err) ++ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist); + au_hn_imtx_unlock(hdir); ++ vfsub_mnt_drop_write(au_br_mnt(a->br)); ++ ++out_mnt: + dput(h_parent); + ii_write_unlock(a->dir); -+ +out: + /* mutex_unlock(&a->dir->i_mutex); */ + au_whtmp_rmdir_free(a); @@ -28287,8 +28824,8 @@ + } +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/whout.h 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,88 @@ ++++ linux-3.9/fs/aufs/whout.h 2013-05-13 17:13:17.220468929 +0200 +@@ -0,0 +1,87 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -28329,8 +28866,7 @@ +int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br); +int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, + struct dentry *dentry); -+int au_wh_init(struct dentry *h_parent, struct au_branch *br, -+ struct super_block *sb); ++int au_wh_init(struct au_branch *br, struct super_block *sb); + +/* diropq flags */ +#define AuDiropq_CREATE 1 @@ -28378,7 +28914,7 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_WHOUT_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/wkq.c 2013-02-19 11:26:12.682457188 -0500 ++++ linux-3.9/fs/aufs/wkq.c 2013-05-13 17:13:17.223803898 +0200 @@ -0,0 +1,214 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -28595,7 +29131,7 @@ + return err; +} --- /dev/null -+++ linux-3.x-rcN/fs/aufs/wkq.h 2013-02-19 11:26:12.682457188 -0500 ++++ linux-3.9/fs/aufs/wkq.h 2013-05-13 17:13:17.223803898 +0200 @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima @@ -28690,8 +29226,8 @@ +#endif /* __KERNEL__ */ +#endif /* __AUFS_WKQ_H__ */ --- /dev/null -+++ linux-3.x-rcN/fs/aufs/xino.c 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,1265 @@ ++++ linux-3.9/fs/aufs/xino.c 2013-05-13 17:13:17.223803898 +0200 +@@ -0,0 +1,1264 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -28859,7 +29395,7 @@ + goto out_dput; + } + -+ path.mnt = base_file->f_vfsmnt; ++ path.mnt = base_file->f_path.mnt; + file = vfsub_dentry_open(&path, + O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE + /* | __FMODE_NONOTIFY */); @@ -28876,8 +29412,7 @@ + + if (copy_src) { + /* no one can touch copy_src xino */ -+ err = au_copy_file(file, copy_src, -+ i_size_read(copy_src->f_dentry->d_inode)); ++ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src)); + if (unlikely(err)) { + pr_err("%.*s copy err %d\n", AuLNPair(name), err); + goto out_fput; @@ -28963,12 +29498,12 @@ + fput(file); + br->br_xino.xi_file = new_xino; + -+ h_sb = br->br_mnt->mnt_sb; ++ h_sb = au_br_sb(br); + for (bi = 0; bi <= bend; bi++) { + if (unlikely(bi == bindex)) + continue; + br = au_sbr(sb, bi); -+ if (br->br_mnt->mnt_sb != h_sb) ++ if (au_br_sb(br) != h_sb) + continue; + + fput(br->br_xino.xi_file); @@ -29004,7 +29539,7 @@ + bindex = au_br_index(sb, br->br_id); + err = au_xino_trunc(sb, bindex); + if (!err -+ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ && file_inode(br->br_xino.xi_file)->i_blocks + >= br->br_xino_upper) + br->br_xino_upper += AUFS_XINO_TRUNC_STEP; + @@ -29024,7 +29559,7 @@ + struct xino_do_trunc_args *args; + int wkq_err; + -+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ if (file_inode(br->br_xino.xi_file)->i_blocks + < br->br_xino_upper) + return; + @@ -29103,7 +29638,7 @@ + h_ino, ino); + if (!err) { + if (au_opt_test(mnt_flags, TRUNC_XINO) -+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb)) ++ && au_test_fs_trunc_xino(au_br_sb(br))) + xino_try_trunc(sb, br); + return 0; /* success */ + } @@ -29161,7 +29696,7 @@ + + pos = pindex; + pos *= PAGE_SIZE; -+ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE) ++ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE) + sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos); + else { + memset(p, 0, PAGE_SIZE); @@ -29256,7 +29791,7 @@ + err = au_xino_do_write(xwrite, br->br_xino.xi_file, + h_inode->i_ino, /*ino*/0); + if (!err && try_trunc -+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb)) ++ && au_test_fs_trunc_xino(au_br_sb(br))) + xino_try_trunc(sb, br); + } +} @@ -29294,7 +29829,7 @@ + } + + file = sbinfo->si_xib; -+ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE; ++ pend = vfsub_f_size_read(file) / PAGE_SIZE; + for (ul = pindex + 1; ul <= pend; ul++) { + err = xib_pindex(sb, ul); + if (unlikely(err)) @@ -29347,7 +29882,7 @@ + pos *= sizeof(*ino); + + file = au_sbr(sb, bindex)->br_xino.xi_file; -+ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino)) ++ if (vfsub_f_size_read(file) < pos + sizeof(*ino)) + return 0; /* no ino */ + + sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos); @@ -29463,10 +29998,10 @@ + shared_br = NULL; + bend = au_sbend(sb); + if (do_test) { -+ tgt_sb = br->br_mnt->mnt_sb; ++ tgt_sb = au_br_sb(br); + for (bindex = 0; bindex <= bend; bindex++) { + b = au_sbr(sb, bindex); -+ if (tgt_sb == b->br_mnt->mnt_sb) { ++ if (tgt_sb == au_br_sb(b)) { + shared_br = b; + break; + } @@ -29522,7 +30057,7 @@ + MtxMustLock(&sbinfo->si_xib_mtx); + p = sbinfo->si_xib_buf; + func = sbinfo->si_xread; -+ pend = i_size_read(file->f_dentry->d_inode); ++ pend = vfsub_f_size_read(file); + pos = 0; + while (pos < pend) { + sz = xino_fread(func, file, page, PAGE_SIZE, &pos); @@ -29592,7 +30127,7 @@ + goto out; + + file = sbinfo->si_xib; -+ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE) ++ if (vfsub_f_size_read(file) <= PAGE_SIZE) + goto out; + + au_xino_lock_dir(sb, file, &ldir); @@ -29702,7 +30237,7 @@ + + sbinfo->si_xib_last_pindex = 0; + sbinfo->si_xib_next_bit = 0; -+ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) { ++ if (vfsub_f_size_read(file) < PAGE_SIZE) { + pos = 0; + err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf, + PAGE_SIZE, &pos); @@ -29895,7 +30430,7 @@ + for (bindex = 0; bindex <= bend; bindex++) { + br = au_sbr(sb, bindex); + if (au_br_writable(br->br_perm) -+ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) { ++ && !au_test_fs_bad_xino(au_br_sb(br))) { + bwr = bindex; + break; + } @@ -29906,7 +30441,7 @@ + page = (void *)__get_free_page(GFP_NOFS); + if (unlikely(!page)) + goto out; -+ path.mnt = br->br_mnt; ++ path.mnt = au_br_mnt(br); + path.dentry = au_h_dptr(sb->s_root, bwr); + p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME)); + file = (void *)p; @@ -29958,7 +30493,7 @@ + return err; +} --- /dev/null -+++ linux-3.x-rcN/include/linux/aufs_type.h 2013-02-19 11:26:12.682457188 -0500 ++++ linux-3.9/include/linux/aufs_type.h 2013-05-13 17:13:17.223803898 +0200 @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012-2013 Junjiro R. Okajima @@ -29980,8 +30515,8 @@ + +#include --- /dev/null -+++ linux-3.x-rcN/include/uapi/linux/aufs_type.h 2013-02-19 11:26:12.682457188 -0500 -@@ -0,0 +1,233 @@ ++++ linux-3.9/include/uapi/linux/aufs_type.h 2013-05-13 17:13:17.223803898 +0200 +@@ -0,0 +1,235 @@ +/* + * Copyright (C) 2005-2013 Junjiro R. Okajima + * @@ -30014,8 +30549,9 @@ +#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__ +#include +#undef pr_fmt -+#define pr_fmt(fmt) AUFS_NAME " %s:%d:%s[%d]: " fmt, \ -+ __func__, __LINE__, current->comm, current->pid ++#define pr_fmt(fmt) \ ++ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \ ++ (int)sizeof(current->comm), current->comm, current->pid +#else +#include +#include @@ -30023,7 +30559,7 @@ + +#include + -+#define AUFS_VERSION "3.x-rcN-20130204" ++#define AUFS_VERSION "3.9-20130513" + +/* todo? move this to linux-2.6.19/include/magic.h */ +#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's') @@ -30060,7 +30596,7 @@ +#define AUFS_WH_PFX ".wh." +#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1) +#define AUFS_WH_TMP_LEN 4 -+/* a limit for rmdir/rename a dir */ ++/* a limit for rmdir/rename a dir and copyup */ +#define AUFS_MAX_NAMELEN (NAME_MAX \ + - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\ + - 1 /* dot */\ @@ -30077,7 +30613,7 @@ +#define AUFS_WKQ_NAME AUFS_NAME "d" +#define AUFS_MFS_DEF_SEC 30 /* seconds */ +#define AUFS_MFS_MAX_SEC 3600 /* seconds */ -+#define AUFS_PLINK_WARN 100 /* number of plinks */ ++#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */ + +/* pseudo-link maintenace under /proc */ +#define AUFS_PLINK_MAINT_NAME "plink_maint" @@ -30102,6 +30638,7 @@ +#define AUFS_BRPERM_RR "rr" +#define AUFS_BRRATTR_WH "wh" +#define AUFS_BRWATTR_NLWH "nolwh" ++#define AUFS_BRATTR_UNPIN "unpin" + +/* ---------------------------------------------------------------------- */ + diff --git a/linux/config.x86_64 b/linux/config.x86_64 index 8d6be68b4..1def1cf3d 100644 --- a/linux/config.x86_64 +++ b/linux/config.x86_64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86_64 3.9.0-1 Kernel Configuration +# Linux/x86_64 3.9.2-1 Kernel Configuration # CONFIG_64BIT=y CONFIG_X86_64=y diff --git a/linux/linux.install b/linux/linux.install index 43bb7420e..b86e52230 100644 --- a/linux/linux.install +++ b/linux/linux.install @@ -2,7 +2,7 @@ # arg 2: the old package version KERNEL_NAME= -KERNEL_VERSION=3.9.0-1-CHAKRA +KERNEL_VERSION=3.9.2-1-CHAKRA # set a sane PATH to ensure that critical utils like depmod will be found export PATH='/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' @@ -49,4 +49,4 @@ post_remove() { # also remove the compat symlinks rm -f boot/{initramfs-linux,kernel26}${KERNEL_NAME}.img rm -f boot/{initramfs-linux,kernel26}${KERNEL_NAME}-fallback.img -} \ No newline at end of file +} diff --git a/ndiswrapper/PKGBUILD b/ndiswrapper/PKGBUILD index 97f34fe0a..77e648f10 100644 --- a/ndiswrapper/PKGBUILD +++ b/ndiswrapper/PKGBUILD @@ -6,25 +6,31 @@ _kver="$(cat /lib/modules/${_extramodules}/version)" pkgname=ndiswrapper pkgver=1.58 -pkgrel=2 +pkgrel=1 pkgdesc="Module for NDIS (Windows Network Drivers) drivers supplied by vendors. For linux-testing." license=('GPL') arch=('x86_64') url="http://ndiswrapper.sourceforge.net" install="ndiswrapper.install" -depends=('linux>=3.9' 'linux<4.0') +depends=('linux>=3.9' 'linux<3.10') provides=("ndiswrapper-utils=$pkgver") replaces=('ndiswrapper-utils') conflicts=('ndiswrapper-utils') makedepends=('linux-headers') -source=("http://downloads.sourceforge.net/sourceforge/ndiswrapper/ndiswrapper-$pkgver.tar.gz") -md5sums=('ba6f57e5eb2f9e94cb07c7f151b2afcd') +source=("http://downloads.sourceforge.net/sourceforge/ndiswrapper/ndiswrapper-$pkgver.tar.gz" + ndiswrapper-1.58-add_taint.patch) +sha1sums=('a256812b3136648ed93e04146d2276a3ca70957c' + 'f2c8bd7553f87a647b2e221b01fbd5d0d07eff8c') build() { cd "$srcdir/$pkgname-$pkgver" + # https://bugs.gentoo.org/show_bug.cgi?id=467956 3.9 kernels + patch -Np1 -i "$srcdir/ndiswrapper-1.58-add_taint.patch" + sed -i "/modinfo/s/s/usr\//" driver/Makefile sed -i "s|/include/linux/version.h|/include/generated/uapi/linux/version.h|g" driver/Makefile + make KVERS=$_kver } diff --git a/ndiswrapper/ndiswrapper-1.58-add_taint.patch b/ndiswrapper/ndiswrapper-1.58-add_taint.patch new file mode 100644 index 000000000..87dc3a0e7 --- /dev/null +++ b/ndiswrapper/ndiswrapper-1.58-add_taint.patch @@ -0,0 +1,24 @@ +diff -ur ndiswrapper-1.58.orig/driver/loader.c ndiswrapper-1.58/driver/loader.c +--- ndiswrapper-1.58.orig/driver/loader.c 2013-05-01 09:48:34.910000000 +0900 ++++ ndiswrapper-1.58/driver/loader.c 2013-05-01 09:48:59.870000000 +0900 +@@ -575,7 +575,7 @@ + } else { + printk(KERN_INFO "%s: driver %s (%s) loaded\n", + DRIVER_NAME, wrap_driver->name, wrap_driver->version); +- add_taint(TAINT_PROPRIETARY_MODULE); ++ add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_STILL_OK); + EXIT1(return 0); + } + } +diff -ur ndiswrapper-1.58.orig/driver/wrapper.c ndiswrapper-1.58/driver/wrapper.c +--- ndiswrapper-1.58.orig/driver/wrapper.c 2013-02-20 03:00:37.000000000 +0900 ++++ ndiswrapper-1.58/driver/wrapper.c 2013-05-01 09:54:30.140000000 +0900 +@@ -72,7 +72,7 @@ + static int __init wrapper_init(void) + { + #ifdef TAINT_OOT_MODULE +- add_taint(TAINT_OOT_MODULE); ++ add_taint(TAINT_OOT_MODULE, LOCKDEP_STILL_OK); + #endif + printk(KERN_INFO "%s version %s loaded (smp=%s, preempt=%s)\n", + DRIVER_NAME, DRIVER_VERSION, -- GitLab