/* CacheFiles path walking and related routines * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> #include <linux/sched.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/quotaops.h> #include <linux/xattr.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/security.h> #include "internal.h" static int cachefiles_wait_bit(void *flags) { schedule(); return 0; } /* * record the fact that an object is now active */ static void cachefiles_mark_object_active(struct cachefiles_cache *cache, struct cachefiles_object *object) { struct cachefiles_object *xobject; struct rb_node **_p, *_parent = NULL; struct dentry *dentry; _enter(",%p", object); try_again: write_lock(&cache->active_lock); if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) BUG(); dentry = object->dentry; _p = &cache->active_nodes.rb_node; while (*_p) { _parent = *_p; xobject = rb_entry(_parent, struct cachefiles_object, active_node); ASSERT(xobject != object); if (xobject->dentry > dentry) _p = &(*_p)->rb_left; else if (xobject->dentry < dentry) _p = &(*_p)->rb_right; else goto wait_for_old_object; } rb_link_node(&object->active_node, _parent, _p); rb_insert_color(&object->active_node, &cache->active_nodes); write_unlock(&cache->active_lock); _leave(""); return; /* an old object from a previous incarnation is hogging the slot - we * need to wait for it to be destroyed */ wait_for_old_object: if (xobject->fscache.state < FSCACHE_OBJECT_DYING) { printk(KERN_ERR "\n"); printk(KERN_ERR "CacheFiles: Error:" " Unexpected object collision\n"); printk(KERN_ERR "xobject: OBJ%x\n", xobject->fscache.debug_id); printk(KERN_ERR "xobjstate=%s\n", fscache_object_states[xobject->fscache.state]); printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags); printk(KERN_ERR "xobjevent=%lx [%lx]\n", xobject->fscache.events, xobject->fscache.event_mask); printk(KERN_ERR "xops=%u inp=%u exc=%u\n", xobject->fscache.n_ops, xobject->fscache.n_in_progress, xobject->fscache.n_exclusive); printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n", xobject->fscache.cookie, xobject->fscache.cookie->parent, xobject->fscache.cookie->netfs_data, xobject->fscache.cookie->flags); printk(KERN_ERR "xparent=%p\n", xobject->fscache.parent); printk(KERN_ERR "object: OBJ%x\n", object->fscache.debug_id); printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n", object->fscache.cookie, object->fscache.cookie->parent, object->fscache.cookie->netfs_data, object->fscache.cookie->flags); printk(KERN_ERR "parent=%p\n", object->fscache.parent); BUG(); } atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); _debug(">>> wait"); wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); _debug("<<< waited"); cache->cache.ops->put_object(&xobject->fscache); goto try_again; } /* * delete an object representation from the cache * - file backed objects are unlinked * - directory backed objects are stuffed into the graveyard for userspace to * delete * - unlocks the directory mutex */ static int cachefiles_bury_object(struct cachefiles_cache *cache, struct dentry *dir, struct dentry *rep) { struct dentry *grave, *trap; char nbuffer[8 + 8 + 1]; int ret; _enter(",'%*.*s','%*.*s'", dir->d_name.len, dir->d_name.len, dir->d_name.name, rep->d_name.len, rep->d_name.len, rep->d_name.name); /* non-directories can just be unlinked */ if (!S_ISDIR(rep->d_inode->i_mode)) { _debug("unlink stale object"); ret = vfs_unlink(dir->d_inode, rep); mutex_unlock(&dir->d_inode->i_mutex); if (ret == -EIO) cachefiles_io_error(cache, "Unlink failed"); _leave(" = %d", ret); return ret; } /* directories have to be moved to the graveyard */ _debug("move stale object to graveyard"); mutex_unlock(&dir->d_inode->i_mutex); try_again: /* first step is to make up a grave dentry in the graveyard */ sprintf(nbuffer, "%08x%08x", (uint32_t) get_seconds(), (uint32_t) atomic_inc_return(&cache->gravecounter)); /* do the multiway lock magic */ trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ if (rep->d_parent != dir) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); _leave(" = 0 [culled?]"); return 0; } if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) { unlock_rename(cache->graveyard, dir); cachefiles_io_error(cache, "Graveyard no longer a directory"); return -EIO; } if (trap == rep) { unlock_rename(cache->graveyard, dir); cachefiles_io_error(cache, "May not make directory loop"); return -EIO; } if (d_mountpoint(rep)) { unlock_rename(cache->graveyard, dir); cachefiles_io_error(cache, "Mountpoint in cache"); return -EIO; } grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); if (IS_ERR(grave)) { unlock_rename(cache->graveyard, dir); if (PTR_ERR(grave) == -ENOMEM) { _leave(" = -ENOMEM"); return -ENOMEM; } cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave)); return -EIO; } if (grave->d_inode) { unlock_rename(cache->graveyard, dir); dput(grave); grave = NULL; cond_resched(); goto try_again; } if (d_mountpoint(grave)) { unlock_rename(cache->graveyard, dir); dput(grave); cachefiles_io_error(cache, "Mountpoint in graveyard"); return -EIO; } /* target should not be an ancestor of source */ if (trap == grave) { unlock_rename(cache->graveyard, dir); dput(grave); cachefiles_io_error(cache, "May not make directory loop"); return -EIO; } /* attempt the rename */ ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); unlock_rename(cache->graveyard, dir); dput(grave); _leave(" = 0"); return 0; } /* * delete an object representation from the cache */ int cachefiles_delete_object(struct cachefiles_cache *cache, struct cachefiles_object *object) { struct dentry *dir; int ret; _enter(",{%p}", object->dentry); ASSERT(object->dentry); ASSERT(object->dentry->d_inode); ASSERT(object->dentry->d_parent); dir = dget_parent(object->dentry); mutex_lock(&dir->d_inode->i_mutex); ret = cachefiles_bury_object(cache, dir, object->dentry); dput(dir); _leave(" = %d", ret); return ret; } /* * walk from the parent object to the child object through the backing * filesystem, creating directories as we go */ int cachefiles_walk_to_object(struct cachefiles_object *parent, struct cachefiles_object *object, const char *key, struct cachefiles_xattr *auxdata) { struct cachefiles_cache *cache; struct dentry *dir, *next = NULL; unsigned long start; const char *name; int ret, nlen; _enter("{%p},,%s,", parent->dentry, key); cache = container_of(parent->fscache.cache, struct cachefiles_cache, cache); ASSERT(parent->dentry); ASSERT(parent->dentry->d_inode); if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) { // TODO: convert file to dir _leave("looking up in none directory"); return -ENOBUFS; } dir = dget(parent->dentry); advance: /* attempt to transit the first directory component */ name = key; nlen = strlen(key); /* key ends in a double NUL */ key = key + nlen + 1; if (!*key) key = NULL; lookup_again: /* search the current directory for the element name */ _debug("lookup '%s'", name); mutex_lock(&dir->d_inode->i_mutex); start = jiffies; next = lookup_one_len(name, dir, nlen); cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(next)) goto lookup_error; _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative"); if (!key) object->new = !next->d_inode; /* if this element of the path doesn't exist, then the lookup phase * failed, and we can release any readers in the certain knowledge that * there's nothing for them to actually read */ if (!next->d_inode) fscache_object_lookup_negative(&object->fscache); /* we need to create the object if it's negative */ if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { /* index objects and intervening tree levels must be subdirs */ if (!next->d_inode) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) goto create_error; start = jiffies; ret = vfs_mkdir(dir->d_inode, next, 0); cachefiles_hist(cachefiles_mkdir_histogram, start); if (ret < 0) goto create_error; ASSERT(next->d_inode); _debug("mkdir -> %p{%p{ino=%lu}}", next, next->d_inode, next->d_inode->i_ino); } else if (!S_ISDIR(next->d_inode->i_mode)) { kerror("inode %lu is not a directory", next->d_inode->i_ino); ret = -ENOBUFS; goto error; } } else { /* non-index objects start out life as files */ if (!next->d_inode) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) goto create_error; start = jiffies; ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); cachefiles_hist(cachefiles_create_histogram, start); if (ret < 0) goto create_error; ASSERT(next->d_inode); _debug("create -> %p{%p{ino=%lu}}", next, next->d_inode, next->d_inode->i_ino); } else if (!S_ISDIR(next->d_inode->i_mode) && !S_ISREG(next->d_inode->i_mode) ) { kerror("inode %lu is not a file or directory", next->d_inode->i_ino); ret = -ENOBUFS; goto error; } } /* process the next component */ if (key) { _debug("advance"); mutex_unlock(&dir->d_inode->i_mutex); dput(dir); dir = next; next = NULL; goto advance; } /* we've found the object we were looking for */ object->dentry = next; /* if we've found that the terminal object exists, then we need to * check its attributes and delete it if it's out of date */ if (!object->new) { _debug("validate '%*.*s'", next->d_name.len, next->d_name.len, next->d_name.name); ret = cachefiles_check_object_xattr(object, auxdata); if (ret == -ESTALE) { /* delete the object (the deleter drops the directory * mutex) */ object->dentry = NULL; ret = cachefiles_bury_object(cache, dir, next); dput(next); next = NULL; if (ret < 0) goto delete_error; _debug("redo lookup"); goto lookup_again; } } /* note that we're now using this object */ cachefiles_mark_object_active(cache, object); mutex_unlock(&dir->d_inode->i_mutex); dput(dir); dir = NULL; _debug("=== OBTAINED_OBJECT ==="); if (object->new) { /* attach data to a newly constructed terminal object */ ret = cachefiles_set_object_xattr(object, auxdata); if (ret < 0) goto check_error; } else { /* always update the atime on an object we've just looked up * (this is used to keep track of culling, and atimes are only * updated by read, write and readdir but not lookup or * open) */ touch_atime(cache->mnt, next); } /* open a file interface onto a data file */ if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { if (S_ISREG(object->dentry->d_inode->i_mode)) { const struct address_space_operations *aops; ret = -EPERM; aops = object->dentry->d_inode->i_mapping->a_ops; if (!aops->bmap) goto check_error; object->backer = object->dentry; } else { BUG(); // TODO: open file in data-class subdir } } object->new = 0; fscache_obtained_object(&object->fscache); _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); return 0; create_error: _debug("create error %d", ret); if (ret == -EIO) cachefiles_io_error(cache, "Create/mkdir failed"); goto error; check_error: _debug("check error %d", ret); write_lock(&cache->active_lock); rb_erase(&object->active_node, &cache->active_nodes); clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); write_unlock(&cache->active_lock); dput(object->dentry); object->dentry = NULL; goto error_out; delete_error: _debug("delete error %d", ret); goto error_out2; lookup_error: _debug("lookup error %ld", PTR_ERR(next)); ret = PTR_ERR(next); if (ret == -EIO) cachefiles_io_error(cache, "Lookup failed"); next = NULL; error: mutex_unlock(&dir->d_inode->i_mutex); dput(next); error_out2: dput(dir); error_out: if (ret == -ENOSPC) ret = -ENOBUFS; _leave(" = error %d", -ret); return ret; } /* * get a subdirectory */ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, struct dentry *dir, const char *dirname) { struct dentry *subdir; unsigned long start; int ret; _enter(",,%s", dirname); /* search the current directory for the element name */ mutex_lock(&dir->d_inode->i_mutex); start = jiffies; subdir = lookup_one_len(dirname, dir, strlen(dirname)); cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(subdir)) { if (PTR_ERR(subdir) == -ENOMEM) goto nomem_d_alloc; goto lookup_error; } _debug("subdir -> %p %s", subdir, subdir->d_inode ? "positive" : "negative"); /* we need to create the subdir if it doesn't exist yet */ if (!subdir->d_inode) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) goto mkdir_error; _debug("attempt mkdir"); ret = vfs_mkdir(dir->d_inode, subdir, 0700); if (ret < 0) goto mkdir_error; ASSERT(subdir->d_inode); _debug("mkdir -> %p{%p{ino=%lu}}", subdir, subdir->d_inode, subdir->d_inode->i_ino); } mutex_unlock(&dir->d_inode->i_mutex); /* we need to make sure the subdir is a directory */ ASSERT(subdir->d_inode); if (!S_ISDIR(subdir->d_inode->i_mode)) { kerror("%s is not a directory", dirname); ret = -EIO; goto check_error; } ret = -EPERM; if (!subdir->d_inode->i_op || !subdir->d_inode->i_op->setxattr || !subdir->d_inode->i_op->getxattr || !subdir->d_inode->i_op->lookup || !subdir->d_inode->i_op->mkdir || !subdir->d_inode->i_op->create || !subdir->d_inode->i_op->rename || !subdir->d_inode->i_op->rmdir || !subdir->d_inode->i_op->unlink) goto check_error; _leave(" = [%lu]", subdir->d_inode->i_ino); return subdir; check_error: dput(subdir); _leave(" = %d [check]", ret); return ERR_PTR(ret); mkdir_error: mutex_unlock(&dir->d_inode->i_mutex); dput(subdir); kerror("mkdir %s failed with error %d", dirname, ret); return ERR_PTR(ret); lookup_error: mutex_unlock(&dir->d_inode->i_mutex); ret = PTR_ERR(subdir); kerror("Lookup %s failed with error %d", dirname, ret); return ERR_PTR(ret); nomem_d_alloc: mutex_unlock(&dir->d_inode->i_mutex); _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } /* * find out if an object is in use or not * - if finds object and it's not in use: * - returns a pointer to the object and a reference on it * - returns with the directory locked */ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct cachefiles_object *object; struct rb_node *_n; struct dentry *victim; unsigned long start; int ret; //_enter(",%*.*s/,%s", // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); /* look up the victim */ mutex_lock_nested(&dir->d_inode->i_mutex, 1); start = jiffies; victim = lookup_one_len(filename, dir, strlen(filename)); cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(victim)) goto lookup_error; //_debug("victim -> %p %s", // victim, victim->d_inode ? "positive" : "negative"); /* if the object is no longer there then we probably retired the object * at the netfs's request whilst the cull was in progress */ if (!victim->d_inode) { mutex_unlock(&dir->d_inode->i_mutex); dput(victim); _leave(" = -ENOENT [absent]"); return ERR_PTR(-ENOENT); } /* check to see if we're using this object */ read_lock(&cache->active_lock); _n = cache->active_nodes.rb_node; while (_n) { object = rb_entry(_n, struct cachefiles_object, active_node); if (object->dentry > victim) _n = _n->rb_left; else if (object->dentry < victim) _n = _n->rb_right; else goto object_in_use; } read_unlock(&cache->active_lock); //_leave(" = %p", victim); return victim; object_in_use: read_unlock(&cache->active_lock); mutex_unlock(&dir->d_inode->i_mutex); dput(victim); //_leave(" = -EBUSY [in use]"); return ERR_PTR(-EBUSY); lookup_error: mutex_unlock(&dir->d_inode->i_mutex); ret = PTR_ERR(victim); if (ret == -ENOENT) { /* file or dir now absent - probably retired by netfs */ _leave(" = -ESTALE [absent]"); return ERR_PTR(-ESTALE); } if (ret == -EIO) { cachefiles_io_error(cache, "Lookup failed"); } else if (ret != -ENOMEM) { kerror("Internal error: %d", ret); ret = -EIO; } _leave(" = %d", ret); return ERR_PTR(ret); } /* * cull an object if it's not in use * - called only by cache manager daemon */ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct dentry *victim; int ret; _enter(",%*.*s/,%s", dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); victim = cachefiles_check_active(cache, dir, filename); if (IS_ERR(victim)) return PTR_ERR(victim); _debug("victim -> %p %s", victim, victim->d_inode ? "positive" : "negative"); /* okay... the victim is not being used so we can cull it * - start by marking it as stale */ _debug("victim is cullable"); ret = cachefiles_remove_object_xattr(cache, victim); if (ret < 0) goto error_unlock; /* actually remove the victim (drops the dir mutex) */ _debug("bury"); ret = cachefiles_bury_object(cache, dir, victim); if (ret < 0) goto error; dput(victim); _leave(" = 0"); return 0; error_unlock: mutex_unlock(&dir->d_inode->i_mutex); error: dput(victim); if (ret == -ENOENT) { /* file or dir now absent - probably retired by netfs */ _leave(" = -ESTALE [absent]"); return -ESTALE; } if (ret != -ENOMEM) { kerror("Internal error: %d", ret); ret = -EIO; } _leave(" = %d", ret); return ret; } /* * find out if an object is in use or not * - called only by cache manager daemon * - returns -EBUSY or 0 to indicate whether an object is in use or not */ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename) { struct dentry *victim; //_enter(",%*.*s/,%s", // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); victim = cachefiles_check_active(cache, dir, filename); if (IS_ERR(victim)) return PTR_ERR(victim); mutex_unlock(&dir->d_inode->i_mutex); dput(victim); //_leave(" = 0"); return 0; }