diff options
Diffstat (limited to 'fs')
129 files changed, 11604 insertions, 946 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c new file mode 100644 index 00000000000..e847f504a47 --- /dev/null +++ b/fs/9p/9p.c @@ -0,0 +1,359 @@ +/* + * linux/fs/9p/9p.c + * + * This file contains functions 9P2000 functions + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "mux.h" + +/** + * v9fs_t_version - negotiate protocol parameters with sever + * @v9ses: 9P2000 session information + * @msize: requested max size packet + * @version: requested version.extension string + * @fcall: pointer to response fcall pointer + * + */ + +int +v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, + char *version, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version); + msg.id = TVERSION; + msg.params.tversion.msize = msize; + msg.params.tversion.version = version; + + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_attach - mount the server + * @v9ses: 9P2000 session information + * @uname: user name doing the attach + * @aname: remote name being attached to + * @fid: mount fid to attatch to root node + * @afid: authentication fid (in this case result key) + * @fcall: pointer to response fcall pointer + * + */ + +int +v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, + u32 fid, u32 afid, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname, + aname, fid, afid); + msg.id = TATTACH; + msg.params.tattach.fid = fid; + msg.params.tattach.afid = afid; + msg.params.tattach.uname = uname; + msg.params.tattach.aname = aname; + + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_clunk - release a fid (finish a transaction) + * @v9ses: 9P2000 session information + * @fid: fid to release + * @fcall: pointer to response fcall pointer + * + */ + +int +v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "fid %d\n", fid); + msg.id = TCLUNK; + msg.params.tclunk.fid = fid; + + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_v9fs_t_flush - flush a pending transaction + * @v9ses: 9P2000 session information + * @tag: tid to release + * + */ + +int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "oldtag %d\n", tag); + msg.id = TFLUSH; + msg.params.tflush.oldtag = tag; + return v9fs_mux_rpc(v9ses, &msg, NULL); +} + +/** + * v9fs_t_stat - read a file's meta-data + * @v9ses: 9P2000 session information + * @fid: fid pointing to file or directory to get info about + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "fid %d\n", fid); + if (fcall) + *fcall = NULL; + + msg.id = TSTAT; + msg.params.tstat.fid = fid; + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_wstat - write a file's meta-data + * @v9ses: 9P2000 session information + * @fid: fid pointing to file or directory to write info about + * @stat: metadata + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_stat *stat, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length); + msg.id = TWSTAT; + msg.params.twstat.fid = fid; + msg.params.twstat.stat = stat; + + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_walk - walk a fid to a new file or directory + * @v9ses: 9P2000 session information + * @fid: fid to walk + * @newfid: new fid (for clone operations) + * @name: path to walk fid to + * @fcall: pointer to response fcall + * + */ + +/* TODO: support multiple walk */ + +int +v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, + char *name, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name); + msg.id = TWALK; + msg.params.twalk.fid = fid; + msg.params.twalk.newfid = newfid; + + if (name) { + msg.params.twalk.nwname = 1; + msg.params.twalk.wnames = &name; + } else { + msg.params.twalk.nwname = 0; + } + + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_open - open a file + * + * @v9ses - 9P2000 session information + * @fid - fid to open + * @mode - mode to open file (R, RW, etc) + * @fcall - pointer to response fcall + * + */ + +int +v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, + struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + long errorno = -1; + + dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode); + msg.id = TOPEN; + msg.params.topen.fid = fid; + msg.params.topen.mode = mode; + + errorno = v9fs_mux_rpc(v9ses, &msg, fcall); + + return errorno; +} + +/** + * v9fs_t_remove - remove a file or directory + * @v9ses: 9P2000 session information + * @fid: fid to remove + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "fid %d\n", fid); + msg.id = TREMOVE; + msg.params.tremove.fid = fid; + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_create - create a file or directory + * @v9ses: 9P2000 session information + * @fid: fid to create + * @name: name of the file or directory to create + * @perm: permissions to create with + * @mode: mode to open file (R, RW, etc) + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, + u32 perm, u8 mode, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + + dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n", + fid, name, perm, mode); + + msg.id = TCREATE; + msg.params.tcreate.fid = fid; + msg.params.tcreate.name = name; + msg.params.tcreate.perm = perm; + msg.params.tcreate.mode = mode; + + return v9fs_mux_rpc(v9ses, &msg, fcall); +} + +/** + * v9fs_t_read - read data + * @v9ses: 9P2000 session information + * @fid: fid to read from + * @offset: offset to start read at + * @count: how many bytes to read + * @fcall: pointer to response fcall (with data) + * + */ + +int +v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset, + u32 count, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + struct v9fs_fcall *rc = NULL; + long errorno = -1; + + dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid, + (long unsigned int)offset, count); + msg.id = TREAD; + msg.params.tread.fid = fid; + msg.params.tread.offset = offset; + msg.params.tread.count = count; + errorno = v9fs_mux_rpc(v9ses, &msg, &rc); + + if (!errorno) { + errorno = rc->params.rread.count; + dump_data(rc->params.rread.data, rc->params.rread.count); + } + + if (fcall) + *fcall = rc; + else + kfree(rc); + + return errorno; +} + +/** + * v9fs_t_write - write data + * @v9ses: 9P2000 session information + * @fid: fid to write to + * @offset: offset to start write at + * @count: how many bytes to write + * @fcall: pointer to response fcall + * + */ + +int +v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, + u64 offset, u32 count, void *data, struct v9fs_fcall **fcall) +{ + struct v9fs_fcall msg; + struct v9fs_fcall *rc = NULL; + long errorno = -1; + + dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid, + (unsigned long long)offset, count); + dump_data(data, count); + + msg.id = TWRITE; + msg.params.twrite.fid = fid; + msg.params.twrite.offset = offset; + msg.params.twrite.count = count; + msg.params.twrite.data = data; + + errorno = v9fs_mux_rpc(v9ses, &msg, &rc); + + if (!errorno) + errorno = rc->params.rwrite.count; + + if (fcall) + *fcall = rc; + else + kfree(rc); + + return errorno; +} diff --git a/fs/9p/9p.h b/fs/9p/9p.h new file mode 100644 index 00000000000..f55424216be --- /dev/null +++ b/fs/9p/9p.h @@ -0,0 +1,341 @@ +/* + * linux/fs/9p/9p.h + * + * 9P protocol definitions. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +/* Message Types */ +enum { + TVERSION = 100, + RVERSION, + TAUTH = 102, + RAUTH, + TATTACH = 104, + RATTACH, + TERROR = 106, + RERROR, + TFLUSH = 108, + RFLUSH, + TWALK = 110, + RWALK, + TOPEN = 112, + ROPEN, + TCREATE = 114, + RCREATE, + TREAD = 116, + RREAD, + TWRITE = 118, + RWRITE, + TCLUNK = 120, + RCLUNK, + TREMOVE = 122, + RREMOVE, + TSTAT = 124, + RSTAT, + TWSTAT = 126, + RWSTAT, +}; + +/* modes */ +enum { + V9FS_OREAD = 0x00, + V9FS_OWRITE = 0x01, + V9FS_ORDWR = 0x02, + V9FS_OEXEC = 0x03, + V9FS_OEXCL = 0x04, + V9FS_OTRUNC = 0x10, + V9FS_OREXEC = 0x20, + V9FS_ORCLOSE = 0x40, + V9FS_OAPPEND = 0x80, +}; + +/* permissions */ +enum { + V9FS_DMDIR = 0x80000000, + V9FS_DMAPPEND = 0x40000000, + V9FS_DMEXCL = 0x20000000, + V9FS_DMMOUNT = 0x10000000, + V9FS_DMAUTH = 0x08000000, + V9FS_DMTMP = 0x04000000, + V9FS_DMSYMLINK = 0x02000000, + V9FS_DMLINK = 0x01000000, + /* 9P2000.u extensions */ + V9FS_DMDEVICE = 0x00800000, + V9FS_DMNAMEDPIPE = 0x00200000, + V9FS_DMSOCKET = 0x00100000, + V9FS_DMSETUID = 0x00080000, + V9FS_DMSETGID = 0x00040000, +}; + +/* qid.types */ +enum { + V9FS_QTDIR = 0x80, + V9FS_QTAPPEND = 0x40, + V9FS_QTEXCL = 0x20, + V9FS_QTMOUNT = 0x10, + V9FS_QTAUTH = 0x08, + V9FS_QTTMP = 0x04, + V9FS_QTSYMLINK = 0x02, + V9FS_QTLINK = 0x01, + V9FS_QTFILE = 0x00, +}; + +/* ample room for Twrite/Rread header (iounit) */ +#define V9FS_IOHDRSZ 24 + +/* qids are the unique ID for a file (like an inode */ +struct v9fs_qid { + u8 type; + u32 version; + u64 path; +}; + +/* Plan 9 file metadata (stat) structure */ +struct v9fs_stat { + u16 size; + u16 type; + u32 dev; + struct v9fs_qid qid; + u32 mode; + u32 atime; + u32 mtime; + u64 length; + char *name; + char *uid; + char *gid; + char *muid; + char *extension; /* 9p2000.u extensions */ + u32 n_uid; /* 9p2000.u extensions */ + u32 n_gid; /* 9p2000.u extensions */ + u32 n_muid; /* 9p2000.u extensions */ + char data[0]; +}; + +/* Structures for Protocol Operations */ + +struct Tversion { + u32 msize; + char *version; +}; + +struct Rversion { + u32 msize; + char *version; +}; + +struct Tauth { + u32 afid; + char *uname; + char *aname; +}; + +struct Rauth { + struct v9fs_qid qid; +}; + +struct Rerror { + char *error; + u32 errno; /* 9p2000.u extension */ +}; + +struct Tflush { + u32 oldtag; +}; + +struct Rflush { +}; + +struct Tattach { + u32 fid; + u32 afid; + char *uname; + char *aname; +}; + +struct Rattach { + struct v9fs_qid qid; +}; + +struct Twalk { + u32 fid; + u32 newfid; + u32 nwname; + char **wnames; +}; + +struct Rwalk { + u32 nwqid; + struct v9fs_qid *wqids; +}; + +struct Topen { + u32 fid; + u8 mode; +}; + +struct Ropen { + struct v9fs_qid qid; + u32 iounit; +}; + +struct Tcreate { + u32 fid; + char *name; + u32 perm; + u8 mode; +}; + +struct Rcreate { + struct v9fs_qid qid; + u32 iounit; +}; + +struct Tread { + u32 fid; + u64 offset; + u32 count; +}; + +struct Rread { + u32 count; + u8 *data; +}; + +struct Twrite { + u32 fid; + u64 offset; + u32 count; + u8 *data; +}; + +struct Rwrite { + u32 count; +}; + +struct Tclunk { + u32 fid; +}; + +struct Rclunk { +}; + +struct Tremove { + u32 fid; +}; + +struct Rremove { +}; + +struct Tstat { + u32 fid; +}; + +struct Rstat { + struct v9fs_stat *stat; +}; + +struct Twstat { + u32 fid; + struct v9fs_stat *stat; +}; + +struct Rwstat { +}; + +/* + * fcall is the primary packet structure + * + */ + +struct v9fs_fcall { + u32 size; + u8 id; + u16 tag; + + union { + struct Tversion tversion; + struct Rversion rversion; + struct Tauth tauth; + struct Rauth rauth; + struct Rerror rerror; + struct Tflush tflush; + struct Rflush rflush; + struct Tattach tattach; + struct Rattach rattach; + struct Twalk twalk; + struct Rwalk rwalk; + struct Topen topen; + struct Ropen ropen; + struct Tcreate tcreate; + struct Rcreate rcreate; + struct Tread tread; + struct Rread rread; + struct Twrite twrite; + struct Rwrite rwrite; + struct Tclunk tclunk; + struct Rclunk rclunk; + struct Tremove tremove; + struct Rremove rremove; + struct Tstat tstat; + struct Rstat rstat; + struct Twstat twstat; + struct Rwstat rwstat; + } params; +}; + +#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "") + +int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize, + char *version, struct v9fs_fcall **rcall); + +int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname, + u32 fid, u32 afid, struct v9fs_fcall **rcall); + +int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_fcall **rcall); + +int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag); + +int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_fcall **rcall); + +int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_stat *stat, struct v9fs_fcall **rcall); + +int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid, + char *name, struct v9fs_fcall **rcall); + +int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode, + struct v9fs_fcall **rcall); + +int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid, + struct v9fs_fcall **rcall); + +int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name, + u32 perm, u8 mode, struct v9fs_fcall **rcall); + +int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, + u64 offset, u32 count, struct v9fs_fcall **rcall); + +int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, + u32 count, void *data, struct v9fs_fcall **rcall); diff --git a/fs/9p/Makefile b/fs/9p/Makefile new file mode 100644 index 00000000000..e4e4ffe5a7d --- /dev/null +++ b/fs/9p/Makefile @@ -0,0 +1,17 @@ +obj-$(CONFIG_9P_FS) := 9p2000.o + +9p2000-objs := \ + vfs_super.o \ + vfs_inode.o \ + vfs_file.o \ + vfs_dir.o \ + vfs_dentry.o \ + error.o \ + mux.o \ + trans_fd.o \ + trans_sock.o \ + 9p.o \ + conv.o \ + v9fs.o \ + fid.o + diff --git a/fs/9p/conv.c b/fs/9p/conv.c new file mode 100644 index 00000000000..1554731bd65 --- /dev/null +++ b/fs/9p/conv.c @@ -0,0 +1,693 @@ +/* + * linux/fs/9p/conv.c + * + * 9P protocol conversion functions + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "conv.h" + +/* + * Buffer to help with string parsing + */ +struct cbuf { + unsigned char *sp; + unsigned char *p; + unsigned char *ep; +}; + +static inline void buf_init(struct cbuf *buf, void *data, int datalen) +{ + buf->sp = buf->p = data; + buf->ep = data + datalen; +} + +static inline int buf_check_overflow(struct cbuf *buf) +{ + return buf->p > buf->ep; +} + +static inline void buf_check_size(struct cbuf *buf, int len) +{ + if (buf->p+len > buf->ep) { + if (buf->p < buf->ep) { + eprintk(KERN_ERR, "buffer overflow\n"); + buf->p = buf->ep + 1; + } + } +} + +static inline void *buf_alloc(struct cbuf *buf, int len) +{ + void *ret = NULL; + + buf_check_size(buf, len); + ret = buf->p; + buf->p += len; + + return ret; +} + +static inline void buf_put_int8(struct cbuf *buf, u8 val) +{ + buf_check_size(buf, 1); + + buf->p[0] = val; + buf->p++; +} + +static inline void buf_put_int16(struct cbuf *buf, u16 val) +{ + buf_check_size(buf, 2); + + *(__le16 *) buf->p = cpu_to_le16(val); + buf->p += 2; +} + +static inline void buf_put_int32(struct cbuf *buf, u32 val) +{ + buf_check_size(buf, 4); + + *(__le32 *)buf->p = cpu_to_le32(val); + buf->p += 4; +} + +static inline void buf_put_int64(struct cbuf *buf, u64 val) +{ + buf_check_size(buf, 8); + + *(__le64 *)buf->p = cpu_to_le64(val); + buf->p += 8; +} + +static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) +{ + buf_check_size(buf, slen + 2); + + buf_put_int16(buf, slen); + memcpy(buf->p, s, slen); + buf->p += slen; +} + +static inline void buf_put_string(struct cbuf *buf, const char *s) +{ + buf_put_stringn(buf, s, strlen(s)); +} + +static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) +{ + buf_check_size(buf, datalen); + + memcpy(buf->p, data, datalen); + buf->p += datalen; +} + +static inline u8 buf_get_int8(struct cbuf *buf) +{ + u8 ret = 0; + + buf_check_size(buf, 1); + ret = buf->p[0]; + + buf->p++; + + return ret; +} + +static inline u16 buf_get_int16(struct cbuf *buf) +{ + u16 ret = 0; + + buf_check_size(buf, 2); + ret = le16_to_cpu(*(__le16 *)buf->p); + + buf->p += 2; + + return ret; +} + +static inline u32 buf_get_int32(struct cbuf *buf) +{ + u32 ret = 0; + + buf_check_size(buf, 4); + ret = le32_to_cpu(*(__le32 *)buf->p); + + buf->p += 4; + + return ret; +} + +static inline u64 buf_get_int64(struct cbuf *buf) +{ + u64 ret = 0; + + buf_check_size(buf, 8); + ret = le64_to_cpu(*(__le64 *)buf->p); + + buf->p += 8; + + return ret; +} + +static inline int +buf_get_string(struct cbuf *buf, char *data, unsigned int datalen) +{ + + u16 len = buf_get_int16(buf); + buf_check_size(buf, len); + if (len + 1 > datalen) + return 0; + + memcpy(data, buf->p, len); + data[len] = 0; + buf->p += len; + + return len + 1; +} + +static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) +{ + char *ret = NULL; + int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p); + + if (n > 0) { + ret = sbuf->p; + sbuf->p += n; + } + + return ret; +} + +static inline int buf_get_data(struct cbuf *buf, void *data, int datalen) +{ + buf_check_size(buf, datalen); + + memcpy(data, buf->p, datalen); + buf->p += datalen; + + return datalen; +} + +static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, + int datalen) +{ + char *ret = NULL; + int n = 0; + + buf_check_size(dbuf, datalen); + + n = buf_get_data(buf, dbuf->p, datalen); + + if (n > 0) { + ret = dbuf->p; + dbuf->p += n; + } + + return ret; +} + +/** + * v9fs_size_stat - calculate the size of a variable length stat struct + * @v9ses: session information + * @stat: metadata (stat) structure + * + */ + +static int v9fs_size_stat(struct v9fs_session_info *v9ses, + struct v9fs_stat *stat) +{ + int size = 0; + + if (stat == NULL) { + eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n"); + return 0; + } + + size = /* 2 + *//* size[2] */ + 2 + /* type[2] */ + 4 + /* dev[4] */ + 1 + /* qid.type[1] */ + 4 + /* qid.vers[4] */ + 8 + /* qid.path[8] */ + 4 + /* mode[4] */ + 4 + /* atime[4] */ + 4 + /* mtime[4] */ + 8 + /* length[8] */ + 8; /* minimum sum of string lengths */ + + if (stat->name) + size += strlen(stat->name); + if (stat->uid) + size += strlen(stat->uid); + if (stat->gid) + size += strlen(stat->gid); + if (stat->muid) + size += strlen(stat->muid); + + if (v9ses->extended) { + size += 4 + /* n_uid[4] */ + 4 + /* n_gid[4] */ + 4 + /* n_muid[4] */ + 2; /* string length of extension[4] */ + if (stat->extension) + size += strlen(stat->extension); + } + + return size; +} + +/** + * serialize_stat - safely format a stat structure for transmission + * @v9ses: session info + * @stat: metadata (stat) structure + * @bufp: buffer to serialize structure into + * + */ + +static int +serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat, + struct cbuf *bufp) +{ + buf_put_int16(bufp, stat->size); + buf_put_int16(bufp, stat->type); + buf_put_int32(bufp, stat->dev); + buf_put_int8(bufp, stat->qid.type); + buf_put_int32(bufp, stat->qid.version); + buf_put_int64(bufp, stat->qid.path); + buf_put_int32(bufp, stat->mode); + buf_put_int32(bufp, stat->atime); + buf_put_int32(bufp, stat->mtime); + buf_put_int64(bufp, stat->length); + + buf_put_string(bufp, stat->name); + buf_put_string(bufp, stat->uid); + buf_put_string(bufp, stat->gid); + buf_put_string(bufp, stat->muid); + + if (v9ses->extended) { + buf_put_string(bufp, stat->extension); + buf_put_int32(bufp, stat->n_uid); + buf_put_int32(bufp, stat->n_gid); + buf_put_int32(bufp, stat->n_muid); + } + + if (buf_check_overflow(bufp)) + return 0; + + return stat->size; +} + +/** + * deserialize_stat - safely decode a recieved metadata (stat) structure + * @v9ses: session info + * @bufp: buffer to deserialize + * @stat: metadata (stat) structure + * @dbufp: buffer to deserialize variable strings into + * + */ + +static inline int +deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp, + struct v9fs_stat *stat, struct cbuf *dbufp) +{ + + stat->size = buf_get_int16(bufp); + stat->type = buf_get_int16(bufp); + stat->dev = buf_get_int32(bufp); + stat->qid.type = buf_get_int8(bufp); + stat->qid.version = buf_get_int32(bufp); + stat->qid.path = buf_get_int64(bufp); + stat->mode = buf_get_int32(bufp); + stat->atime = buf_get_int32(bufp); + stat->mtime = buf_get_int32(bufp); + stat->length = buf_get_int64(bufp); + stat->name = buf_get_stringb(bufp, dbufp); + stat->uid = buf_get_stringb(bufp, dbufp); + stat->gid = buf_get_stringb(bufp, dbufp); + stat->muid = buf_get_stringb(bufp, dbufp); + + if (v9ses->extended) { + stat->extension = buf_get_stringb(bufp, dbufp); + stat->n_uid = buf_get_int32(bufp); + stat->n_gid = buf_get_int32(bufp); + stat->n_muid = buf_get_int32(bufp); + } + + if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) + return 0; + + return stat->size + 2; +} + +/** + * deserialize_statb - wrapper for decoding a received metadata structure + * @v9ses: session info + * @bufp: buffer to deserialize + * @dbufp: buffer to deserialize variable strings into + * + */ + +static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info + *v9ses, struct cbuf *bufp, + struct cbuf *dbufp) +{ + struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat)); + + if (ret) { + int n = deserialize_stat(v9ses, bufp, ret, dbufp); + if (n <= 0) + return NULL; + } + + return ret; +} + +/** + * v9fs_deserialize_stat - decode a received metadata structure + * @v9ses: session info + * @buf: buffer to deserialize + * @buflen: length of received buffer + * @stat: metadata structure to decode into + * @statlen: length of destination metadata structure + * + */ + +int +v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf, + u32 buflen, struct v9fs_stat *stat, u32 statlen) +{ + struct cbuf buffer; + struct cbuf *bufp = &buffer; + struct cbuf dbuffer; + struct cbuf *dbufp = &dbuffer; + + buf_init(bufp, buf, buflen); + buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat), + statlen - sizeof(struct v9fs_stat)); + + return deserialize_stat(v9ses, bufp, stat, dbufp); +} + +static inline int +v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall) +{ + int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */ + int i = 0; + + switch (fcall->id) { + default: + eprintk(KERN_ERR, "bad msg type %d\n", fcall->id); + return 0; + case TVERSION: /* msize[4] version[s] */ + size += 4 + 2 + strlen(fcall->params.tversion.version); + break; + case TAUTH: /* afid[4] uname[s] aname[s] */ + size += 4 + 2 + strlen(fcall->params.tauth.uname) + + 2 + strlen(fcall->params.tauth.aname); + break; + case TFLUSH: /* oldtag[2] */ + size += 2; + break; + case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */ + size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) + + 2 + strlen(fcall->params.tattach.aname); + break; + case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */ + size += 4 + 4 + 2; + /* now compute total for the array of names */ + for (i = 0; i < fcall->params.twalk.nwname; i++) + size += 2 + strlen(fcall->params.twalk.wnames[i]); + break; + case TOPEN: /* fid[4] mode[1] */ + size += 4 + 1; + break; + case TCREATE: /* fid[4] name[s] perm[4] mode[1] */ + size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1; + break; + case TREAD: /* fid[4] offset[8] count[4] */ + size += 4 + 8 + 4; + break; + case TWRITE: /* fid[4] offset[8] count[4] data[count] */ + size += 4 + 8 + 4 + fcall->params.twrite.count; + break; + case TCLUNK: /* fid[4] */ + size += 4; + break; + case TREMOVE: /* fid[4] */ + size += 4; + break; + case TSTAT: /* fid[4] */ + size += 4; + break; + case TWSTAT: /* fid[4] stat[n] */ + fcall->params.twstat.stat->size = + v9fs_size_stat(v9ses, fcall->params.twstat.stat); + size += 4 + 2 + 2 + fcall->params.twstat.stat->size; + } + return size; +} + +/* + * v9fs_serialize_fcall - marshall fcall struct into a packet + * @v9ses: session information + * @fcall: structure to convert + * @data: buffer to serialize fcall into + * @datalen: length of buffer to serialize fcall into + * + */ + +int +v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall, + void *data, u32 datalen) +{ + int i = 0; + struct v9fs_stat *stat = NULL; + struct cbuf buffer; + struct cbuf *bufp = &buffer; + + buf_init(bufp, data, datalen); + + if (!fcall) { + eprintk(KERN_ERR, "no fcall\n"); + return -EINVAL; + } + + fcall->size = v9fs_size_fcall(v9ses, fcall); + + buf_put_int32(bufp, fcall->size); + buf_put_int8(bufp, fcall->id); + buf_put_int16(bufp, fcall->tag); + + dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id, + fcall->tag); + + /* now encode it */ + switch (fcall->id) { + default: + eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id); + return -EPROTO; + case TVERSION: + buf_put_int32(bufp, fcall->params.tversion.msize); + buf_put_string(bufp, fcall->params.tversion.version); + break; + case TAUTH: + buf_put_int32(bufp, fcall->params.tauth.afid); + buf_put_string(bufp, fcall->params.tauth.uname); + buf_put_string(bufp, fcall->params.tauth.aname); + break; + case TFLUSH: + buf_put_int16(bufp, fcall->params.tflush.oldtag); + break; + case TATTACH: + buf_put_int32(bufp, fcall->params.tattach.fid); + buf_put_int32(bufp, fcall->params.tattach.afid); + buf_put_string(bufp, fcall->params.tattach.uname); + buf_put_string(bufp, fcall->params.tattach.aname); + break; + case TWALK: + buf_put_int32(bufp, fcall->params.twalk.fid); + buf_put_int32(bufp, fcall->params.twalk.newfid); + buf_put_int16(bufp, fcall->params.twalk.nwname); + for (i = 0; i < fcall->params.twalk.nwname; i++) + buf_put_string(bufp, fcall->params.twalk.wnames[i]); + break; + case TOPEN: + buf_put_int32(bufp, fcall->params.topen.fid); + buf_put_int8(bufp, fcall->params.topen.mode); + break; + case TCREATE: + buf_put_int32(bufp, fcall->params.tcreate.fid); + buf_put_string(bufp, fcall->params.tcreate.name); + buf_put_int32(bufp, fcall->params.tcreate.perm); + buf_put_int8(bufp, fcall->params.tcreate.mode); + break; + case TREAD: + buf_put_int32(bufp, fcall->params.tread.fid); + buf_put_int64(bufp, fcall->params.tread.offset); + buf_put_int32(bufp, fcall->params.tread.count); + break; + case TWRITE: + buf_put_int32(bufp, fcall->params.twrite.fid); + buf_put_int64(bufp, fcall->params.twrite.offset); + buf_put_int32(bufp, fcall->params.twrite.count); + buf_put_data(bufp, fcall->params.twrite.data, + fcall->params.twrite.count); + break; + case TCLUNK: + buf_put_int32(bufp, fcall->params.tclunk.fid); + break; + case TREMOVE: + buf_put_int32(bufp, fcall->params.tremove.fid); + break; + case TSTAT: + buf_put_int32(bufp, fcall->params.tstat.fid); + break; + case TWSTAT: + buf_put_int32(bufp, fcall->params.twstat.fid); + stat = fcall->params.twstat.stat; + + buf_put_int16(bufp, stat->size + 2); + serialize_stat(v9ses, stat, bufp); + break; + } + + if (buf_check_overflow(bufp)) + return -EIO; + + return fcall->size; +} + +/** + * deserialize_fcall - unmarshal a response + * @v9ses: session information + * @msgsize: size of rcall message + * @buf: recieved buffer + * @buflen: length of received buffer + * @rcall: fcall structure to populate + * @rcalllen: length of fcall structure to populate + * + */ + +int +v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, + void *buf, u32 buflen, struct v9fs_fcall *rcall, + int rcalllen) +{ + + struct cbuf buffer; + struct cbuf *bufp = &buffer; + struct cbuf dbuffer; + struct cbuf *dbufp = &dbuffer; + int i = 0; + + buf_init(bufp, buf, buflen); + buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall), + rcalllen - sizeof(struct v9fs_fcall)); + + rcall->size = msgsize; + rcall->id = buf_get_int8(bufp); + rcall->tag = buf_get_int16(bufp); + + dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id, + rcall->tag); + switch (rcall->id) { + default: + eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id); + return -EPROTO; + case RVERSION: + rcall->params.rversion.msize = buf_get_int32(bufp); + rcall->params.rversion.version = buf_get_stringb(bufp, dbufp); + break; + case RFLUSH: + break; + case RATTACH: + rcall->params.rattach.qid.type = buf_get_int8(bufp); + rcall->params.rattach.qid.version = buf_get_int32(bufp); + rcall->params.rattach.qid.path = buf_get_int64(bufp); + break; + case RWALK: + rcall->params.rwalk.nwqid = buf_get_int16(bufp); + rcall->params.rwalk.wqids = buf_alloc(bufp, + rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); + if (rcall->params.rwalk.wqids) + for (i = 0; i < rcall->params.rwalk.nwqid; i++) { + rcall->params.rwalk.wqids[i].type = + buf_get_int8(bufp); + rcall->params.rwalk.wqids[i].version = + buf_get_int16(bufp); + rcall->params.rwalk.wqids[i].path = + buf_get_int64(bufp); + } + break; + case ROPEN: + rcall->params.ropen.qid.type = buf_get_int8(bufp); + rcall->params.ropen.qid.version = buf_get_int32(bufp); + rcall->params.ropen.qid.path = buf_get_int64(bufp); + rcall->params.ropen.iounit = buf_get_int32(bufp); + break; + case RCREATE: + rcall->params.rcreate.qid.type = buf_get_int8(bufp); + rcall->params.rcreate.qid.version = buf_get_int32(bufp); + rcall->params.rcreate.qid.path = buf_get_int64(bufp); + rcall->params.rcreate.iounit = buf_get_int32(bufp); + break; + case RREAD: + rcall->params.rread.count = buf_get_int32(bufp); + rcall->params.rread.data = buf_get_datab(bufp, dbufp, + rcall->params.rread.count); + break; + case RWRITE: + rcall->params.rwrite.count = buf_get_int32(bufp); + break; + case RCLUNK: + break; + case RREMOVE: + break; + case RSTAT: + buf_get_int16(bufp); + rcall->params.rstat.stat = + deserialize_statb(v9ses, bufp, dbufp); + break; + case RWSTAT: + break; + case RERROR: + rcall->params.rerror.error = buf_get_stringb(bufp, dbufp); + if (v9ses->extended) + rcall->params.rerror.errno = buf_get_int16(bufp); + break; + } + + if (buf_check_overflow(bufp) || buf_check_overflow(dbufp)) + return -EIO; + + return rcall->size; +} diff --git a/fs/9p/conv.h b/fs/9p/conv.h new file mode 100644 index 00000000000..ee849613c61 --- /dev/null +++ b/fs/9p/conv.h @@ -0,0 +1,36 @@ +/* + * linux/fs/9p/conv.h + * + * 9P protocol conversion definitions + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf, + u32 buflen, struct v9fs_stat *stat, u32 statlen); +int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall, + void *buf, u32 buflen); +int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen, + void *buf, u32 buflen, struct v9fs_fcall *rcall, + int rcalllen); + +/* this one is actually in error.c right now */ +int v9fs_errstr2errno(char *errstr); diff --git a/fs/9p/debug.h b/fs/9p/debug.h new file mode 100644 index 00000000000..4445f06919d --- /dev/null +++ b/fs/9p/debug.h @@ -0,0 +1,70 @@ +/* + * linux/fs/9p/debug.h - V9FS Debug Definitions + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#define DEBUG_ERROR (1<<0) +#define DEBUG_CURRENT (1<<1) +#define DEBUG_9P (1<<2) +#define DEBUG_VFS (1<<3) +#define DEBUG_CONV (1<<4) +#define DEBUG_MUX (1<<5) +#define DEBUG_TRANS (1<<6) +#define DEBUG_SLABS (1<<7) + +#define DEBUG_DUMP_PKT 0 + +extern int v9fs_debug_level; + +#define dprintk(level, format, arg...) \ +do { \ + if((v9fs_debug_level & level)==level) \ + printk(KERN_NOTICE "-- %s (%d): " \ + format , __FUNCTION__, current->pid , ## arg); \ +} while(0) + +#define eprintk(level, format, arg...) \ +do { \ + printk(level "v9fs: %s (%d): " \ + format , __FUNCTION__, current->pid , ## arg); \ +} while(0) + +#if DEBUG_DUMP_PKT +static inline void dump_data(const unsigned char *data, unsigned int datalen) +{ + int i, j; + int len = datalen; + + printk(KERN_DEBUG "data "); + for (i = 0; i < len; i += 4) { + for (j = 0; (j < 4) && (i + j < len); j++) + printk(KERN_DEBUG "%02x", data[i + j]); + printk(KERN_DEBUG " "); + } + printk(KERN_DEBUG "\n"); +} +#else /* DEBUG_DUMP_PKT */ +static inline void dump_data(const unsigned char *data, unsigned int datalen) +{ + +} +#endif /* DEBUG_DUMP_PKT */ diff --git a/fs/9p/error.c b/fs/9p/error.c new file mode 100644 index 00000000000..fee5d19179c --- /dev/null +++ b/fs/9p/error.c @@ -0,0 +1,93 @@ +/* + * linux/fs/9p/error.c + * + * Error string handling + * + * Plan 9 uses error strings, Unix uses error numbers. These functions + * try to help manage that and provide for dynamically adding error + * mappings. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/list.h> +#include <linux/jhash.h> + +#include "debug.h" +#include "error.h" + +/** + * v9fs_error_init - preload + * @errstr: error string + * + */ + +int v9fs_error_init(void) +{ + struct errormap *c; + int bucket; + + /* initialize hash table */ + for (bucket = 0; bucket < ERRHASHSZ; bucket++) + INIT_HLIST_HEAD(&hash_errmap[bucket]); + + /* load initial error map into hash table */ + for (c = errmap; c->name != NULL; c++) { + bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ; + INIT_HLIST_NODE(&c->list); + hlist_add_head(&c->list, &hash_errmap[bucket]); + } + + return 1; +} + +/** + * errstr2errno - convert error string to error number + * @errstr: error string + * + */ + +int v9fs_errstr2errno(char *errstr) +{ + int errno = 0; + struct hlist_node *p = NULL; + struct errormap *c = NULL; + int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ; + + hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + if (!strcmp(c->name, errstr)) { + errno = c->val; + break; + } + } + + if (errno == 0) { + /* TODO: if error isn't found, add it dynamically */ + printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, + errstr); + errno = 1; + } + + return -errno; +} diff --git a/fs/9p/error.h b/fs/9p/error.h new file mode 100644 index 00000000000..78f89acf7c9 --- /dev/null +++ b/fs/9p/error.h @@ -0,0 +1,178 @@ +/* + * linux/fs/9p/error.h + * + * Huge Nasty Error Table + * + * Plan 9 uses error strings, Unix uses error numbers. This table tries to + * match UNIX strings and Plan 9 strings to unix error numbers. It is used + * to preload the dynamic error table which can also track user-specific error + * strings. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/errno.h> +#include <asm/errno.h> + +struct errormap { + char *name; + int val; + + struct hlist_node list; +}; + +#define ERRHASHSZ 32 +static struct hlist_head hash_errmap[ERRHASHSZ]; + +/* FixMe - reduce to a reasonable size */ +static struct errormap errmap[] = { + {"Operation not permitted", EPERM}, + {"wstat prohibited", EPERM}, + {"No such file or directory", ENOENT}, + {"directory entry not found", ENOENT}, + {"file not found", ENOENT}, + {"Interrupted system call", EINTR}, + {"Input/output error", EIO}, + {"No such device or address", ENXIO}, + {"Argument list too long", E2BIG}, + {"Bad file descriptor", EBADF}, + {"Resource temporarily unavailable", EAGAIN}, + {"Cannot allocate memory", ENOMEM}, + {"Permission denied", EACCES}, + {"Bad address", EFAULT}, + {"Block device required", ENOTBLK}, + {"Device or resource busy", EBUSY}, + {"File exists", EEXIST}, + {"Invalid cross-device link", EXDEV}, + {"No such device", ENODEV}, + {"Not a directory", ENOTDIR}, + {"Is a directory", EISDIR}, + {"Invalid argument", EINVAL}, + {"Too many open files in system", ENFILE}, + {"Too many open files", EMFILE}, + {"Text file busy", ETXTBSY}, + {"File too large", EFBIG}, + {"No space left on device", ENOSPC}, + {"Illegal seek", ESPIPE}, + {"Read-only file system", EROFS}, + {"Too many links", EMLINK}, + {"Broken pipe", EPIPE}, + {"Numerical argument out of domain", EDOM}, + {"Numerical result out of range", ERANGE}, + {"Resource deadlock avoided", EDEADLK}, + {"File name too long", ENAMETOOLONG}, + {"No locks available", ENOLCK}, + {"Function not implemented", ENOSYS}, + {"Directory not empty", ENOTEMPTY}, + {"Too many levels of symbolic links", ELOOP}, + {"No message of desired type", ENOMSG}, + {"Identifier removed", EIDRM}, + {"No data available", ENODATA}, + {"Machine is not on the network", ENONET}, + {"Package not installed", ENOPKG}, + {"Object is remote", EREMOTE}, + {"Link has been severed", ENOLINK}, + {"Communication error on send", ECOMM}, + {"Protocol error", EPROTO}, + {"Bad message", EBADMSG}, + {"File descriptor in bad state", EBADFD}, + {"Streams pipe error", ESTRPIPE}, + {"Too many users", EUSERS}, + {"Socket operation on non-socket", ENOTSOCK}, + {"Message too long", EMSGSIZE}, + {"Protocol not available", ENOPROTOOPT}, + {"Protocol not supported", EPROTONOSUPPORT}, + {"Socket type not supported", ESOCKTNOSUPPORT}, + {"Operation not supported", EOPNOTSUPP}, + {"Protocol family not supported", EPFNOSUPPORT}, + {"Network is down", ENETDOWN}, + {"Network is unreachable", ENETUNREACH}, + {"Network dropped connection on reset", ENETRESET}, + {"Software caused connection abort", ECONNABORTED}, + {"Connection reset by peer", ECONNRESET}, + {"No buffer space available", ENOBUFS}, + {"Transport endpoint is already connected", EISCONN}, + {"Transport endpoint is not connected", ENOTCONN}, + {"Cannot send after transport endpoint shutdown", ESHUTDOWN}, + {"Connection timed out", ETIMEDOUT}, + {"Connection refused", ECONNREFUSED}, + {"Host is down", EHOSTDOWN}, + {"No route to host", EHOSTUNREACH}, + {"Operation already in progress", EALREADY}, + {"Operation now in progress", EINPROGRESS}, + {"Is a named type file", EISNAM}, + {"Remote I/O error", EREMOTEIO}, + {"Disk quota exceeded", EDQUOT}, +/* errors from fossil, vacfs, and u9fs */ + {"fid unknown or out of range", EBADF}, + {"permission denied", EACCES}, + {"file does not exist", ENOENT}, + {"authentication failed", ECONNREFUSED}, + {"bad offset in directory read", ESPIPE}, + {"bad use of fid", EBADF}, + {"wstat can't convert between files and directories", EPERM}, + {"directory is not empty", ENOTEMPTY}, + {"file exists", EEXIST}, + {"file already exists", EEXIST}, + {"file or directory already exists", EEXIST}, + {"fid already in use", EBADF}, + {"file in use", ETXTBSY}, + {"i/o error", EIO}, + {"file already open for I/O", ETXTBSY}, + {"illegal mode", EINVAL}, + {"illegal name", ENAMETOOLONG}, + {"not a directory", ENOTDIR}, + {"not a member of proposed group", EPERM}, + {"not owner", EACCES}, + {"only owner can change group in wstat", EACCES}, + {"read only file system", EROFS}, + {"no access to special file", EPERM}, + {"i/o count too large", EIO}, + {"unknown group", EINVAL}, + {"unknown user", EINVAL}, + {"bogus wstat buffer", EPROTO}, + {"exclusive use file already open", EAGAIN}, + {"corrupted directory entry", EIO}, + {"corrupted file entry", EIO}, + {"corrupted block label", EIO}, + {"corrupted meta data", EIO}, + {"illegal offset", EINVAL}, + {"illegal path element", ENOENT}, + {"root of file system is corrupted", EIO}, + {"corrupted super block", EIO}, + {"protocol botch", EPROTO}, + {"file system is full", ENOSPC}, + {"file is in use", EAGAIN}, + {"directory entry is not allocated", ENOENT}, + {"file is read only", EROFS}, + {"file has been removed", EIDRM}, + {"only support truncation to zero length", EPERM}, + {"cannot remove root", EPERM}, + {"file too big", EFBIG}, + {"venti i/o error", EIO}, + /* these are not errors */ + {"u9fs rhostsauth: no authentication required", 0}, + {"u9fs authnone: no authentication required", 0}, + {NULL, -1} +}; + +extern int v9fs_error_init(void); +extern int v9fs_errstr2errno(char *errstr); diff --git a/fs/9p/fid.c b/fs/9p/fid.c new file mode 100644 index 00000000000..821c9c4d76a --- /dev/null +++ b/fs/9p/fid.c @@ -0,0 +1,241 @@ +/* + * V9FS FID Management + * + * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "transport.h" +#include "mux.h" +#include "conv.h" +#include "fid.h" + +/** + * v9fs_fid_insert - add a fid to a dentry + * @fid: fid to add + * @dentry: dentry that it is being added to + * + */ + +static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry) +{ + struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; + dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid, + dentry->d_iname, dentry); + if (dentry->d_fsdata == NULL) { + dentry->d_fsdata = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (dentry->d_fsdata == NULL) { + dprintk(DEBUG_ERROR, "Out of memory\n"); + return -ENOMEM; + } + fid_list = (struct list_head *)dentry->d_fsdata; + INIT_LIST_HEAD(fid_list); /* Initialize list head */ + } + + fid->uid = current->uid; + fid->pid = current->pid; + list_add(&fid->list, fid_list); + return 0; +} + +/** + * v9fs_fid_create - allocate a FID structure + * @dentry - dentry to link newly created fid to + * + */ + +struct v9fs_fid *v9fs_fid_create(struct dentry *dentry) +{ + struct v9fs_fid *new; + + new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL); + if (new == NULL) { + dprintk(DEBUG_ERROR, "Out of Memory\n"); + return ERR_PTR(-ENOMEM); + } + + new->fid = -1; + new->fidopen = 0; + new->fidcreate = 0; + new->fidclunked = 0; + new->iounit = 0; + + if (v9fs_fid_insert(new, dentry) == 0) + return new; + else { + dprintk(DEBUG_ERROR, "Problems inserting to dentry\n"); + kfree(new); + return NULL; + } +} + +/** + * v9fs_fid_destroy - deallocate a FID structure + * @fid: fid to destroy + * + */ + +void v9fs_fid_destroy(struct v9fs_fid *fid) +{ + list_del(&fid->list); + kfree(fid); +} + +/** + * v9fs_fid_lookup - retrieve the right fid from a particular dentry + * @dentry: dentry to look for fid in + * @type: intent of lookup (operation or traversal) + * + * search list of fids associated with a dentry for a fid with a matching + * thread id or uid. If that fails, look up the dentry's parents to see if you + * can find a matching fid. + * + */ + +struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type) +{ + struct list_head *fid_list = (struct list_head *)dentry->d_fsdata; + struct v9fs_fid *current_fid = NULL; + struct v9fs_fid *temp = NULL; + struct v9fs_fid *return_fid = NULL; + int found_parent = 0; + int found_user = 0; + + dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry, + type); + + if (fid_list && !list_empty(fid_list)) { + list_for_each_entry_safe(current_fid, temp, fid_list, list) { + if (current_fid->uid == current->uid) { + if (return_fid == NULL) { + if ((type == FID_OP) + || (!current_fid->fidopen)) { + return_fid = current_fid; + found_user = 1; + } + } + } + if (current_fid->pid == current->real_parent->pid) { + if ((return_fid == NULL) || (found_parent) + || (found_user)) { + if ((type == FID_OP) + || (!current_fid->fidopen)) { + return_fid = current_fid; + found_parent = 1; + found_user = 0; + } + } + } + if (current_fid->pid == current->pid) { + if ((type == FID_OP) || + (!current_fid->fidopen)) { + return_fid = current_fid; + found_parent = 0; + found_user = 0; + } + } + } + } + + /* we are at the root but didn't match */ + if ((!return_fid) && (dentry->d_parent == dentry)) { + /* TODO: clone attach with new uid */ + return_fid = current_fid; + } + + if (!return_fid) { + struct dentry *par = current->fs->pwd->d_parent; + int count = 1; + while (par != NULL) { + if (par == dentry) + break; + count++; + if (par == par->d_parent) { + dprintk(DEBUG_ERROR, + "got to root without finding dentry\n"); + break; + } + par = par->d_parent; + } + +/* XXX - there may be some duplication we can get rid of */ + if (par == dentry) { + /* we need to fid_lookup the starting point */ + int fidnum = -1; + int oldfid = -1; + int result = -1; + struct v9fs_session_info *v9ses = + v9fs_inode2v9ses(current->fs->pwd->d_inode); + + current_fid = + v9fs_fid_lookup(current->fs->pwd, FID_WALK); + if (current_fid == NULL) { + dprintk(DEBUG_ERROR, + "process cwd doesn't have a fid\n"); + return return_fid; + } + oldfid = current_fid->fid; + par = current->fs->pwd; + /* TODO: take advantage of multiwalk */ + + fidnum = v9fs_get_idpool(&v9ses->fidpool); + if (fidnum < 0) { + dprintk(DEBUG_ERROR, + "could not get a new fid num\n"); + return return_fid; + } + + while (par != dentry) { + result = + v9fs_t_walk(v9ses, oldfid, fidnum, "..", + NULL); + if (result < 0) { + dprintk(DEBUG_ERROR, + "problem walking to parent\n"); + + break; + } + oldfid = fidnum; + if (par == par->d_parent) { + dprintk(DEBUG_ERROR, + "can't find dentry\n"); + break; + } + par = par->d_parent; + } + if (par == dentry) { + return_fid = v9fs_fid_create(dentry); + return_fid->fid = fidnum; + } + } + } + + return return_fid; +} diff --git a/fs/9p/fid.h b/fs/9p/fid.h new file mode 100644 index 00000000000..7db478ccca3 --- /dev/null +++ b/fs/9p/fid.h @@ -0,0 +1,57 @@ +/* + * V9FS FID Management + * + * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/list.h> + +#define FID_OP 0 +#define FID_WALK 1 + +struct v9fs_fid { + struct list_head list; /* list of fids associated with a dentry */ + struct list_head active; /* XXX - debug */ + + u32 fid; + unsigned char fidopen; /* set when fid is opened */ + unsigned char fidcreate; /* set when fid was just created */ + unsigned char fidclunked; /* set when fid has already been clunked */ + + struct v9fs_qid qid; + u32 iounit; + + /* readdir stuff */ + int rdir_fpos; + loff_t rdir_pos; + struct v9fs_fcall *rdir_fcall; + + /* management stuff */ + pid_t pid; /* thread associated with this fid */ + uid_t uid; /* user associated with this fid */ + + /* private data */ + struct file *filp; /* backpointer to File struct for open files */ + struct v9fs_session_info *v9ses; /* session info for this FID */ +}; + +struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type); +void v9fs_fid_destroy(struct v9fs_fid *fid); +struct v9fs_fid *v9fs_fid_create(struct dentry *); diff --git a/fs/9p/mux.c b/fs/9p/mux.c new file mode 100644 index 00000000000..8835b576f74 --- /dev/null +++ b/fs/9p/mux.c @@ -0,0 +1,475 @@ +/* + * linux/fs/9p/mux.c + * + * Protocol Multiplexer + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/kthread.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "transport.h" +#include "conv.h" +#include "mux.h" + +/** + * dprintcond - print condition of session info + * @v9ses: session info structure + * @req: RPC request structure + * + */ + +static inline int +dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +{ + dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status, + req->rcall); + return 0; +} + +/** + * xread - force read of a certain number of bytes + * @v9ses: session info structure + * @ptr: pointer to buffer + * @sz: number of bytes to read + * + * Chuck Cranor CS-533 project1 + */ + +static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz) +{ + int rd = 0; + int ret = 0; + while (rd < sz) { + ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd); + if (ret <= 0) { + dprintk(DEBUG_ERROR, "xread errno %d\n", ret); + return ret; + } + rd += ret; + ptr += ret; + } + return (rd); +} + +/** + * read_message - read a full 9P2000 fcall packet + * @v9ses: session info structure + * @rcall: fcall structure to read into + * @rcalllen: size of fcall buffer + * + */ + +static int +read_message(struct v9fs_session_info *v9ses, + struct v9fs_fcall *rcall, int rcalllen) +{ + unsigned char buf[4]; + void *data; + int size = 0; + int res = 0; + + res = xread(v9ses, buf, sizeof(buf)); + if (res < 0) { + dprintk(DEBUG_ERROR, + "Reading of count field failed returned: %d\n", res); + return res; + } + + if (res < 4) { + dprintk(DEBUG_ERROR, + "Reading of count field failed returned: %d\n", res); + return -EIO; + } + + size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); + dprintk(DEBUG_MUX, "got a packet count: %d\n", size); + + /* adjust for the four bytes of size */ + size -= 4; + + if (size > v9ses->maxdata) { + dprintk(DEBUG_ERROR, "packet too big: %d\n", size); + return -E2BIG; + } + + data = kmalloc(size, GFP_KERNEL); + if (!data) { + eprintk(KERN_WARNING, "out of memory\n"); + return -ENOMEM; + } + + res = xread(v9ses, data, size); + if (res < size) { + dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n", + res); + kfree(data); + return res; + } + + /* we now have an in-memory string that is the reply. + * deserialize it. There is very little to go wrong at this point + * save for v9fs_alloc errors. + */ + res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata, + rcall, rcalllen); + + kfree(data); + + if (res < 0) + return res; + + return 0; +} + +/** + * v9fs_recv - receive an RPC response for a particular tag + * @v9ses: session info structure + * @req: RPC request structure + * + */ + +static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +{ + int ret = 0; + + dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag); + ret = wait_event_interruptible(v9ses->read_wait, + ((v9ses->transport->status != Connected) || + (req->rcall != 0) || (req->err < 0) || + dprintcond(v9ses, req))); + + dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall); + + spin_lock(&v9ses->muxlock); + list_del(&req->next); + spin_unlock(&v9ses->muxlock); + + if (req->err < 0) + return req->err; + + if (v9ses->transport->status == Disconnected) + return -ECONNRESET; + + return ret; +} + +/** + * v9fs_send - send a 9P request + * @v9ses: session info structure + * @req: RPC request to send + * + */ + +static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req) +{ + int ret = -1; + void *data = NULL; + struct v9fs_fcall *tcall = req->tcall; + + data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); + if (!data) + return -ENOMEM; + + tcall->size = 0; /* enforce size recalculation */ + ret = + v9fs_serialize_fcall(v9ses, tcall, data, + v9ses->maxdata + V9FS_IOHDRSZ); + if (ret < 0) + goto free_data; + + spin_lock(&v9ses->muxlock); + list_add(&req->next, &v9ses->mux_fcalls); + spin_unlock(&v9ses->muxlock); + + dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag, + tcall->size); + ret = v9ses->transport->write(v9ses->transport, data, tcall->size); + + if (ret != tcall->size) { + spin_lock(&v9ses->muxlock); + list_del(&req->next); + kfree(req->rcall); + + spin_unlock(&v9ses->muxlock); + if (ret >= 0) + ret = -EREMOTEIO; + } else + ret = 0; + + free_data: + kfree(data); + return ret; +} + +/** + * v9fs_mux_rpc - send a request, receive a response + * @v9ses: session info structure + * @tcall: fcall to send + * @rcall: buffer to place response into + * + */ + +long +v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall, + struct v9fs_fcall **rcall) +{ + int tid = -1; + struct v9fs_fcall *fcall = NULL; + struct v9fs_rpcreq req; + int ret = -1; + + if (!v9ses) + return -EINVAL; + + if (!v9ses->transport || v9ses->transport->status != Connected) + return -EIO; + + if (rcall) + *rcall = NULL; + + if (tcall->id != TVERSION) { + tid = v9fs_get_idpool(&v9ses->tidpool); + if (tid < 0) + return -ENOMEM; + } + + tcall->tag = tid; + + req.tcall = tcall; + req.err = 0; + req.rcall = NULL; + + ret = v9fs_send(v9ses, &req); + + if (ret < 0) { + if (tcall->id != TVERSION) + v9fs_put_idpool(tid, &v9ses->tidpool); + dprintk(DEBUG_MUX, "error %d\n", ret); + return ret; + } + + ret = v9fs_recv(v9ses, &req); + + fcall = req.rcall; + + dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret); + if (ret == -ERESTARTSYS) { + if (v9ses->transport->status != Disconnected + && tcall->id != TFLUSH) { + unsigned long flags; + + dprintk(DEBUG_MUX, "flushing the tag: %d\n", + tcall->tag); + clear_thread_flag(TIF_SIGPENDING); + v9fs_t_flush(v9ses, tcall->tag); + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, + flags); + dprintk(DEBUG_MUX, "flushing done\n"); + } + + goto release_req; + } else if (ret < 0) + goto release_req; + + if (!fcall) + ret = -EIO; + else { + if (fcall->id == RERROR) { + ret = v9fs_errstr2errno(fcall->params.rerror.error); + if (ret == 0) { /* string match failed */ + if (fcall->params.rerror.errno) + ret = -(fcall->params.rerror.errno); + else + ret = -ESERVERFAULT; + } + } else if (fcall->id != tcall->id + 1) { + dprintk(DEBUG_ERROR, + "fcall mismatch: expected %d, got %d\n", + tcall->id + 1, fcall->id); + ret = -EIO; + } + } + + release_req: + if (tcall->id != TVERSION) + v9fs_put_idpool(tid, &v9ses->tidpool); + if (rcall) + *rcall = fcall; + else + kfree(fcall); + + return ret; +} + +/** + * v9fs_mux_cancel_requests - cancels all pending requests + * + * @v9ses: session info structure + * @err: error code to return to the requests + */ +void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err) +{ + struct v9fs_rpcreq *rptr; + struct v9fs_rpcreq *rreq; + + dprintk(DEBUG_MUX, " %d\n", err); + spin_lock(&v9ses->muxlock); + list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { + rreq->err = err; + } + spin_unlock(&v9ses->muxlock); + wake_up_all(&v9ses->read_wait); +} + +/** + * v9fs_recvproc - kproc to handle demultiplexing responses + * @data: session info structure + * + */ + +static int v9fs_recvproc(void *data) +{ + struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data; + struct v9fs_fcall *rcall = NULL; + struct v9fs_rpcreq *rptr; + struct v9fs_rpcreq *req; + struct v9fs_rpcreq *rreq; + int err = 0; + + allow_signal(SIGKILL); + set_current_state(TASK_INTERRUPTIBLE); + complete(&v9ses->proccmpl); + while (!kthread_should_stop() && err >= 0) { + req = rptr = rreq = NULL; + + rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL); + if (!rcall) { + eprintk(KERN_ERR, "no memory for buffers\n"); + break; + } + + err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ); + spin_lock(&v9ses->muxlock); + if (err < 0) { + list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { + rreq->err = err; + } + if(err != -ERESTARTSYS) + eprintk(KERN_ERR, + "Transport error while reading message %d\n", err); + } else { + list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) { + if (rreq->tcall->tag == rcall->tag) { + req = rreq; + req->rcall = rcall; + break; + } + } + } + + if (req && (req->tcall->id == TFLUSH)) { + struct v9fs_rpcreq *treq = NULL; + list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) { + if (treq->tcall->tag == + req->tcall->params.tflush.oldtag) { + list_del(&rptr->next); + kfree(treq->rcall); + break; + } + } + } + + spin_unlock(&v9ses->muxlock); + + if (!req) { + if (err >= 0) + dprintk(DEBUG_ERROR, + "unexpected response: id %d tag %d\n", + rcall->id, rcall->tag); + + kfree(rcall); + } + + wake_up_all(&v9ses->read_wait); + set_current_state(TASK_INTERRUPTIBLE); + } + + v9ses->transport->close(v9ses->transport); + + /* Inform all pending processes about the failure */ + wake_up_all(&v9ses->read_wait); + + if (signal_pending(current)) + complete(&v9ses->proccmpl); + + dprintk(DEBUG_MUX, "recvproc: end\n"); + v9ses->recvproc = NULL; + + return err >= 0; +} + +/** + * v9fs_mux_init - initialize multiplexer (spawn kproc) + * @v9ses: session info structure + * @dev_name: mount device information (to create unique kproc) + * + */ + +int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name) +{ + char procname[60]; + + strncpy(procname, dev_name, sizeof(procname)); + procname[sizeof(procname) - 1] = 0; + + init_waitqueue_head(&v9ses->read_wait); + init_completion(&v9ses->fcread); + init_completion(&v9ses->proccmpl); + spin_lock_init(&v9ses->muxlock); + INIT_LIST_HEAD(&v9ses->mux_fcalls); + v9ses->recvproc = NULL; + v9ses->curfcall = NULL; + + v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses, + "v9fs_recvproc %s", procname); + + if (IS_ERR(v9ses->recvproc)) { + eprintk(KERN_ERR, "cannot create receiving thread\n"); + v9fs_session_close(v9ses); + return -ECONNABORTED; + } + + wake_up_process(v9ses->recvproc); + wait_for_completion(&v9ses->proccmpl); + + return 0; +} diff --git a/fs/9p/mux.h b/fs/9p/mux.h new file mode 100644 index 00000000000..4994cb10bad --- /dev/null +++ b/fs/9p/mux.h @@ -0,0 +1,41 @@ +/* + * linux/fs/9p/mux.h + * + * Multiplexer Definitions + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +/* structure to manage each RPC transaction */ + +struct v9fs_rpcreq { + struct v9fs_fcall *tcall; + struct v9fs_fcall *rcall; + int err; /* error code if response failed */ + + /* XXX - could we put scatter/gather buffers here? */ + + struct list_head next; +}; + +int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name); +long v9fs_mux_rpc(struct v9fs_session_info *v9ses, + struct v9fs_fcall *tcall, struct v9fs_fcall **rcall); +void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err); diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c new file mode 100644 index 00000000000..63b58ce98ff --- /dev/null +++ b/fs/9p/trans_fd.c @@ -0,0 +1,172 @@ +/* + * linux/fs/9p/trans_fd.c + * + * File Descriptor Transport Layer + * + * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <asm/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> + +#include "debug.h" +#include "v9fs.h" +#include "transport.h" + +struct v9fs_trans_fd { + struct file *in_file; + struct file *out_file; +}; + +/** + * v9fs_fd_recv - receive from a socket + * @v9ses: session information + * @v: buffer to receive data into + * @len: size of receive buffer + * + */ + +static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len) +{ + struct v9fs_trans_fd *ts = trans ? trans->priv : NULL; + + if (!trans || trans->status != Connected || !ts) + return -EIO; + + return kernel_read(ts->in_file, ts->in_file->f_pos, v, len); +} + +/** + * v9fs_fd_send - send to a socket + * @v9ses: session information + * @v: buffer to send data from + * @len: size of send buffer + * + */ + +static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len) +{ + struct v9fs_trans_fd *ts = trans ? trans->priv : NULL; + mm_segment_t oldfs = get_fs(); + int ret = 0; + + if (!trans || trans->status != Connected || !ts) + return -EIO; + + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos); + set_fs(oldfs); + + return ret; +} + +/** + * v9fs_fd_init - initialize file descriptor transport + * @v9ses: session information + * @addr: address of server to mount + * @data: mount options + * + */ + +static int +v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data) +{ + struct v9fs_trans_fd *ts = NULL; + struct v9fs_transport *trans = v9ses->transport; + + if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) { + printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + return -ENOPROTOOPT; + } + + sema_init(&trans->writelock, 1); + sema_init(&trans->readlock, 1); + + ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL); + + if (!ts) + return -ENOMEM; + + ts->in_file = fget( v9ses->rfdno ); + ts->out_file = fget( v9ses->wfdno ); + + if (!ts->in_file || !ts->out_file) { + if (ts->in_file) + fput(ts->in_file); + + if (ts->out_file) + fput(ts->out_file); + + kfree(ts); + return -EIO; + } + + trans->priv = ts; + trans->status = Connected; + + return 0; +} + + +/** + * v9fs_fd_close - shutdown file descriptor + * @trans: private socket structure + * + */ + +static void v9fs_fd_close(struct v9fs_transport *trans) +{ + struct v9fs_trans_fd *ts; + + if (!trans) + return; + + trans->status = Disconnected; + ts = trans->priv; + + if (!ts) + return; + + if (ts->in_file) + fput(ts->in_file); + + if (ts->out_file) + fput(ts->out_file); + + kfree(ts); +} + +struct v9fs_transport v9fs_trans_fd = { + .init = v9fs_fd_init, + .write = v9fs_fd_send, + .read = v9fs_fd_recv, + .close = v9fs_fd_close, +}; + diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c new file mode 100644 index 00000000000..01e26f0013a --- /dev/null +++ b/fs/9p/trans_sock.c @@ -0,0 +1,290 @@ +/* + * linux/fs/9p/trans_socket.c + * + * Socket Transport Layer + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> + * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <asm/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "transport.h" + +#define V9FS_PORT 564 + +struct v9fs_trans_sock { + struct socket *s; +}; + +/** + * v9fs_sock_recv - receive from a socket + * @v9ses: session information + * @v: buffer to receive data into + * @len: size of receive buffer + * + */ + +static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len) +{ + struct msghdr msg; + struct kvec iov; + int result; + mm_segment_t oldfs; + struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; + + if (trans->status == Disconnected) + return -EREMOTEIO; + + result = -EINVAL; + + oldfs = get_fs(); + set_fs(get_ds()); + + iov.iov_base = v; + iov.iov_len = len; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + msg.msg_flags = MSG_NOSIGNAL; + + result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0); + + dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state); + set_fs(oldfs); + + if (result <= 0) { + if (result != -ERESTARTSYS) + trans->status = Disconnected; + } + + return result; +} + +/** + * v9fs_sock_send - send to a socket + * @v9ses: session information + * @v: buffer to send data from + * @len: size of send buffer + * + */ + +static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len) +{ + struct kvec iov; + struct msghdr msg; + int result = -1; + mm_segment_t oldfs; + struct v9fs_trans_sock *ts = trans ? trans->priv : NULL; + + dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len); + dump_data(v, len); + + down(&trans->writelock); + + oldfs = get_fs(); + set_fs(get_ds()); + iov.iov_base = v; + iov.iov_len = len; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + msg.msg_flags = MSG_NOSIGNAL; + result = kernel_sendmsg(ts->s, &msg, &iov, 1, len); + set_fs(oldfs); + + if (result < 0) { + if (result != -ERESTARTSYS) + trans->status = Disconnected; + } + + up(&trans->writelock); + return result; +} + +/** + * v9fs_tcp_init - initialize TCP socket + * @v9ses: session information + * @addr: address of server to mount + * @data: mount options + * + */ + +static int +v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data) +{ + struct socket *csocket = NULL; + struct sockaddr_in sin_server; + int rc = 0; + struct v9fs_trans_sock *ts = NULL; + struct v9fs_transport *trans = v9ses->transport; + + sema_init(&trans->writelock, 1); + sema_init(&trans->readlock, 1); + + ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); + + if (!ts) + return -ENOMEM; + + trans->priv = ts; + ts->s = NULL; + + if (!addr) + return -EINVAL; + + dprintk(DEBUG_TRANS, "Connecting to %s\n", addr); + + sin_server.sin_family = AF_INET; + sin_server.sin_addr.s_addr = in_aton(addr); + sin_server.sin_port = htons(v9ses->port); + sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + rc = csocket->ops->connect(csocket, + (struct sockaddr *)&sin_server, + sizeof(struct sockaddr_in), 0); + if (rc < 0) { + eprintk(KERN_ERR, + "v9fs_trans_tcp: problem connecting socket to %s\n", + addr); + return rc; + } + csocket->sk->sk_allocation = GFP_NOIO; + ts->s = csocket; + trans->status = Connected; + + return 0; +} + +/** + * v9fs_unix_init - initialize UNIX domain socket + * @v9ses: session information + * @dev_name: path to named pipe + * @data: mount options + * + */ + +static int +v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name, + char *data) +{ + int rc; + struct socket *csocket; + struct sockaddr_un sun_server; + struct v9fs_transport *trans; + struct v9fs_trans_sock *ts; + + rc = 0; + csocket = NULL; + trans = v9ses->transport; + + if (strlen(dev_name) > UNIX_PATH_MAX) { + eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n", + dev_name); + return -ENOMEM; + } + + ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + trans->priv = ts; + ts->s = NULL; + + sema_init(&trans->writelock, 1); + sema_init(&trans->readlock, 1); + + sun_server.sun_family = PF_UNIX; + strcpy(sun_server.sun_path, dev_name); + sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */ + if (rc < 0) { + eprintk(KERN_ERR, + "v9fs_trans_unix: problem connecting socket: %s: %d\n", + dev_name, rc); + return rc; + } + csocket->sk->sk_allocation = GFP_NOIO; + ts->s = csocket; + trans->status = Connected; + + return 0; +} + +/** + * v9fs_sock_close - shutdown socket + * @trans: private socket structure + * + */ + +static void v9fs_sock_close(struct v9fs_transport *trans) +{ + struct v9fs_trans_sock *ts; + + if (!trans) + return; + + ts = trans->priv; + + if ((ts) && (ts->s)) { + dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s); + sock_release(ts->s); + ts->s = NULL; + trans->status = Disconnected; + dprintk(DEBUG_TRANS, "socket closed\n"); + } + + if (ts) + kfree(ts); + + trans->priv = NULL; +} + +struct v9fs_transport v9fs_trans_tcp = { + .init = v9fs_tcp_init, + .write = v9fs_sock_send, + .read = v9fs_sock_recv, + .close = v9fs_sock_close, +}; + +struct v9fs_transport v9fs_trans_unix = { + .init = v9fs_unix_init, + .write = v9fs_sock_send, + .read = v9fs_sock_recv, + .close = v9fs_sock_close, +}; diff --git a/fs/9p/transport.h b/fs/9p/transport.h new file mode 100644 index 00000000000..9e9cd418efd --- /dev/null +++ b/fs/9p/transport.h @@ -0,0 +1,46 @@ +/* + * linux/fs/9p/transport.h + * + * Transport Definition + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +enum v9fs_transport_status { + Connected, + Disconnected, + Hung, +}; + +struct v9fs_transport { + enum v9fs_transport_status status; + struct semaphore writelock; + struct semaphore readlock; + void *priv; + + int (*init) (struct v9fs_session_info *, const char *, char *); + int (*write) (struct v9fs_transport *, void *, int); + int (*read) (struct v9fs_transport *, void *, int); + void (*close) (struct v9fs_transport *); +}; + +extern struct v9fs_transport v9fs_trans_tcp; +extern struct v9fs_transport v9fs_trans_unix; +extern struct v9fs_transport v9fs_trans_fd; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c new file mode 100644 index 00000000000..13bdbbab438 --- /dev/null +++ b/fs/9p/v9fs.c @@ -0,0 +1,452 @@ +/* + * linux/fs/9p/v9fs.c + * + * This file contains functions assisting in mapping VFS to 9P2000 + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/parser.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "transport.h" +#include "mux.h" +#include "conv.h" + +/* TODO: sysfs or debugfs interface */ +int v9fs_debug_level = 0; /* feature-rific global debug level */ + +/* + * Option Parsing (code inspired by NFS code) + * + */ + +enum { + /* Options that take integer arguments */ + Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug, + Opt_rfdno, Opt_wfdno, + /* String options */ + Opt_name, Opt_remotename, + /* Options that take no arguments */ + Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd, + /* Error token */ + Opt_err +}; + +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_msize, "msize=%u"}, + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_afid, "afid=%u"}, + {Opt_rfdno, "rfdno=%u"}, + {Opt_wfdno, "wfdno=%u"}, + {Opt_debug, "debug=%u"}, + {Opt_name, "name=%s"}, + {Opt_remotename, "aname=%s"}, + {Opt_unix, "proto=unix"}, + {Opt_tcp, "proto=tcp"}, + {Opt_fd, "proto=fd"}, + {Opt_tcp, "tcp"}, + {Opt_unix, "unix"}, + {Opt_fd, "fd"}, + {Opt_legacy, "noextend"}, + {Opt_nodevmap, "nodevmap"}, + {Opt_err, NULL} +}; + +/* + * Parse option string. + */ + +/** + * v9fs_parse_options - parse mount options into session structure + * @options: options string passed from mount + * @v9ses: existing v9fs session information + * + */ + +static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int ret; + + /* setup defaults */ + v9ses->port = V9FS_PORT; + v9ses->maxdata = 9000; + v9ses->proto = PROTO_TCP; + v9ses->extended = 1; + v9ses->afid = ~0; + v9ses->debug = 0; + v9ses->rfdno = ~0; + v9ses->wfdno = ~0; + + if (!options) + return; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + token = match_token(p, tokens, args); + if (token < Opt_name) { + if ((ret = match_int(&args[0], &option)) < 0) { + dprintk(DEBUG_ERROR, + "integer field, but no integer?\n"); + continue; + } + + } + switch (token) { + case Opt_port: + v9ses->port = option; + break; + case Opt_msize: + v9ses->maxdata = option; + break; + case Opt_uid: + v9ses->uid = option; + break; + case Opt_gid: + v9ses->gid = option; + break; + case Opt_afid: + v9ses->afid = option; + break; + case Opt_rfdno: + v9ses->rfdno = option; + break; + case Opt_wfdno: + v9ses->wfdno = option; + break; + case Opt_debug: + v9ses->debug = option; + break; + case Opt_tcp: + v9ses->proto = PROTO_TCP; + break; + case Opt_unix: + v9ses->proto = PROTO_UNIX; + break; + case Opt_fd: + v9ses->proto = PROTO_FD; + break; + case Opt_name: + match_strcpy(v9ses->name, &args[0]); + break; + case Opt_remotename: + match_strcpy(v9ses->remotename, &args[0]); + break; + case Opt_legacy: + v9ses->extended = 0; + break; + case Opt_nodevmap: + v9ses->nodev = 1; + break; + default: + continue; + } + } +} + +/** + * v9fs_inode2v9ses - safely extract v9fs session info from super block + * @inode: inode to extract information from + * + * Paranoid function to extract v9ses information from superblock, + * if anything is missing it will report an error. + * + */ + +struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) +{ + return (inode->i_sb->s_fs_info); +} + +/** + * v9fs_get_idpool - allocate numeric id from pool + * @p - pool to allocate from + * + * XXX - This seems to be an awful generic function, should it be in idr.c with + * the lock included in struct idr? + */ + +int v9fs_get_idpool(struct v9fs_idpool *p) +{ + int i = 0; + int error; + +retry: + if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) + return 0; + + if (down_interruptible(&p->lock) == -EINTR) { + eprintk(KERN_WARNING, "Interrupted while locking\n"); + return -1; + } + + error = idr_get_new(&p->pool, NULL, &i); + up(&p->lock); + + if (error == -EAGAIN) + goto retry; + else if (error) + return -1; + + return i; +} + +/** + * v9fs_put_idpool - release numeric id from pool + * @p - pool to allocate from + * + * XXX - This seems to be an awful generic function, should it be in idr.c with + * the lock included in struct idr? + */ + +void v9fs_put_idpool(int id, struct v9fs_idpool *p) +{ + if (down_interruptible(&p->lock) == -EINTR) { + eprintk(KERN_WARNING, "Interrupted while locking\n"); + return; + } + idr_remove(&p->pool, id); + up(&p->lock); +} + +/** + * v9fs_session_init - initialize session + * @v9ses: session information structure + * @dev_name: device being mounted + * @data: options + * + */ + +int +v9fs_session_init(struct v9fs_session_info *v9ses, + const char *dev_name, char *data) +{ + struct v9fs_fcall *fcall = NULL; + struct v9fs_transport *trans_proto; + int n = 0; + int newfid = -1; + int retval = -EINVAL; + + v9ses->name = __getname(); + if (!v9ses->name) + return -ENOMEM; + + v9ses->remotename = __getname(); + if (!v9ses->remotename) { + putname(v9ses->name); + return -ENOMEM; + } + + strcpy(v9ses->name, V9FS_DEFUSER); + strcpy(v9ses->remotename, V9FS_DEFANAME); + + v9fs_parse_options(data, v9ses); + + /* set global debug level */ + v9fs_debug_level = v9ses->debug; + + /* id pools that are session-dependent: FIDs and TIDs */ + idr_init(&v9ses->fidpool.pool); + init_MUTEX(&v9ses->fidpool.lock); + idr_init(&v9ses->tidpool.pool); + init_MUTEX(&v9ses->tidpool.lock); + + + switch (v9ses->proto) { + case PROTO_TCP: + trans_proto = &v9fs_trans_tcp; + break; + case PROTO_UNIX: + trans_proto = &v9fs_trans_unix; + *v9ses->remotename = 0; + break; + case PROTO_FD: + trans_proto = &v9fs_trans_fd; + *v9ses->remotename = 0; + break; + default: + printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto); + retval = -ENOPROTOOPT; + goto SessCleanUp; + }; + + v9ses->transport = trans_proto; + + if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { + eprintk(KERN_ERR, "problem initializing transport\n"); + goto SessCleanUp; + } + + v9ses->inprogress = 0; + v9ses->shutdown = 0; + v9ses->session_hung = 0; + + if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) { + dprintk(DEBUG_ERROR, "problem initializing mux\n"); + goto SessCleanUp; + } + + if (v9ses->afid == ~0) { + if (v9ses->extended) + retval = + v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u", + &fcall); + else + retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000", + &fcall); + + if (retval < 0) { + dprintk(DEBUG_ERROR, "v9fs_t_version failed\n"); + goto FreeFcall; + } + + /* Really should check for 9P1 and report error */ + if (!strcmp(fcall->params.rversion.version, "9P2000.u")) { + dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n"); + v9ses->extended = 1; + } else { + dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n"); + v9ses->extended = 0; + } + + n = fcall->params.rversion.msize; + kfree(fcall); + + if (n < v9ses->maxdata) + v9ses->maxdata = n; + } + + newfid = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "couldn't allocate FID\n"); + retval = -ENOMEM; + goto SessCleanUp; + } + /* it is a little bit ugly, but we have to prevent newfid */ + /* being the same as afid, so if it is, get a new fid */ + if (v9ses->afid != ~0 && newfid == v9ses->afid) { + newfid = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "couldn't allocate FID\n"); + retval = -ENOMEM; + goto SessCleanUp; + } + } + + if ((retval = + v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid, + v9ses->afid, NULL)) + < 0) { + dprintk(DEBUG_ERROR, "cannot attach\n"); + goto SessCleanUp; + } + + if (v9ses->afid != ~0) { + if (v9fs_t_clunk(v9ses, v9ses->afid, NULL)) + dprintk(DEBUG_ERROR, "clunk failed\n"); + } + + return newfid; + + FreeFcall: + kfree(fcall); + + SessCleanUp: + v9fs_session_close(v9ses); + return retval; +} + +/** + * v9fs_session_close - shutdown a session + * @v9ses: session information structure + * + */ + +void v9fs_session_close(struct v9fs_session_info *v9ses) +{ + if (v9ses->recvproc) { + send_sig(SIGKILL, v9ses->recvproc, 1); + wait_for_completion(&v9ses->proccmpl); + } + + if (v9ses->transport) + v9ses->transport->close(v9ses->transport); + + putname(v9ses->name); + putname(v9ses->remotename); +} + +/** + * v9fs_session_cancel - mark transport as disconnected + * and cancel all pending requests. + */ +void v9fs_session_cancel(struct v9fs_session_info *v9ses) { + v9ses->transport->status = Disconnected; + v9fs_mux_cancel_requests(v9ses, -EIO); +} + +extern int v9fs_error_init(void); + +/** + * v9fs_init - Initialize module + * + */ + +static int __init init_v9fs(void) +{ + v9fs_error_init(); + + printk(KERN_INFO "Installing v9fs 9P2000 file system support\n"); + + return register_filesystem(&v9fs_fs_type); +} + +/** + * v9fs_init - shutdown module + * + */ + +static void __exit exit_v9fs(void) +{ + unregister_filesystem(&v9fs_fs_type); +} + +module_init(init_v9fs) +module_exit(exit_v9fs) + +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); +MODULE_LICENSE("GPL"); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h new file mode 100644 index 00000000000..45dcef42bdd --- /dev/null +++ b/fs/9p/v9fs.h @@ -0,0 +1,103 @@ +/* + * V9FS definitions. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +/* + * Idpool structure provides lock and id management + * + */ + +struct v9fs_idpool { + struct semaphore lock; + struct idr pool; +}; + +/* + * Session structure provides information for an opened session + * + */ + +struct v9fs_session_info { + /* options */ + unsigned int maxdata; + unsigned char extended; /* set to 1 if we are using UNIX extensions */ + unsigned char nodev; /* set to 1 if no disable device mapping */ + unsigned short port; /* port to connect to */ + unsigned short debug; /* debug level */ + unsigned short proto; /* protocol to use */ + unsigned int afid; /* authentication fid */ + unsigned int rfdno; /* read file descriptor number */ + unsigned int wfdno; /* write file descriptor number */ + + + char *name; /* user name to mount as */ + char *remotename; /* name of remote hierarchy being mounted */ + unsigned int uid; /* default uid/muid for legacy support */ + unsigned int gid; /* default gid for legacy support */ + + /* book keeping */ + struct v9fs_idpool fidpool; /* The FID pool for file descriptors */ + struct v9fs_idpool tidpool; /* The TID pool for transactions ids */ + + /* transport information */ + struct v9fs_transport *transport; + + int inprogress; /* session in progress => true */ + int shutdown; /* session shutting down. no more attaches. */ + unsigned char session_hung; + + /* mux private data */ + struct v9fs_fcall *curfcall; + wait_queue_head_t read_wait; + struct completion fcread; + struct completion proccmpl; + struct task_struct *recvproc; + + spinlock_t muxlock; + struct list_head mux_fcalls; +}; + +/* possible values of ->proto */ +enum { + PROTO_TCP, + PROTO_UNIX, + PROTO_FD, +}; + +int v9fs_session_init(struct v9fs_session_info *, const char *, char *); +struct v9fs_session_info *v9fs_inode2v9ses(struct inode *); +void v9fs_session_close(struct v9fs_session_info *v9ses); +int v9fs_get_idpool(struct v9fs_idpool *p); +void v9fs_put_idpool(int id, struct v9fs_idpool *p); +void v9fs_session_cancel(struct v9fs_session_info *v9ses); + +#define V9FS_MAGIC 0x01021997 + +/* other default globals */ +#define V9FS_PORT 564 +#define V9FS_DEFUSER "nobody" +#define V9FS_DEFANAME "" + +/* inital pool sizes for fids and tags */ +#define V9FS_START_FIDS 8192 +#define V9FS_START_TIDS 256 diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h new file mode 100644 index 00000000000..2f2cea7ee3e --- /dev/null +++ b/fs/9p/v9fs_vfs.h @@ -0,0 +1,53 @@ +/* + * V9FS VFS extensions. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +/* plan9 semantics are that created files are implicitly opened. + * But linux semantics are that you call create, then open. + * the plan9 approach is superior as it provides an atomic + * open. + * we track the create fid here. When the file is opened, if fidopen is + * non-zero, we use the fid and can skip some steps. + * there may be a better way to do this, but I don't know it. + * one BAD way is to clunk the fid on create, then open it again: + * you lose the atomicity of file open + */ + +/* special case: + * unlink calls remove, which is an implicit clunk. So we have to track + * that kind of thing so that we don't try to clunk a dead fid. + */ + +extern struct file_system_type v9fs_fs_type; +extern struct file_operations v9fs_file_operations; +extern struct file_operations v9fs_dir_operations; +extern struct dentry_operations v9fs_dentry_operations; + +struct inode *v9fs_get_inode(struct super_block *sb, int mode); +ino_t v9fs_qid2ino(struct v9fs_qid *qid); +void v9fs_mistat2inode(struct v9fs_stat *, struct inode *, + struct super_block *); +int v9fs_dir_release(struct inode *inode, struct file *filp); +int v9fs_file_open(struct inode *inode, struct file *file); +void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat); +void v9fs_dentry_release(struct dentry *); diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c new file mode 100644 index 00000000000..306c96741f8 --- /dev/null +++ b/fs/9p/vfs_dentry.c @@ -0,0 +1,126 @@ +/* + * linux/fs/9p/vfs_dentry.c + * + * This file contians vfs dentry ops for the 9P2000 protocol. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/pagemap.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/inet.h> +#include <linux/namei.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "conv.h" +#include "fid.h" + +/** + * v9fs_dentry_validate - VFS dcache hook to validate cache + * @dentry: dentry that is being validated + * @nd: path data + * + * dcache really shouldn't be used for 9P2000 as at all due to + * potential attached semantics to directory traversal (walk). + * + * FUTURE: look into how to use dcache to allow multi-stage + * walks in Plan 9 & potential for better dcache operation which + * would remain valid for Plan 9 semantics. Older versions + * had validation via stat for those interested. However, since + * stat has the same approximate overhead as walk there really + * is no difference. The only improvement would be from a + * time-decay cache like NFS has and that undermines the + * synchronous nature of 9P2000. + * + */ + +static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *dc = current->fs->pwd; + + dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry); + if (v9fs_fid_lookup(dentry, FID_OP)) { + dprintk(DEBUG_VFS, "VALID\n"); + return 1; + } + + while (dc != NULL) { + if (dc == dentry) { + dprintk(DEBUG_VFS, "VALID\n"); + return 1; + } + if (dc == dc->d_parent) + break; + + dc = dc->d_parent; + } + + dprintk(DEBUG_VFS, "INVALID\n"); + return 0; +} + +/** + * v9fs_dentry_release - called when dentry is going to be freed + * @dentry: dentry that is being release + * + */ + +void v9fs_dentry_release(struct dentry *dentry) +{ + dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + + if (dentry->d_fsdata != NULL) { + struct list_head *fid_list = dentry->d_fsdata; + struct v9fs_fid *temp = NULL; + struct v9fs_fid *current_fid = NULL; + struct v9fs_fcall *fcall = NULL; + + list_for_each_entry_safe(current_fid, temp, fid_list, list) { + if (v9fs_t_clunk + (current_fid->v9ses, current_fid->fid, &fcall)) + dprintk(DEBUG_ERROR, "clunk failed: %s\n", + FCALL_ERROR(fcall)); + + v9fs_put_idpool(current_fid->fid, + ¤t_fid->v9ses->fidpool); + + kfree(fcall); + v9fs_fid_destroy(current_fid); + } + + kfree(dentry->d_fsdata); /* free the list_head */ + } +} + +struct dentry_operations v9fs_dentry_operations = { + .d_revalidate = v9fs_dentry_validate, + .d_release = v9fs_dentry_release, +}; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c new file mode 100644 index 00000000000..c478a738418 --- /dev/null +++ b/fs/9p/vfs_dir.c @@ -0,0 +1,226 @@ +/* + * linux/fs/9p/vfs_dir.c + * + * This file contains vfs directory ops for the 9P2000 protocol. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/inet.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "conv.h" +#include "fid.h" + +/** + * dt_type - return file type + * @mistat: mistat structure + * + */ + +static inline int dt_type(struct v9fs_stat *mistat) +{ + unsigned long perm = mistat->mode; + int rettype = DT_REG; + + if (perm & V9FS_DMDIR) + rettype = DT_DIR; + if (perm & V9FS_DMSYMLINK) + rettype = DT_LNK; + + return rettype; +} + +/** + * v9fs_dir_readdir - read a directory + * @filep: opened file structure + * @dirent: directory structure ??? + * @filldir: function to populate directory structure ??? + * + */ + +static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct v9fs_fcall *fcall = NULL; + struct inode *inode = filp->f_dentry->d_inode; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_fid *file = filp->private_data; + unsigned int i, n; + int fid = -1; + int ret = 0; + struct v9fs_stat *mi = NULL; + int over = 0; + + dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name); + + fid = file->fid; + + mi = kmalloc(v9ses->maxdata, GFP_KERNEL); + if (!mi) + return -ENOMEM; + + if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) { + kfree(file->rdir_fcall); + file->rdir_fcall = NULL; + } + + if (file->rdir_fcall) { + n = file->rdir_fcall->params.rread.count; + i = file->rdir_fpos; + while (i < n) { + int s = v9fs_deserialize_stat(v9ses, + file->rdir_fcall->params.rread.data + i, + n - i, mi, v9ses->maxdata); + + if (s == 0) { + dprintk(DEBUG_ERROR, + "error while deserializing mistat\n"); + ret = -EIO; + goto FreeStructs; + } + + over = filldir(dirent, mi->name, strlen(mi->name), + filp->f_pos, v9fs_qid2ino(&mi->qid), + dt_type(mi)); + + if (over) { + file->rdir_fpos = i; + file->rdir_pos = filp->f_pos; + break; + } + + i += s; + filp->f_pos += s; + } + + if (!over) { + kfree(file->rdir_fcall); + file->rdir_fcall = NULL; + } + } + + while (!over) { + ret = v9fs_t_read(v9ses, fid, filp->f_pos, + v9ses->maxdata-V9FS_IOHDRSZ, &fcall); + if (ret < 0) { + dprintk(DEBUG_ERROR, "error while reading: %d: %p\n", + ret, fcall); + goto FreeStructs; + } else if (ret == 0) + break; + + n = ret; + i = 0; + while (i < n) { + int s = v9fs_deserialize_stat(v9ses, + fcall->params.rread.data + i, n - i, mi, + v9ses->maxdata); + + if (s == 0) { + dprintk(DEBUG_ERROR, + "error while deserializing mistat\n"); + return -EIO; + } + + over = filldir(dirent, mi->name, strlen(mi->name), + filp->f_pos, v9fs_qid2ino(&mi->qid), + dt_type(mi)); + + if (over) { + file->rdir_fcall = fcall; + file->rdir_fpos = i; + file->rdir_pos = filp->f_pos; + fcall = NULL; + break; + } + + i += s; + filp->f_pos += s; + } + + kfree(fcall); + } + + FreeStructs: + kfree(fcall); + kfree(mi); + return ret; +} + +/** + * v9fs_dir_release - close a directory + * @inode: inode of the directory + * @filp: file pointer to a directory + * + */ + +int v9fs_dir_release(struct inode *inode, struct file *filp) +{ + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_fid *fid = filp->private_data; + int fidnum = -1; + + dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, + fid->fid); + fidnum = fid->fid; + + filemap_fdatawrite(inode->i_mapping); + filemap_fdatawait(inode->i_mapping); + + if (fidnum >= 0) { + fid->fidopen--; + dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen, + fid->fid); + + if (fid->fidopen == 0) { + if (v9fs_t_clunk(v9ses, fidnum, NULL)) + dprintk(DEBUG_ERROR, "clunk failed\n"); + + v9fs_put_idpool(fid->fid, &v9ses->fidpool); + } + + kfree(fid->rdir_fcall); + + filp->private_data = NULL; + v9fs_fid_destroy(fid); + } + + d_drop(filp->f_dentry); + return 0; +} + +struct file_operations v9fs_dir_operations = { + .read = generic_read_dir, + .readdir = v9fs_dir_readdir, + .open = v9fs_file_open, + .release = v9fs_dir_release, +}; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c new file mode 100644 index 00000000000..1f8ae7d580a --- /dev/null +++ b/fs/9p/vfs_file.c @@ -0,0 +1,401 @@ +/* + * linux/fs/9p/vfs_file.c + * + * This file contians vfs file ops for 9P2000. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/inet.h> +#include <linux/version.h> +#include <linux/list.h> +#include <asm/uaccess.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "fid.h" + +/** + * v9fs_file_open - open a file (or directory) + * @inode: inode to be opened + * @file: file being opened + * + */ + +int v9fs_file_open(struct inode *inode, struct file *file) +{ + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK); + struct v9fs_fid *v9newfid = NULL; + struct v9fs_fcall *fcall = NULL; + int open_mode = 0; + unsigned int iounit = 0; + int newfid = -1; + long result = -1; + + dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file, + v9fid); + + if (!v9fid) { + struct dentry *dentry = file->f_dentry; + dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n"); + + /* XXX - some duplication from lookup, generalize later */ + /* basically vfs_lookup is too heavy weight */ + v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP); + if (!v9fid) + return -EBADF; + + v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK); + if (!v9fid) + return -EBADF; + + newfid = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "newfid fails!\n"); + return -ENOSPC; + } + + result = + v9fs_t_walk(v9ses, v9fid->fid, newfid, + (char *)file->f_dentry->d_name.name, NULL); + if (result < 0) { + v9fs_put_idpool(newfid, &v9ses->fidpool); + dprintk(DEBUG_ERROR, "rewalk didn't work\n"); + return -EBADF; + } + + v9fid = v9fs_fid_create(dentry); + if (v9fid == NULL) { + dprintk(DEBUG_ERROR, "couldn't insert\n"); + return -ENOMEM; + } + v9fid->fid = newfid; + } + + if (v9fid->fidcreate) { + /* create case */ + newfid = v9fid->fid; + iounit = v9fid->iounit; + v9fid->fidcreate = 0; + } else { + if (!S_ISDIR(inode->i_mode)) + newfid = v9fid->fid; + else { + newfid = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "allocation failed\n"); + return -ENOSPC; + } + /* This would be a somewhat critical clone */ + result = + v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL, + &fcall); + if (result < 0) { + dprintk(DEBUG_ERROR, "clone error: %s\n", + FCALL_ERROR(fcall)); + kfree(fcall); + return result; + } + + v9newfid = v9fs_fid_create(file->f_dentry); + v9newfid->fid = newfid; + v9newfid->qid = v9fid->qid; + v9newfid->iounit = v9fid->iounit; + v9newfid->fidopen = 0; + v9newfid->fidclunked = 0; + v9newfid->v9ses = v9ses; + v9fid = v9newfid; + kfree(fcall); + } + + /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */ + /* translate open mode appropriately */ + open_mode = file->f_flags & 0x3; + + if (file->f_flags & O_EXCL) + open_mode |= V9FS_OEXCL; + + if (v9ses->extended) { + if (file->f_flags & O_TRUNC) + open_mode |= V9FS_OTRUNC; + + if (file->f_flags & O_APPEND) + open_mode |= V9FS_OAPPEND; + } + + result = v9fs_t_open(v9ses, newfid, open_mode, &fcall); + if (result < 0) { + dprintk(DEBUG_ERROR, + "open failed, open_mode 0x%x: %s\n", open_mode, + FCALL_ERROR(fcall)); + kfree(fcall); + return result; + } + + iounit = fcall->params.ropen.iounit; + kfree(fcall); + } + + + file->private_data = v9fid; + + v9fid->rdir_pos = 0; + v9fid->rdir_fcall = NULL; + v9fid->fidopen = 1; + v9fid->filp = file; + v9fid->iounit = iounit; + + return 0; +} + +/** + * v9fs_file_lock - lock a file (or directory) + * @inode: inode to be opened + * @file: file being opened + * + * XXX - this looks like a local only lock, we should extend into 9P + * by using open exclusive + */ + +static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + int res = 0; + struct inode *inode = filp->f_dentry->d_inode; + + dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); + + /* No mandatory locks */ + if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { + filemap_fdatawrite(inode->i_mapping); + filemap_fdatawait(inode->i_mapping); + invalidate_inode_pages(&inode->i_data); + } + + return res; +} + +/** + * v9fs_read - read from a file (internal) + * @filep: file pointer to read + * @data: data buffer to read data into + * @count: size of buffer + * @offset: offset at which to read data + * + */ + +static ssize_t +v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_fid *v9f = filp->private_data; + struct v9fs_fcall *fcall = NULL; + int fid = v9f->fid; + int rsize = 0; + int result = 0; + int total = 0; + + dprintk(DEBUG_VFS, "\n"); + + rsize = v9ses->maxdata - V9FS_IOHDRSZ; + if (v9f->iounit != 0 && rsize > v9f->iounit) + rsize = v9f->iounit; + + do { + if (count < rsize) + rsize = count; + + result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall); + + if (result < 0) { + printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n", + result); + + kfree(fcall); + return total; + } else + *offset += result; + + /* XXX - extra copy */ + memcpy(buffer, fcall->params.rread.data, result); + count -= result; + buffer += result; + total += result; + + kfree(fcall); + + if (result < rsize) + break; + } while (count); + + return total; +} + +/** + * v9fs_file_read - read from a file + * @filep: file pointer to read + * @data: data buffer to read data into + * @count: size of buffer + * @offset: offset at which to read data + * + */ + +static ssize_t +v9fs_file_read(struct file *filp, char __user * data, size_t count, + loff_t * offset) +{ + int retval = -1; + int ret = 0; + char *buffer; + + buffer = kmalloc(count, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + retval = v9fs_read(filp, buffer, count, offset); + if (retval > 0) { + if ((ret = copy_to_user(data, buffer, retval)) != 0) { + dprintk(DEBUG_ERROR, "Problem copying to user %d\n", + ret); + retval = ret; + } + } + + kfree(buffer); + + return retval; +} + +/** + * v9fs_write - write to a file + * @filep: file pointer to write + * @data: data buffer to write data from + * @count: size of buffer + * @offset: offset at which to write data + * + */ + +static ssize_t +v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + struct v9fs_fid *v9fid = filp->private_data; + struct v9fs_fcall *fcall; + int fid = v9fid->fid; + int result = -EIO; + int rsize = 0; + int total = 0; + + dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count, + (int)*offset); + rsize = v9ses->maxdata - V9FS_IOHDRSZ; + if (v9fid->iounit != 0 && rsize > v9fid->iounit) + rsize = v9fid->iounit; + + dump_data(buffer, count); + + do { + if (count < rsize) + rsize = count; + + result = + v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall); + if (result < 0) { + eprintk(KERN_ERR, "error while writing: %s(%d)\n", + FCALL_ERROR(fcall), result); + kfree(fcall); + return result; + } else + *offset += result; + + kfree(fcall); + + if (result != rsize) { + eprintk(KERN_ERR, + "short write: v9fs_t_write returned %d\n", + result); + break; + } + + count -= result; + buffer += result; + total += result; + } while (count); + + return total; +} + +/** + * v9fs_file_write - write to a file + * @filep: file pointer to write + * @data: data buffer to write data from + * @count: size of buffer + * @offset: offset at which to write data + * + */ + +static ssize_t +v9fs_file_write(struct file *filp, const char __user * data, + size_t count, loff_t * offset) +{ + int ret = -1; + char *buffer; + + buffer = kmalloc(count, GFP_KERNEL); + if (buffer == NULL) + return -ENOMEM; + + ret = copy_from_user(buffer, data, count); + if (ret) { + dprintk(DEBUG_ERROR, "Problem copying from user\n"); + ret = -EFAULT; + } else { + ret = v9fs_write(filp, buffer, count, offset); + } + + kfree(buffer); + + return ret; +} + +struct file_operations v9fs_file_operations = { + .llseek = generic_file_llseek, + .read = v9fs_file_read, + .write = v9fs_file_write, + .open = v9fs_file_open, + .release = v9fs_dir_release, + .lock = v9fs_file_lock, +}; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c new file mode 100644 index 00000000000..0c13fc60004 --- /dev/null +++ b/fs/9p/vfs_inode.c @@ -0,0 +1,1338 @@ +/* + * linux/fs/9p/vfs_inode.c + * + * This file contains vfs inode ops for the 9P2000 protocol. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/pagemap.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/inet.h> +#include <linux/namei.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "conv.h" +#include "fid.h" + +static struct inode_operations v9fs_dir_inode_operations; +static struct inode_operations v9fs_dir_inode_operations_ext; +static struct inode_operations v9fs_file_inode_operations; +static struct inode_operations v9fs_symlink_inode_operations; + +/** + * unixmode2p9mode - convert unix mode bits to plan 9 + * @v9ses: v9fs session information + * @mode: mode to convert + * + */ + +static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) +{ + int res; + res = mode & 0777; + if (S_ISDIR(mode)) + res |= V9FS_DMDIR; + if (v9ses->extended) { + if (S_ISLNK(mode)) + res |= V9FS_DMSYMLINK; + if (v9ses->nodev == 0) { + if (S_ISSOCK(mode)) + res |= V9FS_DMSOCKET; + if (S_ISFIFO(mode)) + res |= V9FS_DMNAMEDPIPE; + if (S_ISBLK(mode)) + res |= V9FS_DMDEVICE; + if (S_ISCHR(mode)) + res |= V9FS_DMDEVICE; + } + + if ((mode & S_ISUID) == S_ISUID) + res |= V9FS_DMSETUID; + if ((mode & S_ISGID) == S_ISGID) + res |= V9FS_DMSETGID; + if ((mode & V9FS_DMLINK)) + res |= V9FS_DMLINK; + } + + return res; +} + +/** + * p9mode2unixmode- convert plan9 mode bits to unix mode bits + * @v9ses: v9fs session information + * @mode: mode to convert + * + */ + +static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) +{ + int res; + + res = mode & 0777; + + if ((mode & V9FS_DMDIR) == V9FS_DMDIR) + res |= S_IFDIR; + else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended)) + res |= S_IFLNK; + else if ((mode & V9FS_DMSOCKET) && (v9ses->extended) + && (v9ses->nodev == 0)) + res |= S_IFSOCK; + else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended) + && (v9ses->nodev == 0)) + res |= S_IFIFO; + else if ((mode & V9FS_DMDEVICE) && (v9ses->extended) + && (v9ses->nodev == 0)) + res |= S_IFBLK; + else + res |= S_IFREG; + + if (v9ses->extended) { + if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID) + res |= S_ISUID; + + if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID) + res |= S_ISGID; + } + + return res; +} + +/** + * v9fs_blank_mistat - helper function to setup a 9P stat structure + * @v9ses: 9P session info (for determining extended mode) + * @mistat: structure to initialize + * + */ + +static void +v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat) +{ + mistat->type = ~0; + mistat->dev = ~0; + mistat->qid.type = ~0; + mistat->qid.version = ~0; + *((long long *)&mistat->qid.path) = ~0; + mistat->mode = ~0; + mistat->atime = ~0; + mistat->mtime = ~0; + mistat->length = ~0; + mistat->name = mistat->data; + mistat->uid = mistat->data; + mistat->gid = mistat->data; + mistat->muid = mistat->data; + if (v9ses->extended) { + mistat->n_uid = ~0; + mistat->n_gid = ~0; + mistat->n_muid = ~0; + mistat->extension = mistat->data; + } + *mistat->data = 0; +} + +/** + * v9fs_mistat2unix - convert mistat to unix stat + * @mistat: Plan 9 metadata (mistat) structure + * @buf: unix metadata (stat) structure to populate + * @sb: superblock + * + */ + +static void +v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf, + struct super_block *sb) +{ + struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL; + + buf->st_nlink = 1; + + buf->st_atime = mistat->atime; + buf->st_mtime = mistat->mtime; + buf->st_ctime = mistat->mtime; + + buf->st_uid = (unsigned short)-1; + buf->st_gid = (unsigned short)-1; + + if (v9ses && v9ses->extended) { + /* TODO: string to uid mapping via user-space daemon */ + if (mistat->n_uid != -1) + sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid); + + if (mistat->n_gid != -1) + sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid); + } + + if (buf->st_uid == (unsigned short)-1) + buf->st_uid = v9ses->uid; + if (buf->st_gid == (unsigned short)-1) + buf->st_gid = v9ses->gid; + + buf->st_mode = p9mode2unixmode(v9ses, mistat->mode); + if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) { + char type = 0; + int major = -1; + int minor = -1; + sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); + switch (type) { + case 'c': + buf->st_mode &= ~S_IFBLK; + buf->st_mode |= S_IFCHR; + break; + case 'b': + break; + default: + dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", + type, mistat->extension); + }; + buf->st_rdev = MKDEV(major, minor); + } else + buf->st_rdev = 0; + + buf->st_size = mistat->length; + + buf->st_blksize = sb->s_blocksize; + buf->st_blocks = + (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits; +} + +/** + * v9fs_get_inode - helper function to setup an inode + * @sb: superblock + * @mode: mode to setup inode with + * + */ + +struct inode *v9fs_get_inode(struct super_block *sb, int mode) +{ + struct inode *inode = NULL; + struct v9fs_session_info *v9ses = sb->s_fs_info; + + dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); + + inode = new_inode(sb); + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = sb->s_blocksize; + inode->i_blocks = 0; + inode->i_rdev = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFBLK: + case S_IFCHR: + case S_IFSOCK: + if(!v9ses->extended) { + dprintk(DEBUG_ERROR, "special files without extended mode\n"); + return ERR_PTR(-EINVAL); + } + init_special_inode(inode, inode->i_mode, + inode->i_rdev); + break; + case S_IFREG: + inode->i_op = &v9fs_file_inode_operations; + inode->i_fop = &v9fs_file_operations; + break; + case S_IFLNK: + if(!v9ses->extended) { + dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n"); + return ERR_PTR(-EINVAL); + } + inode->i_op = &v9fs_symlink_inode_operations; + break; + case S_IFDIR: + inode->i_nlink++; + if(v9ses->extended) + inode->i_op = &v9fs_dir_inode_operations_ext; + else + inode->i_op = &v9fs_dir_inode_operations; + inode->i_fop = &v9fs_dir_operations; + break; + default: + dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", + mode, mode & S_IFMT); + return ERR_PTR(-EINVAL); + } + } else { + eprintk(KERN_WARNING, "Problem allocating inode\n"); + return ERR_PTR(-ENOMEM); + } + return inode; +} + +/** + * v9fs_create - helper function to create files and directories + * @dir: directory inode file is being created in + * @file_dentry: dentry file is being created in + * @perm: permissions file is being created with + * @open_mode: resulting open mode for file + * + */ + +static int +v9fs_create(struct inode *dir, + struct dentry *file_dentry, + unsigned int perm, unsigned int open_mode) +{ + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); + struct super_block *sb = dir->i_sb; + struct v9fs_fid *dirfid = + v9fs_fid_lookup(file_dentry->d_parent, FID_WALK); + struct v9fs_fid *fid = NULL; + struct inode *file_inode = NULL; + struct v9fs_fcall *fcall = NULL; + struct v9fs_qid qid; + struct stat newstat; + int dirfidnum = -1; + long newfid = -1; + int result = 0; + unsigned int iounit = 0; + + perm = unixmode2p9mode(v9ses, perm); + + dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir, + file_dentry, perm, open_mode); + + if (!dirfid) + return -EBADF; + + dirfidnum = dirfid->fid; + if (dirfidnum < 0) { + dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n", + dir->i_ino); + return -EBADF; + } + + if (file_dentry->d_inode) { + dprintk(DEBUG_ERROR, + "Odd. There is an inode for dir %lu, name :%s:\n", + dir->i_ino, file_dentry->d_name.name); + return -EEXIST; + } + + newfid = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "no free fids available\n"); + return -ENOSPC; + } + + result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall); + if (result < 0) { + dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall)); + v9fs_put_idpool(newfid, &v9ses->fidpool); + newfid = 0; + goto CleanUpFid; + } + + kfree(fcall); + + result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name, + perm, open_mode, &fcall); + if (result < 0) { + dprintk(DEBUG_ERROR, "create fails: %s(%d)\n", + FCALL_ERROR(fcall), result); + + goto CleanUpFid; + } + + iounit = fcall->params.rcreate.iounit; + qid = fcall->params.rcreate.qid; + kfree(fcall); + + fid = v9fs_fid_create(file_dentry); + if (!fid) { + result = -ENOMEM; + goto CleanUpFid; + } + + fid->fid = newfid; + fid->fidopen = 0; + fid->fidcreate = 1; + fid->qid = qid; + fid->iounit = iounit; + fid->rdir_pos = 0; + fid->rdir_fcall = NULL; + fid->v9ses = v9ses; + + if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) || + (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) || + (perm & V9FS_DMDEVICE)) + return 0; + + result = v9fs_t_stat(v9ses, newfid, &fcall); + if (result < 0) { + dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall), + result); + goto CleanUpFid; + } + + v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); + + file_inode = v9fs_get_inode(sb, newstat.st_mode); + if ((!file_inode) || IS_ERR(file_inode)) { + dprintk(DEBUG_ERROR, "create inode failed\n"); + result = -EBADF; + goto CleanUpFid; + } + + v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb); + kfree(fcall); + d_instantiate(file_dentry, file_inode); + + if (perm & V9FS_DMDIR) { + if (v9fs_t_clunk(v9ses, newfid, &fcall)) + dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n", + FCALL_ERROR(fcall)); + + v9fs_put_idpool(newfid, &v9ses->fidpool); + kfree(fcall); + fid->fidopen = 0; + fid->fidcreate = 0; + d_drop(file_dentry); + } + + return 0; + + CleanUpFid: + kfree(fcall); + + if (newfid) { + if (v9fs_t_clunk(v9ses, newfid, &fcall)) + dprintk(DEBUG_ERROR, "clunk failed: %s\n", + FCALL_ERROR(fcall)); + + v9fs_put_idpool(newfid, &v9ses->fidpool); + kfree(fcall); + } + return result; +} + +/** + * v9fs_remove - helper function to remove files and directories + * @dir: directory inode that is being deleted + * @file: dentry that is being deleted + * @rmdir: removing a directory + * + */ + +static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) +{ + struct v9fs_fcall *fcall = NULL; + struct super_block *sb = NULL; + struct v9fs_session_info *v9ses = NULL; + struct v9fs_fid *v9fid = NULL; + struct inode *file_inode = NULL; + int fid = -1; + int result = 0; + + dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, + rmdir); + + file_inode = file->d_inode; + sb = file_inode->i_sb; + v9ses = v9fs_inode2v9ses(file_inode); + v9fid = v9fs_fid_lookup(file, FID_OP); + + if (!v9fid) { + dprintk(DEBUG_ERROR, + "no v9fs_fid\n"); + return -EBADF; + } + + fid = v9fid->fid; + if (fid < 0) { + dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n", + file_inode->i_ino); + return -EBADF; + } + + result = v9fs_t_remove(v9ses, fid, &fcall); + if (result < 0) + dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n", + FCALL_ERROR(fcall), result); + else { + v9fs_put_idpool(fid, &v9ses->fidpool); + v9fs_fid_destroy(v9fid); + } + + kfree(fcall); + return result; +} + +/** + * v9fs_vfs_create - VFS hook to create files + * @inode: directory inode that is being deleted + * @dentry: dentry that is being deleted + * @perm: create permissions + * @nd: path information + * + */ + +static int +v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm, + struct nameidata *nd) +{ + return v9fs_create(inode, dentry, perm, O_RDWR); +} + +/** + * v9fs_vfs_mkdir - VFS mkdir hook to create a directory + * @inode: inode that is being unlinked + * @dentry: dentry that is being unlinked + * @mode: mode for new directory + * + */ + +static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode) +{ + return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY); +} + +/** + * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode + * @dir: inode that is being walked from + * @dentry: dentry that is being walked to? + * @nameidata: path data + * + */ + +static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nameidata) +{ + struct super_block *sb; + struct v9fs_session_info *v9ses; + struct v9fs_fid *dirfid; + struct v9fs_fid *fid; + struct inode *inode; + struct v9fs_fcall *fcall = NULL; + struct stat newstat; + int dirfidnum = -1; + int newfid = -1; + int result = 0; + + dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", + dir, dentry->d_iname, dentry, nameidata); + + sb = dir->i_sb; + v9ses = v9fs_inode2v9ses(dir); + dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK); + + if (!dirfid) { + dprintk(DEBUG_ERROR, "no dirfid\n"); + return ERR_PTR(-EINVAL); + } + + dirfidnum = dirfid->fid; + + if (dirfidnum < 0) { + dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n", + dir, dir->i_ino); + return ERR_PTR(-EBADF); + } + + newfid = v9fs_get_idpool(&v9ses->fidpool); + if (newfid < 0) { + eprintk(KERN_WARNING, "newfid fails!\n"); + return ERR_PTR(-ENOSPC); + } + + result = + v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name, + NULL); + if (result < 0) { + v9fs_put_idpool(newfid, &v9ses->fidpool); + if (result == -ENOENT) { + d_add(dentry, NULL); + dprintk(DEBUG_ERROR, + "Return negative dentry %p count %d\n", + dentry, atomic_read(&dentry->d_count)); + return NULL; + } + dprintk(DEBUG_ERROR, "walk error:%d\n", result); + goto FreeFcall; + } + + result = v9fs_t_stat(v9ses, newfid, &fcall); + if (result < 0) { + dprintk(DEBUG_ERROR, "stat error\n"); + goto FreeFcall; + } + + v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb); + inode = v9fs_get_inode(sb, newstat.st_mode); + + if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) { + eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n", + PTR_ERR(inode)); + + result = -ENOSPC; + goto FreeFcall; + } + + inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid); + + fid = v9fs_fid_create(dentry); + if (fid == NULL) { + dprintk(DEBUG_ERROR, "couldn't insert\n"); + result = -ENOMEM; + goto FreeFcall; + } + + fid->fid = newfid; + fid->fidopen = 0; + fid->v9ses = v9ses; + fid->qid = fcall->params.rstat.stat->qid; + + dentry->d_op = &v9fs_dentry_operations; + v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb); + + d_add(dentry, inode); + kfree(fcall); + + return NULL; + + FreeFcall: + kfree(fcall); + return ERR_PTR(result); +} + +/** + * v9fs_vfs_unlink - VFS unlink hook to delete an inode + * @i: inode that is being unlinked + * @d: dentry that is being unlinked + * + */ + +static int v9fs_vfs_unlink(struct inode *i, struct dentry *d) +{ + return v9fs_remove(i, d, 0); +} + +/** + * v9fs_vfs_rmdir - VFS unlink hook to delete a directory + * @i: inode that is being unlinked + * @d: dentry that is being unlinked + * + */ + +static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) +{ + return v9fs_remove(i, d, 1); +} + +/** + * v9fs_vfs_rename - VFS hook to rename an inode + * @old_dir: old dir inode + * @old_dentry: old dentry + * @new_dir: new dir inode + * @new_dentry: new dentry + * + */ + +static int +v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *old_inode = old_dentry->d_inode; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode); + struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK); + struct v9fs_fid *olddirfid = + v9fs_fid_lookup(old_dentry->d_parent, FID_WALK); + struct v9fs_fid *newdirfid = + v9fs_fid_lookup(new_dentry->d_parent, FID_WALK); + struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + struct v9fs_fcall *fcall = NULL; + int fid = -1; + int olddirfidnum = -1; + int newdirfidnum = -1; + int retval = 0; + + dprintk(DEBUG_VFS, "\n"); + + if (!mistat) + return -ENOMEM; + + if ((!oldfid) || (!olddirfid) || (!newdirfid)) { + dprintk(DEBUG_ERROR, "problem with arguments\n"); + return -EBADF; + } + + /* 9P can only handle file rename in the same directory */ + if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) { + dprintk(DEBUG_ERROR, "old dir and new dir are different\n"); + retval = -EPERM; + goto FreeFcallnBail; + } + + fid = oldfid->fid; + olddirfidnum = olddirfid->fid; + newdirfidnum = newdirfid->fid; + + if (fid < 0) { + dprintk(DEBUG_ERROR, "no fid for old file #%lu\n", + old_inode->i_ino); + retval = -EBADF; + goto FreeFcallnBail; + } + + v9fs_blank_mistat(v9ses, mistat); + + strcpy(mistat->data + 1, v9ses->name); + mistat->name = mistat->data + 1 + strlen(v9ses->name); + + if (new_dentry->d_name.len > + (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) { + dprintk(DEBUG_ERROR, "new name too long\n"); + goto FreeFcallnBail; + } + + strcpy(mistat->name, new_dentry->d_name.name); + retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall); + + FreeFcallnBail: + kfree(mistat); + + if (retval < 0) + dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", + FCALL_ERROR(fcall)); + + kfree(fcall); + return retval; +} + +/** + * v9fs_vfs_getattr - retreive file metadata + * @mnt - mount information + * @dentry - file to get attributes on + * @stat - metadata structure to populate + * + */ + +static int +v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct v9fs_fcall *fcall = NULL; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); + struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); + int err = -EPERM; + + dprintk(DEBUG_VFS, "dentry: %p\n", dentry); + if (!fid) { + dprintk(DEBUG_ERROR, + "couldn't find fid associated with dentry\n"); + return -EBADF; + } + + err = v9fs_t_stat(v9ses, fid->fid, &fcall); + + if (err < 0) + dprintk(DEBUG_ERROR, "stat error\n"); + else { + v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode, + dentry->d_inode->i_sb); + generic_fillattr(dentry->d_inode, stat); + } + + kfree(fcall); + return err; +} + +/** + * v9fs_vfs_setattr - set file metadata + * @dentry: file whose metadata to set + * @iattr: metadata assignment structure + * + */ + +static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); + struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); + struct v9fs_fcall *fcall = NULL; + struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + int res = -EPERM; + + dprintk(DEBUG_VFS, "\n"); + + if (!mistat) + return -ENOMEM; + + if (!fid) { + dprintk(DEBUG_ERROR, + "Couldn't find fid associated with dentry\n"); + return -EBADF; + } + + v9fs_blank_mistat(v9ses, mistat); + if (iattr->ia_valid & ATTR_MODE) + mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode); + + if (iattr->ia_valid & ATTR_MTIME) + mistat->mtime = iattr->ia_mtime.tv_sec; + + if (iattr->ia_valid & ATTR_ATIME) + mistat->atime = iattr->ia_atime.tv_sec; + + if (iattr->ia_valid & ATTR_SIZE) + mistat->length = iattr->ia_size; + + if (v9ses->extended) { + char *ptr = mistat->data+1; + + if (iattr->ia_valid & ATTR_UID) { + mistat->uid = ptr; + ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid); + mistat->n_uid = iattr->ia_uid; + } + + if (iattr->ia_valid & ATTR_GID) { + mistat->gid = ptr; + ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid); + mistat->n_gid = iattr->ia_gid; + } + } + + res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall); + + if (res < 0) + dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall)); + + kfree(mistat); + kfree(fcall); + + if (res >= 0) + res = inode_setattr(dentry->d_inode, iattr); + + return res; +} + +/** + * v9fs_mistat2inode - populate an inode structure with mistat info + * @mistat: Plan 9 metadata (mistat) structure + * @inode: inode to populate + * @sb: superblock of filesystem + * + */ + +void +v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode, + struct super_block *sb) +{ + struct v9fs_session_info *v9ses = sb->s_fs_info; + + inode->i_nlink = 1; + + inode->i_atime.tv_sec = mistat->atime; + inode->i_mtime.tv_sec = mistat->mtime; + inode->i_ctime.tv_sec = mistat->mtime; + + inode->i_uid = -1; + inode->i_gid = -1; + + if (v9ses->extended) { + /* TODO: string to uid mapping via user-space daemon */ + inode->i_uid = mistat->n_uid; + inode->i_gid = mistat->n_gid; + + if (mistat->n_uid == -1) + sscanf(mistat->uid, "%x", &inode->i_uid); + + if (mistat->n_gid == -1) + sscanf(mistat->gid, "%x", &inode->i_gid); + } + + if (inode->i_uid == -1) + inode->i_uid = v9ses->uid; + if (inode->i_gid == -1) + inode->i_gid = v9ses->gid; + + inode->i_mode = p9mode2unixmode(v9ses, mistat->mode); + if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { + char type = 0; + int major = -1; + int minor = -1; + sscanf(mistat->extension, "%c %u %u", &type, &major, &minor); + switch (type) { + case 'c': + inode->i_mode &= ~S_IFBLK; + inode->i_mode |= S_IFCHR; + break; + case 'b': + break; + default: + dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n", + type, mistat->extension); + }; + inode->i_rdev = MKDEV(major, minor); + } else + inode->i_rdev = 0; + + inode->i_size = mistat->length; + + inode->i_blksize = sb->s_blocksize; + inode->i_blocks = + (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits; +} + +/** + * v9fs_qid2ino - convert qid into inode number + * @qid: qid to hash + * + * BUG: potential for inode number collisions? + */ + +ino_t v9fs_qid2ino(struct v9fs_qid *qid) +{ + u64 path = qid->path + 2; + ino_t i = 0; + + if (sizeof(ino_t) == sizeof(path)) + memcpy(&i, &path, sizeof(ino_t)); + else + i = (ino_t) (path ^ (path >> 32)); + + return i; +} + +/** + * v9fs_vfs_symlink - helper function to create symlinks + * @dir: directory inode containing symlink + * @dentry: dentry for symlink + * @symname: symlink data + * + * See 9P2000.u RFC for more information + * + */ + +static int +v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + int retval = -EPERM; + struct v9fs_fid *newfid; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); + struct v9fs_fcall *fcall = NULL; + struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + + dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, + symname); + + if (!mistat) + return -ENOMEM; + + if (!v9ses->extended) { + dprintk(DEBUG_ERROR, "not extended\n"); + goto FreeFcall; + } + + /* issue a create */ + retval = v9fs_create(dir, dentry, S_IFLNK, 0); + if (retval != 0) + goto FreeFcall; + + newfid = v9fs_fid_lookup(dentry, FID_OP); + + /* issue a twstat */ + v9fs_blank_mistat(v9ses, mistat); + strcpy(mistat->data + 1, symname); + mistat->extension = mistat->data + 1; + retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); + if (retval < 0) { + dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", + FCALL_ERROR(fcall)); + goto FreeFcall; + } + + kfree(fcall); + + if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { + dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", + FCALL_ERROR(fcall)); + goto FreeFcall; + } + + d_drop(dentry); /* FID - will this also clunk? */ + + FreeFcall: + kfree(mistat); + kfree(fcall); + + return retval; +} + +/** + * v9fs_readlink - read a symlink's location (internal version) + * @dentry: dentry for symlink + * @buffer: buffer to load symlink location into + * @buflen: length of buffer + * + */ + +static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + int retval = -EPERM; + + struct v9fs_fcall *fcall = NULL; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode); + struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP); + + if (!fid) { + dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n"); + retval = -EBADF; + goto FreeFcall; + } + + if (!v9ses->extended) { + retval = -EBADF; + dprintk(DEBUG_ERROR, "not extended\n"); + goto FreeFcall; + } + + dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name); + retval = v9fs_t_stat(v9ses, fid->fid, &fcall); + + if (retval < 0) { + dprintk(DEBUG_ERROR, "stat error\n"); + goto FreeFcall; + } + + if (!fcall) + return -EIO; + + if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) { + retval = -EINVAL; + goto FreeFcall; + } + + /* copy extension buffer into buffer */ + if (strlen(fcall->params.rstat.stat->extension) < buflen) + buflen = strlen(fcall->params.rstat.stat->extension); + + memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1); + + retval = buflen; + + FreeFcall: + kfree(fcall); + + return retval; +} + +/** + * v9fs_vfs_readlink - read a symlink's location + * @dentry: dentry for symlink + * @buf: buffer to load symlink location into + * @buflen: length of buffer + * + */ + +static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, + int buflen) +{ + int retval; + int ret; + char *link = __getname(); + + if (strlen(link) < buflen) + buflen = strlen(link); + + dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); + + retval = v9fs_readlink(dentry, link, buflen); + + if (retval > 0) { + if ((ret = copy_to_user(buffer, link, retval)) != 0) { + dprintk(DEBUG_ERROR, "problem copying to user: %d\n", + ret); + retval = ret; + } + } + + putname(link); + return retval; +} + +/** + * v9fs_vfs_follow_link - follow a symlink path + * @dentry: dentry for symlink + * @nd: nameidata + * + */ + +static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + int len = 0; + char *link = __getname(); + + dprintk(DEBUG_VFS, "%s n", dentry->d_name.name); + + if (!link) + link = ERR_PTR(-ENOMEM); + else { + len = v9fs_readlink(dentry, link, strlen(link)); + + if (len < 0) { + putname(link); + link = ERR_PTR(len); + } else + link[len] = 0; + } + nd_set_link(nd, link); + + return NULL; +} + +/** + * v9fs_vfs_put_link - release a symlink path + * @dentry: dentry for symlink + * @nd: nameidata + * + */ + +static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +{ + char *s = nd_get_link(nd); + + dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s); + if (!IS_ERR(s)) + putname(s); +} + +/** + * v9fs_vfs_link - create a hardlink + * @old_dentry: dentry for file to link to + * @dir: inode destination for new link + * @dentry: dentry for link + * + */ + +/* XXX - lots of code dup'd from symlink and creates, + * figure out a better reuse strategy + */ + +static int +v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + int retval = -EPERM; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); + struct v9fs_fcall *fcall = NULL; + struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP); + struct v9fs_fid *newfid = NULL; + char *symname = __getname(); + + dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, + old_dentry->d_name.name); + + if (!v9ses->extended) { + dprintk(DEBUG_ERROR, "not extended\n"); + goto FreeMem; + } + + /* get fid of old_dentry */ + sprintf(symname, "hardlink(%d)\n", oldfid->fid); + + /* issue a create */ + retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0); + if (retval != 0) + goto FreeMem; + + newfid = v9fs_fid_lookup(dentry, FID_OP); + if (!newfid) { + dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n"); + goto FreeMem; + } + + /* issue a twstat */ + v9fs_blank_mistat(v9ses, mistat); + strcpy(mistat->data + 1, symname); + mistat->extension = mistat->data + 1; + retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); + if (retval < 0) { + dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", + FCALL_ERROR(fcall)); + goto FreeMem; + } + + kfree(fcall); + + if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { + dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", + FCALL_ERROR(fcall)); + goto FreeMem; + } + + d_drop(dentry); /* FID - will this also clunk? */ + + kfree(fcall); + fcall = NULL; + + FreeMem: + kfree(mistat); + kfree(fcall); + putname(symname); + return retval; +} + +/** + * v9fs_vfs_mknod - create a special file + * @dir: inode destination for new link + * @dentry: dentry for file + * @mode: mode for creation + * @dev_t: device associated with special file + * + */ + +static int +v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +{ + int retval = -EPERM; + struct v9fs_fid *newfid; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); + struct v9fs_fcall *fcall = NULL; + struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL); + char *symname = __getname(); + + dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, + dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); + + if (!mistat) + return -ENOMEM; + + if (!new_valid_dev(rdev)) { + retval = -EINVAL; + goto FreeMem; + } + + if (!v9ses->extended) { + dprintk(DEBUG_ERROR, "not extended\n"); + goto FreeMem; + } + + /* issue a create */ + retval = v9fs_create(dir, dentry, mode, 0); + + if (retval != 0) + goto FreeMem; + + newfid = v9fs_fid_lookup(dentry, FID_OP); + if (!newfid) { + dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n"); + retval = -EINVAL; + goto FreeMem; + } + + /* build extension */ + if (S_ISBLK(mode)) + sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev)); + else if (S_ISCHR(mode)) + sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev)); + else if (S_ISFIFO(mode)) + ; /* DO NOTHING */ + else { + retval = -EINVAL; + goto FreeMem; + } + + if (!S_ISFIFO(mode)) { + /* issue a twstat */ + v9fs_blank_mistat(v9ses, mistat); + strcpy(mistat->data + 1, symname); + mistat->extension = mistat->data + 1; + retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall); + if (retval < 0) { + dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n", + FCALL_ERROR(fcall)); + goto FreeMem; + } + } + + /* need to update dcache so we show up */ + kfree(fcall); + + if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) { + dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n", + FCALL_ERROR(fcall)); + goto FreeMem; + } + + d_drop(dentry); /* FID - will this also clunk? */ + + FreeMem: + kfree(mistat); + kfree(fcall); + putname(symname); + + return retval; +} + +static struct inode_operations v9fs_dir_inode_operations_ext = { + .create = v9fs_vfs_create, + .lookup = v9fs_vfs_lookup, + .symlink = v9fs_vfs_symlink, + .link = v9fs_vfs_link, + .unlink = v9fs_vfs_unlink, + .mkdir = v9fs_vfs_mkdir, + .rmdir = v9fs_vfs_rmdir, + .mknod = v9fs_vfs_mknod, + .rename = v9fs_vfs_rename, + .readlink = v9fs_vfs_readlink, + .getattr = v9fs_vfs_getattr, + .setattr = v9fs_vfs_setattr, +}; + +static struct inode_operations v9fs_dir_inode_operations = { + .create = v9fs_vfs_create, + .lookup = v9fs_vfs_lookup, + .unlink = v9fs_vfs_unlink, + .mkdir = v9fs_vfs_mkdir, + .rmdir = v9fs_vfs_rmdir, + .mknod = v9fs_vfs_mknod, + .rename = v9fs_vfs_rename, + .getattr = v9fs_vfs_getattr, + .setattr = v9fs_vfs_setattr, +}; + +static struct inode_operations v9fs_file_inode_operations = { + .getattr = v9fs_vfs_getattr, + .setattr = v9fs_vfs_setattr, +}; + +static struct inode_operations v9fs_symlink_inode_operations = { + .readlink = v9fs_vfs_readlink, + .follow_link = v9fs_vfs_follow_link, + .put_link = v9fs_vfs_put_link, + .getattr = v9fs_vfs_getattr, + .setattr = v9fs_vfs_setattr, +}; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c new file mode 100644 index 00000000000..868f350b2c5 --- /dev/null +++ b/fs/9p/vfs_super.c @@ -0,0 +1,280 @@ +/* + * linux/fs/9p/vfs_super.c + * + * This file contians superblock ops for 9P2000. It is intended that + * you mount this file system on directories. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/inet.h> +#include <linux/pagemap.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/idr.h> + +#include "debug.h" +#include "v9fs.h" +#include "9p.h" +#include "v9fs_vfs.h" +#include "conv.h" +#include "fid.h" + +static void v9fs_clear_inode(struct inode *); +static struct super_operations v9fs_super_ops; + +/** + * v9fs_clear_inode - release an inode + * @inode: inode to release + * + */ + +static void v9fs_clear_inode(struct inode *inode) +{ + filemap_fdatawrite(inode->i_mapping); +} + +/** + * v9fs_set_super - set the superblock + * @s: super block + * @data: file system specific data + * + */ + +static int v9fs_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + return set_anon_super(s, data); +} + +/** + * v9fs_fill_super - populate superblock with info + * @sb: superblock + * @v9ses: session information + * + */ + +static void +v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, + int flags) +{ + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize_bits = fls(v9ses->maxdata - 1); + sb->s_blocksize = 1 << sb->s_blocksize_bits; + sb->s_magic = V9FS_MAGIC; + sb->s_op = &v9fs_super_ops; + + sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | + MS_NODIRATIME | MS_NOATIME; +} + +/** + * v9fs_get_sb - mount a superblock + * @fs_type: file system type + * @flags: mount flags + * @dev_name: device name that was mounted + * @data: mount options + * + */ + +static struct super_block *v9fs_get_sb(struct file_system_type + *fs_type, int flags, + const char *dev_name, void *data) +{ + struct super_block *sb = NULL; + struct v9fs_fcall *fcall = NULL; + struct inode *inode = NULL; + struct dentry *root = NULL; + struct v9fs_session_info *v9ses = NULL; + struct v9fs_fid *root_fid = NULL; + int mode = S_IRWXUGO | S_ISVTX; + uid_t uid = current->fsuid; + gid_t gid = current->fsgid; + int stat_result = 0; + int newfid = 0; + int retval = 0; + + dprintk(DEBUG_VFS, " \n"); + + v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL); + if (!v9ses) + return ERR_PTR(-ENOMEM); + + if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { + dprintk(DEBUG_ERROR, "problem initiating session\n"); + retval = newfid; + goto free_session; + } + + sb = sget(fs_type, NULL, v9fs_set_super, v9ses); + + v9fs_fill_super(sb, v9ses, flags); + + inode = v9fs_get_inode(sb, S_IFDIR | mode); + if (IS_ERR(inode)) { + retval = PTR_ERR(inode); + goto put_back_sb; + } + + inode->i_uid = uid; + inode->i_gid = gid; + + root = d_alloc_root(inode); + + if (!root) { + retval = -ENOMEM; + goto release_inode; + } + + sb->s_root = root; + + /* Setup the Root Inode */ + root_fid = v9fs_fid_create(root); + if (root_fid == NULL) { + retval = -ENOMEM; + goto release_dentry; + } + + root_fid->fidopen = 0; + root_fid->v9ses = v9ses; + + stat_result = v9fs_t_stat(v9ses, newfid, &fcall); + if (stat_result < 0) { + dprintk(DEBUG_ERROR, "stat error\n"); + v9fs_t_clunk(v9ses, newfid, NULL); + v9fs_put_idpool(newfid, &v9ses->fidpool); + } else { + root_fid->fid = newfid; + root_fid->qid = fcall->params.rstat.stat->qid; + root->d_inode->i_ino = + v9fs_qid2ino(&fcall->params.rstat.stat->qid); + v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb); + } + + kfree(fcall); + + if (stat_result < 0) { + retval = stat_result; + goto release_dentry; + } + + return sb; + + release_dentry: + dput(sb->s_root); + + release_inode: + iput(inode); + + put_back_sb: + up_write(&sb->s_umount); + deactivate_super(sb); + v9fs_session_close(v9ses); + + free_session: + kfree(v9ses); + + return ERR_PTR(retval); +} + +/** + * v9fs_kill_super - Kill Superblock + * @s: superblock + * + */ + +static void v9fs_kill_super(struct super_block *s) +{ + struct v9fs_session_info *v9ses = s->s_fs_info; + + dprintk(DEBUG_VFS, " %p\n", s); + + v9fs_dentry_release(s->s_root); /* clunk root */ + + kill_anon_super(s); + + v9fs_session_close(v9ses); + kfree(v9ses); + dprintk(DEBUG_VFS, "exiting kill_super\n"); +} + +/** + * v9fs_show_options - Show mount options in /proc/mounts + * @m: seq_file to write to + * @mnt: mount descriptor + * + */ + +static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; + + if (v9ses->debug != 0) + seq_printf(m, ",debug=%u", v9ses->debug); + if (v9ses->port != V9FS_PORT) + seq_printf(m, ",port=%u", v9ses->port); + if (v9ses->maxdata != 9000) + seq_printf(m, ",msize=%u", v9ses->maxdata); + if (v9ses->afid != ~0) + seq_printf(m, ",afid=%u", v9ses->afid); + if (v9ses->proto == PROTO_UNIX) + seq_puts(m, ",proto=unix"); + if (v9ses->extended == 0) + seq_puts(m, ",noextend"); + if (v9ses->nodev == 1) + seq_puts(m, ",nodevmap"); + seq_printf(m, ",name=%s", v9ses->name); + seq_printf(m, ",aname=%s", v9ses->remotename); + seq_printf(m, ",uid=%u", v9ses->uid); + seq_printf(m, ",gid=%u", v9ses->gid); + return 0; +} + +static void +v9fs_umount_begin(struct super_block *sb) +{ + struct v9fs_session_info *v9ses = sb->s_fs_info; + + v9fs_session_cancel(v9ses); +} + +static struct super_operations v9fs_super_ops = { + .statfs = simple_statfs, + .clear_inode = v9fs_clear_inode, + .show_options = v9fs_show_options, + .umount_begin = v9fs_umount_begin, +}; + +struct file_system_type v9fs_fs_type = { + .name = "9P", + .get_sb = v9fs_get_sb, + .kill_sb = v9fs_kill_super, + .owner = THIS_MODULE, +}; diff --git a/fs/Kconfig b/fs/Kconfig index 5e817902cb3..068ccea2f18 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -462,6 +462,19 @@ config AUTOFS4_FS local network, you probably do not need an automounter, and can say N here. +config FUSE_FS + tristate "Filesystem in Userspace support" + help + With FUSE it is possible to implement a fully functional filesystem + in a userspace program. + + There's also companion library: libfuse. This library along with + utilities is available from the FUSE homepage: + <http://fuse.sourceforge.net/> + + If you want to develop a userspace FS, or if you want to use + a filesystem based on FUSE, answer Y or M. + menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -1703,6 +1716,17 @@ config AFS_FS config RXRPC tristate +config 9P_FS + tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + depends on INET && EXPERIMENTAL + help + If you say Y here, you will get experimental support for + Plan 9 resource sharing via the 9P2000 protocol. + + See <http://v9fs.sf.net> for more information. + + If unsure, say N. + endmenu menu "Partition Types" diff --git a/fs/Makefile b/fs/Makefile index 15158309dee..1972da18627 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -89,11 +89,13 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/ obj-$(CONFIG_AUTOFS_FS) += autofs/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ +obj-$(CONFIG_FUSE_FS) += fuse/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_RELAYFS_FS) += relayfs/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ +obj-$(CONFIG_9P_FS) += 9p/ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 7aa6f200453..9ebe881c678 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -255,6 +255,7 @@ void affs_delete_inode(struct inode *inode) { pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); + truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; if (S_ISREG(inode->i_mode)) affs_truncate(inode); @@ -29,6 +29,7 @@ #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/security.h> +#include <linux/rcuref.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); @@ -546,6 +547,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) return ioctx; } +static int lock_kiocb_action(void *param) +{ + schedule(); + return 0; +} + +static inline void lock_kiocb(struct kiocb *iocb) +{ + wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, + TASK_UNINTERRUPTIBLE); +} + +static inline void unlock_kiocb(struct kiocb *iocb) +{ + kiocbClearLocked(iocb); + wake_up_bit(&iocb->ki_flags, KIF_LOCKED); +} + /* * use_mm * Makes the calling kernel thread take on the specified @@ -786,7 +805,9 @@ static int __aio_run_iocbs(struct kioctx *ctx) * Hold an extra reference while retrying i/o. */ iocb->ki_users++; /* grab extra reference */ + lock_kiocb(iocb); aio_run_iocb(iocb); + unlock_kiocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); } @@ -1527,10 +1548,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; spin_lock_irq(&ctx->ctx_lock); - if (likely(list_empty(&ctx->run_list))) { - aio_run_iocb(req); - } else { - list_add_tail(&req->ki_run_list, &ctx->run_list); + aio_run_iocb(req); + unlock_kiocb(req); + if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) ; @@ -1661,6 +1681,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (NULL != cancel) { struct io_event tmp; pr_debug("calling cancel\n"); + lock_kiocb(kiocb); memset(&tmp, 0, sizeof(tmp)); tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; tmp.data = kiocb->ki_user_data; @@ -1672,8 +1693,9 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (copy_to_user(result, &tmp, sizeof(tmp))) ret = -EFAULT; } + unlock_kiocb(kiocb); } else - printk(KERN_DEBUG "iocb has no cancel operation\n"); + ret = -EINVAL; put_ioctx(ctx); diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 6171431272d..990c28da5ae 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -105,6 +105,7 @@ struct autofs_sb_info { struct file *pipe; pid_t oz_pgrp; int catatonic; + struct super_block *sb; unsigned long exp_timeout; ino_t next_dir_ino; struct autofs_wait_queue *queues; /* Wait queue pointer */ @@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *); void autofs_hash_delete(struct autofs_dir_ent *); struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *); void autofs_hash_dputall(struct autofs_dirhash *); -void autofs_hash_nuke(struct autofs_dirhash *); +void autofs_hash_nuke(struct autofs_sb_info *); /* Expiration-handling functions */ diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 448143fd079..5ccfcf26310 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh) /* Delete everything. This is used on filesystem destruction, so we make no attempt to keep the pointers valid */ -void autofs_hash_nuke(struct autofs_dirhash *dh) +void autofs_hash_nuke(struct autofs_sb_info *sbi) { int i; struct autofs_dir_ent *ent, *nent; for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) { - for ( ent = dh->h[i] ; ent ; ent = nent ) { + for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) { nent = ent->next; if ( ent->dentry ) dput(ent->dentry); @@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh) kfree(ent); } } + shrink_dcache_sb(sbi->sb); } diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 4888c1fabbf..65e5ed42190 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb) if ( !sbi->catatonic ) autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ - autofs_hash_nuke(&sbi->dirhash); + autofs_hash_nuke(sbi); for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) { if ( test_bit(n, sbi->symlink_bitmap) ) kfree(sbi->symlink[n].data); @@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = AUTOFS_SUPER_MAGIC; s->s_op = &autofs_sops; s->s_time_gran = 1; + sbi->sb = s; root_inode = iget(s, AUTOFS_ROOT_INO); root = d_alloc_root(root_inode); diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 1020dbc88be..1fbc53f14ab 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -20,7 +20,6 @@ struct bfs_sb_info { unsigned long si_lasti; unsigned long * si_imap; struct buffer_head * si_sbh; /* buffer header w/superblock */ - struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */ }; /* diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 5a1e5ce057f..e240c335eb2 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -2,6 +2,7 @@ * fs/bfs/dir.c * BFS directory operations. * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> + * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005 */ #include <linux/time.h> @@ -20,9 +21,9 @@ #define dprintf(x...) #endif -static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino); +static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino); static struct buffer_head * bfs_find_entry(struct inode * dir, - const char * name, int namelen, struct bfs_dirent ** res_dir); + const unsigned char * name, int namelen, struct bfs_dirent ** res_dir); static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) { @@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) de = (struct bfs_dirent *)(bh->b_data + offset); if (de->ino) { int size = strnlen(de->name, BFS_NAMELEN); - if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) { + if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) { brelse(bh); unlock_kernel(); return 0; @@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_mapping->a_ops = &bfs_aops; inode->i_mode = mode; inode->i_ino = ino; - BFS_I(inode)->i_dsk_ino = ino; + BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); BFS_I(inode)->i_sblock = 0; BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); @@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st lock_kernel(); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); if (bh) { - unsigned long ino = le32_to_cpu(de->ino); + unsigned long ino = (unsigned long)le16_to_cpu(de->ino); brelse(bh); inode = iget(dir->i_sb, ino); if (!inode) { @@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry) inode = dentry->d_inode; lock_kernel(); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); - if (!bh || de->ino != inode->i_ino) + if (!bh || le16_to_cpu(de->ino) != inode->i_ino) goto out_brelse; if (!inode->i_nlink) { @@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, old_dentry->d_name.name, old_dentry->d_name.len, &old_de); - if (!old_bh || old_de->ino != old_inode->i_ino) + if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino) goto end_rename; error = -EPERM; @@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = { .rename = bfs_rename, }; -static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino) +static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino) { struct buffer_head * bh; struct bfs_dirent * de; @@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int } dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); - de->ino = ino; + de->ino = cpu_to_le16((u16)ino); for (i=0; i<BFS_NAMELEN; i++) de->name[i] = (i < namelen) ? name[i] : 0; mark_buffer_dirty(bh); @@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int return -ENOSPC; } -static inline int bfs_namecmp(int len, const char * name, const char * buffer) +static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer) { if (len < BFS_NAMELEN && buffer[len]) return 0; @@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer) } static struct buffer_head * bfs_find_entry(struct inode * dir, - const char * name, int namelen, struct bfs_dirent ** res_dir) + const unsigned char * name, int namelen, struct bfs_dirent ** res_dir) { unsigned long block, offset; struct buffer_head * bh; @@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir, } de = (struct bfs_dirent *)(bh->b_data + offset); offset += BFS_DIRENT_SIZE; - if (de->ino && bfs_namecmp(namelen, name, de->name)) { + if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) { *res_dir = de; return bh; } diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 747fd1ea55e..807723b65da 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo return 0; } -static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, - unsigned long where) +static int bfs_move_blocks(struct super_block *sb, unsigned long start, + unsigned long end, unsigned long where) { unsigned long i; @@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned static int bfs_get_block(struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { - long phys; + unsigned long phys; int err; struct super_block *sb = inode->i_sb; struct bfs_sb_info *info = BFS_SB(sb); struct bfs_inode_info *bi = BFS_I(inode); struct buffer_head *sbh = info->si_sbh; - if (block < 0 || block > info->si_blocks) + if (block > info->si_blocks) return -EIO; phys = bi->i_sblock + block; if (!create) { if (phys <= bi->i_eblock) { - dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys); + dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n", + create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); } return 0; @@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block, of blocks allocated for this file, we can grant it */ if (inode->i_size && phys <= bi->i_eblock) { dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", - create, block, phys); + create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); return 0; } @@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block, /* the rest has to be protected against itself */ lock_kernel(); - /* if the last data block for this file is the last allocated block, we can - extend the file trivially, without moving it anywhere */ + /* if the last data block for this file is the last allocated + block, we can extend the file trivially, without moving it + anywhere */ if (bi->i_eblock == info->si_lf_eblk) { dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", - create, block, phys); + create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); info->si_freeb -= phys - bi->i_eblock; info->si_lf_eblk = bi->i_eblock = phys; @@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block, } else err = 0; - dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys); + dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", + create, (unsigned long)block, phys); bi->i_sblock = phys; phys += block; info->si_lf_eblk = bi->i_eblock = phys; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 64e0fb33fc0..c7b39aa279d 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -3,6 +3,8 @@ * BFS superblock and inode operations. * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. + * + * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005. */ #include <linux/module.h> @@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode) off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - inode->i_mode = 0x0000FFFF & di->i_mode; - if (di->i_vtype == BFS_VDIR) { + inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); + if (le32_to_cpu(di->i_vtype) == BFS_VDIR) { inode->i_mode |= S_IFDIR; inode->i_op = &bfs_dir_inops; inode->i_fop = &bfs_dir_operations; - } else if (di->i_vtype == BFS_VREG) { + } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) { inode->i_mode |= S_IFREG; inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; inode->i_mapping->a_ops = &bfs_aops; } - inode->i_uid = di->i_uid; - inode->i_gid = di->i_gid; - inode->i_nlink = di->i_nlink; + BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); + BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); + inode->i_uid = le32_to_cpu(di->i_uid); + inode->i_gid = le32_to_cpu(di->i_gid); + inode->i_nlink = le32_to_cpu(di->i_nlink); inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); + if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); inode->i_blksize = PAGE_SIZE; - inode->i_atime.tv_sec = di->i_atime; - inode->i_mtime.tv_sec = di->i_mtime; - inode->i_ctime.tv_sec = di->i_ctime; + inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); + inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); + inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */ - BFS_I(inode)->i_sblock = di->i_sblock; - BFS_I(inode)->i_eblock = di->i_eblock; + BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */ brelse(bh); } static int bfs_write_inode(struct inode * inode, int unused) { - unsigned long ino = inode->i_ino; + unsigned int ino = (u16)inode->i_ino; + unsigned long i_sblock; struct bfs_inode * di; struct buffer_head * bh; int block, off; + dprintf("ino=%08x\n", ino); + if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { - printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); return -EIO; } @@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused) block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino); unlock_kernel(); return -EIO; } @@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused) off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - if (inode->i_ino == BFS_ROOT_INO) - di->i_vtype = BFS_VDIR; + if (ino == BFS_ROOT_INO) + di->i_vtype = cpu_to_le32(BFS_VDIR); else - di->i_vtype = BFS_VREG; - - di->i_ino = inode->i_ino; - di->i_mode = inode->i_mode; - di->i_uid = inode->i_uid; - di->i_gid = inode->i_gid; - di->i_nlink = inode->i_nlink; - di->i_atime = inode->i_atime.tv_sec; - di->i_mtime = inode->i_mtime.tv_sec; - di->i_ctime = inode->i_ctime.tv_sec; - di->i_sblock = BFS_I(inode)->i_sblock; - di->i_eblock = BFS_I(inode)->i_eblock; - di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1; + di->i_vtype = cpu_to_le32(BFS_VREG); + + di->i_ino = cpu_to_le16(ino); + di->i_mode = cpu_to_le32(inode->i_mode); + di->i_uid = cpu_to_le32(inode->i_uid); + di->i_gid = cpu_to_le32(inode->i_gid); + di->i_nlink = cpu_to_le32(inode->i_nlink); + di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); + di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + i_sblock = BFS_I(inode)->i_sblock; + di->i_sblock = cpu_to_le32(i_sblock); + di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock); + di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); + dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off); brelse(bh); unlock_kernel(); return 0; @@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode) int block, off; struct super_block * s = inode->i_sb; struct bfs_sb_info * info = BFS_SB(s); + struct bfs_inode_info * bi = BFS_I(inode); - dprintf("ino=%08lx\n", inode->i_ino); + dprintf("ino=%08lx\n", ino); - if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) { - printf("invalid ino=%08lx\n", inode->i_ino); + truncate_inode_pages(&inode->i_data, 0); + + if (ino < BFS_ROOT_INO || ino > info->si_lasti) { + printf("invalid ino=%08lx\n", ino); return; } @@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode) return; } off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; - di = (struct bfs_inode *)bh->b_data + off; - if (di->i_ino) { - info->si_freeb += BFS_FILEBLOCKS(di); + di = (struct bfs_inode *) bh->b_data + off; + if (bi->i_dsk_ino) { + info->si_freeb += 1 + bi->i_eblock - bi->i_sblock; info->si_freei++; - clear_bit(di->i_ino, info->si_imap); + clear_bit(ino, info->si_imap); dump_imap("delete_inode", s); - } + } di->i_ino = 0; di->i_sblock = 0; mark_buffer_dirty(bh); @@ -272,14 +283,14 @@ static struct super_operations bfs_sops = { void dump_imap(const char *prefix, struct super_block * s) { -#if 0 +#ifdef DEBUG int i; char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL); if (!tmpbuf) return; for (i=BFS_SB(s)->si_lasti; i>=0; i--) { - if (i>PAGE_SIZE-100) break; + if (i > PAGE_SIZE-100) break; if (test_bit(i, BFS_SB(s)->si_imap)) strcat(tmpbuf, "1"); else @@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct buffer_head * bh; struct bfs_super_block * bfs_sb; struct inode * inode; - int i, imap_len; + unsigned i, imap_len; struct bfs_sb_info * info; info = kmalloc(sizeof(*info), GFP_KERNEL); @@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) if(!bh) goto out; bfs_sb = (struct bfs_super_block *)bh->b_data; - if (bfs_sb->s_magic != BFS_MAGIC) { + if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) printf("No BFS filesystem on %s (magic=%08x)\n", - s->s_id, bfs_sb->s_magic); + s->s_id, le32_to_cpu(bfs_sb->s_magic)); goto out; } if (BFS_UNCLEAN(bfs_sb, s) && !silent) printf("%s is unclean, continuing\n", s->s_id); s->s_magic = BFS_MAGIC; - info->si_bfs_sb = bfs_sb; info->si_sbh = bh; - info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; imap_len = info->si_lasti/8 + 1; @@ -346,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) goto out; } - info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS; + info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; info->si_lf_sblk = 0; @@ -683,7 +683,7 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, { struct sg_iovec iov; - iov.iov_base = (__user void *)uaddr; + iov.iov_base = (void __user *)uaddr; iov.iov_len = len; return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); diff --git a/fs/buffer.c b/fs/buffer.c index 1c62203a490..6cbfceabd95 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -40,6 +40,7 @@ #include <linux/cpu.h> #include <linux/bitops.h> #include <linux/mpage.h> +#include <linux/bit_spinlock.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void invalidate_bh_lrus(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3217ac5f6bd..2335f14a158 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3215,10 +3215,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) } cifs_sb->tcon = NULL; - if (ses) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 2); - } + if (ses) + schedule_timeout_interruptible(msecs_to_jiffies(500)); if (ses) sesInfoFree(ses); diff --git a/fs/compat.c b/fs/compat.c index 8c665705c6a..ac3fb9ed8ee 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1619,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp char *bits; long timeout; int size, max_fdset, ret = -EINVAL; + struct fdtable *fdt; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -1644,7 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp goto out_nofds; /* max_fdset can increase, so grab it once to avoid race */ - max_fdset = current->files->max_fdset; + rcu_read_lock(); + fdt = files_fdtable(current->files); + max_fdset = fdt->max_fdset; + rcu_read_unlock(); if (n > max_fdset) n = max_fdset; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 155e612635f..e28a74203f3 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) r = (void *) &r4; } - if (ret) - return -EFAULT; + if (ret) { + ret = -EFAULT; + goto out; + } set_fs (KERNEL_DS); ret = sys_ioctl (fd, cmd, (unsigned long) r); set_fs (old_fs); +out: if (mysock) sockfd_put(mysock); diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index 5034365b06a..8def89f2c43 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/vmalloc.h> #include <linux/zlib.h> +#include <linux/cramfs_fs.h> static z_stream stream; static int initialized; diff --git a/fs/dcache.c b/fs/dcache.c index a15a2e1f552..7376b61269f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -337,12 +337,10 @@ struct dentry * d_find_alias(struct inode *inode) */ void d_prune_aliases(struct inode *inode) { - struct list_head *tmp, *head = &inode->i_dentry; + struct dentry *dentry; restart: spin_lock(&dcache_lock); - tmp = head; - while ((tmp = tmp->next) != head) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!atomic_read(&dentry->d_count)) { __dget_locked(dentry); @@ -463,10 +461,7 @@ void shrink_dcache_sb(struct super_block * sb) * superblock to the most recent end of the unused list. */ spin_lock(&dcache_lock); - next = dentry_unused.next; - while (next != &dentry_unused) { - tmp = next; - next = tmp->next; + list_for_each_safe(tmp, next, &dentry_unused) { dentry = list_entry(tmp, struct dentry, d_lru); if (dentry->d_sb != sb) continue; @@ -478,10 +473,7 @@ void shrink_dcache_sb(struct super_block * sb) * Pass two ... free the dentries for this superblock. */ repeat: - next = dentry_unused.next; - while (next != &dentry_unused) { - tmp = next; - next = tmp->next; + list_for_each_safe(tmp, next, &dentry_unused) { dentry = list_entry(tmp, struct dentry, d_lru); if (dentry->d_sb != sb) continue; diff --git a/fs/exec.c b/fs/exec.c index 222ab1c572d..14dd03907cc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -798,6 +798,7 @@ no_thread_group: static inline void flush_old_files(struct files_struct * files) { long j = -1; + struct fdtable *fdt; spin_lock(&files->file_lock); for (;;) { @@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files) j++; i = j * __NFDBITS; - if (i >= files->max_fds || i >= files->max_fdset) + fdt = files_fdtable(files); + if (i >= fdt->max_fds || i >= fdt->max_fdset) break; - set = files->close_on_exec->fds_bits[j]; + set = fdt->close_on_exec->fds_bits[j]; if (!set) continue; - files->close_on_exec->fds_bits[j] = 0; + fdt->close_on_exec->fds_bits[j] = 0; spin_unlock(&files->file_lock); for ( ; set ; i++,set >>= 1) { if (set & 1) { diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 161f156d98c..c8d07030c89 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -615,6 +615,11 @@ got: DQUOT_DROP(inode); goto fail2; } + err = ext2_init_security(inode,dir); + if (err) { + DQUOT_FREE_INODE(inode); + goto fail2; + } mark_inode_dirty(inode); ext2_debug("allocating inode %lu\n", inode->i_ino); ext2_preread_inode(inode); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 53dceb0c659..fdba4d1d3c6 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode) */ void ext2_delete_inode (struct inode * inode) { + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index 5f3bfde3b81..67cfeb66e89 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -116,3 +116,11 @@ exit_ext2_xattr(void) # endif /* CONFIG_EXT2_FS_XATTR */ +#ifdef CONFIG_EXT2_FS_SECURITY +extern int ext2_init_security(struct inode *inode, struct inode *dir); +#else +static inline int ext2_init_security(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index 6a6c59fbe59..a2661279847 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -8,6 +8,7 @@ #include <linux/fs.h> #include <linux/smp_lock.h> #include <linux/ext2_fs.h> +#include <linux/security.h> #include "xattr.h" static size_t @@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name, value, size, flags); } +int +ext2_init_security(struct inode *inode, struct inode *dir) +{ + int err; + size_t len; + void *value; + char *name; + + err = security_inode_init_security(inode, dir, &name, &value, &len); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, + name, value, len, 0); + kfree(name); + kfree(value); + return err; +} + struct xattr_handler ext2_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = ext2_xattr_security_list, diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 6981bd014ed..96552769d03 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -607,6 +607,11 @@ got: DQUOT_DROP(inode); goto fail2; } + err = ext3_init_security(handle,inode, dir); + if (err) { + DQUOT_FREE_INODE(inode); + goto fail2; + } err = ext3_mark_inode_dirty(handle, inode); if (err) { ext3_std_error(sb, err); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9989fdcf4d5..b5177c90d6f 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode) { handle_t *handle; + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) goto no_delete; diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h index eb31a69e82d..2ceae38f3d4 100644 --- a/fs/ext3/xattr.h +++ b/fs/ext3/xattr.h @@ -133,3 +133,14 @@ exit_ext3_xattr(void) #define ext3_xattr_handlers NULL # endif /* CONFIG_EXT3_FS_XATTR */ + +#ifdef CONFIG_EXT3_FS_SECURITY +extern int ext3_init_security(handle_t *handle, struct inode *inode, + struct inode *dir); +#else +static inline int ext3_init_security(handle_t *handle, struct inode *inode, + struct inode *dir) +{ + return 0; +} +#endif diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index ddc1c41750e..b9c40c15647 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -9,6 +9,7 @@ #include <linux/smp_lock.h> #include <linux/ext3_jbd.h> #include <linux/ext3_fs.h> +#include <linux/security.h> #include "xattr.h" static size_t @@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name, value, size, flags); } +int +ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) +{ + int err; + size_t len; + void *value; + char *name; + + err = security_inode_init_security(inode, dir, &name, &value, &len); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY, + name, value, len, 0); + kfree(name); + kfree(value); + return err; +} + struct xattr_handler ext3_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = ext3_xattr_security_list, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 96ae85b67eb..a7cbe68e225 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode); static void fat_delete_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + if (!is_bad_inode(inode)) { inode->i_size = 0; fat_truncate(inode); diff --git a/fs/fcntl.c b/fs/fcntl.c index 6fbc9d8fcc3..863b46e0d78 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -16,6 +16,7 @@ #include <linux/security.h> #include <linux/ptrace.h> #include <linux/signal.h> +#include <linux/rcupdate.h> #include <asm/poll.h> #include <asm/siginfo.h> @@ -24,21 +25,25 @@ void fastcall set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); + fdt = files_fdtable(files); if (flag) - FD_SET(fd, files->close_on_exec); + FD_SET(fd, fdt->close_on_exec); else - FD_CLR(fd, files->close_on_exec); + FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); } static inline int get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; + struct fdtable *fdt; int res; - spin_lock(&files->file_lock); - res = FD_ISSET(fd, files->close_on_exec); - spin_unlock(&files->file_lock); + rcu_read_lock(); + fdt = files_fdtable(files); + res = FD_ISSET(fd, fdt->close_on_exec); + rcu_read_unlock(); return res; } @@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files, unsigned int newfd; unsigned int start; int error; + struct fdtable *fdt; error = -EINVAL; if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; repeat: + fdt = files_fdtable(files); /* * Someone might have closed fd's in the range - * orig_start..files->next_fd + * orig_start..fdt->next_fd */ start = orig_start; - if (start < files->next_fd) - start = files->next_fd; + if (start < fdt->next_fd) + start = fdt->next_fd; newfd = start; - if (start < files->max_fdset) { - newfd = find_next_zero_bit(files->open_fds->fds_bits, - files->max_fdset, start); + if (start < fdt->max_fdset) { + newfd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fdset, start); } error = -EMFILE; @@ -89,9 +96,15 @@ repeat: if (error) goto repeat; - if (start <= files->next_fd) - files->next_fd = newfd + 1; - + /* + * We reacquired files_lock, so we are safe as long as + * we reacquire the fdtable pointer and use it while holding + * the lock, no one can free it during that time. + */ + fdt = files_fdtable(files); + if (start <= fdt->next_fd) + fdt->next_fd = newfd + 1; + error = newfd; out: @@ -101,13 +114,16 @@ out: static int dupfd(struct file *file, unsigned int start) { struct files_struct * files = current->files; + struct fdtable *fdt; int fd; spin_lock(&files->file_lock); fd = locate_fd(files, file, start); if (fd >= 0) { - FD_SET(fd, files->open_fds); - FD_CLR(fd, files->close_on_exec); + /* locate_fd() may have expanded fdtable, load the ptr */ + fdt = files_fdtable(files); + FD_SET(fd, fdt->open_fds); + FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); fd_install(fd, file); } else { @@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) int err = -EBADF; struct file * file, *tofree; struct files_struct * files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) @@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) /* Yes. It's a race. In user space. Nothing sane to do */ err = -EBUSY; - tofree = files->fd[newfd]; - if (!tofree && FD_ISSET(newfd, files->open_fds)) + fdt = files_fdtable(files); + tofree = fdt->fd[newfd]; + if (!tofree && FD_ISSET(newfd, fdt->open_fds)) goto out_fput; - files->fd[newfd] = file; - FD_SET(newfd, files->open_fds); - FD_CLR(newfd, files->close_on_exec); + rcu_assign_pointer(fdt->fd[newfd], file); + FD_SET(newfd, fdt->open_fds); + FD_CLR(newfd, fdt->close_on_exec); spin_unlock(&files->file_lock); if (tofree) diff --git a/fs/file.c b/fs/file.c index 92b5f25985d..2127a7b9dc3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -13,6 +13,25 @@ #include <linux/vmalloc.h> #include <linux/file.h> #include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/workqueue.h> + +struct fdtable_defer { + spinlock_t lock; + struct work_struct wq; + struct timer_list timer; + struct fdtable *next; +}; + +/* + * We use this list to defer free fdtables that have vmalloced + * sets/arrays. By keeping a per-cpu list, we avoid having to embed + * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in + * this per-task structure. + */ +static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); /* @@ -48,82 +67,143 @@ void free_fd_array(struct file **array, int num) vfree(array); } -/* - * Expand the fd array in the files_struct. Called with the files - * spinlock held for write. - */ +static void __free_fdtable(struct fdtable *fdt) +{ + int fdset_size, fdarray_size; -static int expand_fd_array(struct files_struct *files, int nr) - __releases(files->file_lock) - __acquires(files->file_lock) + fdset_size = fdt->max_fdset / 8; + fdarray_size = fdt->max_fds * sizeof(struct file *); + free_fdset(fdt->open_fds, fdset_size); + free_fdset(fdt->close_on_exec, fdset_size); + free_fd_array(fdt->fd, fdarray_size); + kfree(fdt); +} + +static void fdtable_timer(unsigned long data) { - struct file **new_fds; - int error, nfds; + struct fdtable_defer *fddef = (struct fdtable_defer *)data; - - error = -EMFILE; - if (files->max_fds >= NR_OPEN || nr >= NR_OPEN) + spin_lock(&fddef->lock); + /* + * If someone already emptied the queue return. + */ + if (!fddef->next) goto out; + if (!schedule_work(&fddef->wq)) + mod_timer(&fddef->timer, 5); +out: + spin_unlock(&fddef->lock); +} - nfds = files->max_fds; - spin_unlock(&files->file_lock); +static void free_fdtable_work(struct fdtable_defer *f) +{ + struct fdtable *fdt; - /* - * Expand to the max in easy steps, and keep expanding it until - * we have enough for the requested fd array size. - */ + spin_lock_bh(&f->lock); + fdt = f->next; + f->next = NULL; + spin_unlock_bh(&f->lock); + while(fdt) { + struct fdtable *next = fdt->next; + __free_fdtable(fdt); + fdt = next; + } +} - do { -#if NR_OPEN_DEFAULT < 256 - if (nfds < 256) - nfds = 256; - else -#endif - if (nfds < (PAGE_SIZE / sizeof(struct file *))) - nfds = PAGE_SIZE / sizeof(struct file *); - else { - nfds = nfds * 2; - if (nfds > NR_OPEN) - nfds = NR_OPEN; - } - } while (nfds <= nr); +static void free_fdtable_rcu(struct rcu_head *rcu) +{ + struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); + int fdset_size, fdarray_size; + struct fdtable_defer *fddef; - error = -ENOMEM; - new_fds = alloc_fd_array(nfds); - spin_lock(&files->file_lock); - if (!new_fds) - goto out; + BUG_ON(!fdt); + fdset_size = fdt->max_fdset / 8; + fdarray_size = fdt->max_fds * sizeof(struct file *); - /* Copy the existing array and install the new pointer */ - - if (nfds > files->max_fds) { - struct file **old_fds; - int i; - - old_fds = xchg(&files->fd, new_fds); - i = xchg(&files->max_fds, nfds); - - /* Don't copy/clear the array if we are creating a new - fd array for fork() */ - if (i) { - memcpy(new_fds, old_fds, i * sizeof(struct file *)); - /* clear the remainder of the array */ - memset(&new_fds[i], 0, - (nfds-i) * sizeof(struct file *)); - - spin_unlock(&files->file_lock); - free_fd_array(old_fds, i); - spin_lock(&files->file_lock); - } + if (fdt->free_files) { + /* + * The this fdtable was embedded in the files structure + * and the files structure itself was getting destroyed. + * It is now safe to free the files structure. + */ + kmem_cache_free(files_cachep, fdt->free_files); + return; + } + if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { + /* + * The fdtable was embedded + */ + return; + } + if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { + kfree(fdt->open_fds); + kfree(fdt->close_on_exec); + kfree(fdt->fd); + kfree(fdt); } else { - /* Somebody expanded the array while we slept ... */ - spin_unlock(&files->file_lock); - free_fd_array(new_fds, nfds); - spin_lock(&files->file_lock); + fddef = &get_cpu_var(fdtable_defer_list); + spin_lock(&fddef->lock); + fdt->next = fddef->next; + fddef->next = fdt; + /* + * vmallocs are handled from the workqueue context. + * If the per-cpu workqueue is running, then we + * defer work scheduling through a timer. + */ + if (!schedule_work(&fddef->wq)) + mod_timer(&fddef->timer, 5); + spin_unlock(&fddef->lock); + put_cpu_var(fdtable_defer_list); } - error = 0; -out: - return error; +} + +void free_fdtable(struct fdtable *fdt) +{ + if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || + fdt->max_fds > NR_OPEN_DEFAULT) + call_rcu(&fdt->rcu, free_fdtable_rcu); +} + +/* + * Expand the fdset in the files_struct. Called with the files spinlock + * held for write. + */ +static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) +{ + int i; + int count; + + BUG_ON(nfdt->max_fdset < fdt->max_fdset); + BUG_ON(nfdt->max_fds < fdt->max_fds); + /* Copy the existing tables and install the new pointers */ + + i = fdt->max_fdset / (sizeof(unsigned long) * 8); + count = (nfdt->max_fdset - fdt->max_fdset) / 8; + + /* + * Don't copy the entire array if the current fdset is + * not yet initialised. + */ + if (i) { + memcpy (nfdt->open_fds, fdt->open_fds, + fdt->max_fdset/8); + memcpy (nfdt->close_on_exec, fdt->close_on_exec, + fdt->max_fdset/8); + memset (&nfdt->open_fds->fds_bits[i], 0, count); + memset (&nfdt->close_on_exec->fds_bits[i], 0, count); + } + + /* Don't copy/clear the array if we are creating a new + fd array for fork() */ + if (fdt->max_fds) { + memcpy(nfdt->fd, fdt->fd, + fdt->max_fds * sizeof(struct file *)); + /* clear the remainder of the array */ + memset(&nfdt->fd[fdt->max_fds], 0, + (nfdt->max_fds - fdt->max_fds) * + sizeof(struct file *)); + } + nfdt->next_fd = fdt->next_fd; } /* @@ -154,26 +234,21 @@ void free_fdset(fd_set *array, int num) vfree(array); } -/* - * Expand the fdset in the files_struct. Called with the files spinlock - * held for write. - */ -static int expand_fdset(struct files_struct *files, int nr) - __releases(file->file_lock) - __acquires(file->file_lock) +static struct fdtable *alloc_fdtable(int nr) { - fd_set *new_openset = NULL, *new_execset = NULL; - int error, nfds = 0; - - error = -EMFILE; - if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN) - goto out; + struct fdtable *fdt = NULL; + int nfds = 0; + fd_set *new_openset = NULL, *new_execset = NULL; + struct file **new_fds; - nfds = files->max_fdset; - spin_unlock(&files->file_lock); + fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); + if (!fdt) + goto out; + memset(fdt, 0, sizeof(*fdt)); - /* Expand to the max in easy steps */ - do { + nfds = __FD_SETSIZE; + /* Expand to the max in easy steps */ + do { if (nfds < (PAGE_SIZE * 8)) nfds = PAGE_SIZE * 8; else { @@ -183,49 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr) } } while (nfds <= nr); - error = -ENOMEM; - new_openset = alloc_fdset(nfds); - new_execset = alloc_fdset(nfds); - spin_lock(&files->file_lock); - if (!new_openset || !new_execset) + new_openset = alloc_fdset(nfds); + new_execset = alloc_fdset(nfds); + if (!new_openset || !new_execset) + goto out; + fdt->open_fds = new_openset; + fdt->close_on_exec = new_execset; + fdt->max_fdset = nfds; + + nfds = NR_OPEN_DEFAULT; + /* + * Expand to the max in easy steps, and keep expanding it until + * we have enough for the requested fd array size. + */ + do { +#if NR_OPEN_DEFAULT < 256 + if (nfds < 256) + nfds = 256; + else +#endif + if (nfds < (PAGE_SIZE / sizeof(struct file *))) + nfds = PAGE_SIZE / sizeof(struct file *); + else { + nfds = nfds * 2; + if (nfds > NR_OPEN) + nfds = NR_OPEN; + } + } while (nfds <= nr); + new_fds = alloc_fd_array(nfds); + if (!new_fds) + goto out; + fdt->fd = new_fds; + fdt->max_fds = nfds; + fdt->free_files = NULL; + return fdt; +out: + if (new_openset) + free_fdset(new_openset, nfds); + if (new_execset) + free_fdset(new_execset, nfds); + kfree(fdt); + return NULL; +} + +/* + * Expands the file descriptor table - it will allocate a new fdtable and + * both fd array and fdset. It is expected to be called with the + * files_lock held. + */ +static int expand_fdtable(struct files_struct *files, int nr) + __releases(files->file_lock) + __acquires(files->file_lock) +{ + int error = 0; + struct fdtable *fdt; + struct fdtable *nfdt = NULL; + + spin_unlock(&files->file_lock); + nfdt = alloc_fdtable(nr); + if (!nfdt) { + error = -ENOMEM; + spin_lock(&files->file_lock); goto out; + } - error = 0; - - /* Copy the existing tables and install the new pointers */ - if (nfds > files->max_fdset) { - int i = files->max_fdset / (sizeof(unsigned long) * 8); - int count = (nfds - files->max_fdset) / 8; - - /* - * Don't copy the entire array if the current fdset is - * not yet initialised. - */ - if (i) { - memcpy (new_openset, files->open_fds, files->max_fdset/8); - memcpy (new_execset, files->close_on_exec, files->max_fdset/8); - memset (&new_openset->fds_bits[i], 0, count); - memset (&new_execset->fds_bits[i], 0, count); - } - - nfds = xchg(&files->max_fdset, nfds); - new_openset = xchg(&files->open_fds, new_openset); - new_execset = xchg(&files->close_on_exec, new_execset); + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + /* + * Check again since another task may have expanded the + * fd table while we dropped the lock + */ + if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { + copy_fdtable(nfdt, fdt); + } else { + /* Somebody expanded while we dropped file_lock */ spin_unlock(&files->file_lock); - free_fdset (new_openset, nfds); - free_fdset (new_execset, nfds); + __free_fdtable(nfdt); spin_lock(&files->file_lock); - return 0; - } - /* Somebody expanded the array while we slept ... */ - + goto out; + } + rcu_assign_pointer(files->fdt, nfdt); + free_fdtable(fdt); out: - spin_unlock(&files->file_lock); - if (new_openset) - free_fdset(new_openset, nfds); - if (new_execset) - free_fdset(new_execset, nfds); - spin_lock(&files->file_lock); return error; } @@ -237,18 +351,39 @@ out: int expand_files(struct files_struct *files, int nr) { int err, expand = 0; + struct fdtable *fdt; - if (nr >= files->max_fdset) { - expand = 1; - if ((err = expand_fdset(files, nr))) + fdt = files_fdtable(files); + if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { + if (fdt->max_fdset >= NR_OPEN || + fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { + err = -EMFILE; goto out; - } - if (nr >= files->max_fds) { + } expand = 1; - if ((err = expand_fd_array(files, nr))) + if ((err = expand_fdtable(files, nr))) goto out; } err = expand; out: return err; } + +static void __devinit fdtable_defer_list_init(int cpu) +{ + struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); + spin_lock_init(&fddef->lock); + INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef); + init_timer(&fddef->timer); + fddef->timer.data = (unsigned long)fddef; + fddef->timer.function = fdtable_timer; + fddef->next = NULL; +} + +void __init files_defer_init(void) +{ + int i; + /* Really early - can't use for_each_cpu */ + for (i = 0; i < NR_CPUS; i++) + fdtable_defer_list_init(i); +} diff --git a/fs/file_table.c b/fs/file_table.c index 43e9e1737de..86ec8ae985b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -14,6 +14,7 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/eventpoll.h> +#include <linux/rcupdate.h> #include <linux/mount.h> #include <linux/cdev.h> #include <linux/fsnotify.h> @@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags) spin_unlock_irqrestore(&filp_count_lock, flags); } -static inline void file_free(struct file *f) +static inline void file_free_rcu(struct rcu_head *head) { + struct file *f = container_of(head, struct file, f_rcuhead); kmem_cache_free(filp_cachep, f); } +static inline void file_free(struct file *f) +{ + call_rcu(&f->f_rcuhead, file_free_rcu); +} + /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or * we run out of memory. @@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) + if (rcuref_dec_and_test(&file->f_count)) __fput(file); } @@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd) struct file *file; struct files_struct *files = current->files; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); - if (file) - get_file(file); - spin_unlock(&files->file_lock); + if (file) { + if (!rcuref_inc_lf(&file->f_count)) { + /* File object ref couldn't be taken */ + rcu_read_unlock(); + return NULL; + } + } + rcu_read_unlock(); + return file; } @@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) if (likely((atomic_read(&files->count) == 1))) { file = fcheck_files(files, fd); } else { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - get_file(file); - *fput_needed = 1; + if (rcuref_inc_lf(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } + return file; } void put_filp(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) { + if (rcuref_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); @@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages) files_stat.max_files = n; if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; + files_defer_init(); } diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile new file mode 100644 index 00000000000..c3e1f760cac --- /dev/null +++ b/fs/fuse/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the FUSE filesystem. +# + +obj-$(CONFIG_FUSE_FS) += fuse.o + +fuse-objs := dev.o dir.o file.o inode.o diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c new file mode 100644 index 00000000000..d4c869c6d01 --- /dev/null +++ b/fs/fuse/dev.c @@ -0,0 +1,877 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/uio.h> +#include <linux/miscdevice.h> +#include <linux/pagemap.h> +#include <linux/file.h> +#include <linux/slab.h> + +MODULE_ALIAS_MISCDEV(FUSE_MINOR); + +static kmem_cache_t *fuse_req_cachep; + +static inline struct fuse_conn *fuse_get_conn(struct file *file) +{ + struct fuse_conn *fc; + spin_lock(&fuse_lock); + fc = file->private_data; + if (fc && !fc->mounted) + fc = NULL; + spin_unlock(&fuse_lock); + return fc; +} + +static inline void fuse_request_init(struct fuse_req *req) +{ + memset(req, 0, sizeof(*req)); + INIT_LIST_HEAD(&req->list); + init_waitqueue_head(&req->waitq); + atomic_set(&req->count, 1); +} + +struct fuse_req *fuse_request_alloc(void) +{ + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL); + if (req) + fuse_request_init(req); + return req; +} + +void fuse_request_free(struct fuse_req *req) +{ + kmem_cache_free(fuse_req_cachep, req); +} + +static inline void block_sigs(sigset_t *oldset) +{ + sigset_t mask; + + siginitsetinv(&mask, sigmask(SIGKILL)); + sigprocmask(SIG_BLOCK, &mask, oldset); +} + +static inline void restore_sigs(sigset_t *oldset) +{ + sigprocmask(SIG_SETMASK, oldset, NULL); +} + +void fuse_reset_request(struct fuse_req *req) +{ + int preallocated = req->preallocated; + BUG_ON(atomic_read(&req->count) != 1); + fuse_request_init(req); + req->preallocated = preallocated; +} + +static void __fuse_get_request(struct fuse_req *req) +{ + atomic_inc(&req->count); +} + +/* Must be called with > 1 refcount */ +static void __fuse_put_request(struct fuse_req *req) +{ + BUG_ON(atomic_read(&req->count) < 2); + atomic_dec(&req->count); +} + +static struct fuse_req *do_get_request(struct fuse_conn *fc) +{ + struct fuse_req *req; + + spin_lock(&fuse_lock); + BUG_ON(list_empty(&fc->unused_list)); + req = list_entry(fc->unused_list.next, struct fuse_req, list); + list_del_init(&req->list); + spin_unlock(&fuse_lock); + fuse_request_init(req); + req->preallocated = 1; + req->in.h.uid = current->fsuid; + req->in.h.gid = current->fsgid; + req->in.h.pid = current->pid; + return req; +} + +/* This can return NULL, but only in case it's interrupted by a SIGKILL */ +struct fuse_req *fuse_get_request(struct fuse_conn *fc) +{ + int intr; + sigset_t oldset; + + block_sigs(&oldset); + intr = down_interruptible(&fc->outstanding_sem); + restore_sigs(&oldset); + return intr ? NULL : do_get_request(fc); +} + +static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) +{ + spin_lock(&fuse_lock); + if (req->preallocated) + list_add(&req->list, &fc->unused_list); + else + fuse_request_free(req); + + /* If we are in debt decrease that first */ + if (fc->outstanding_debt) + fc->outstanding_debt--; + else + up(&fc->outstanding_sem); + spin_unlock(&fuse_lock); +} + +void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) +{ + if (atomic_dec_and_test(&req->count)) + fuse_putback_request(fc, req); +} + +void fuse_release_background(struct fuse_req *req) +{ + iput(req->inode); + iput(req->inode2); + if (req->file) + fput(req->file); + spin_lock(&fuse_lock); + list_del(&req->bg_entry); + spin_unlock(&fuse_lock); +} + +/* + * This function is called when a request is finished. Either a reply + * has arrived or it was interrupted (and not yet sent) or some error + * occured during communication with userspace, or the device file was + * closed. It decreases the referece count for the request. In case + * of a background request the referece to the stored objects are + * released. The requester thread is woken up (if still waiting), and + * finally the request is either freed or put on the unused_list + * + * Called with fuse_lock, unlocks it + */ +static void request_end(struct fuse_conn *fc, struct fuse_req *req) +{ + int putback; + req->finished = 1; + putback = atomic_dec_and_test(&req->count); + spin_unlock(&fuse_lock); + if (req->background) { + down_read(&fc->sbput_sem); + if (fc->mounted) + fuse_release_background(req); + up_read(&fc->sbput_sem); + } + wake_up(&req->waitq); + if (req->in.h.opcode == FUSE_INIT) { + int i; + + if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) + fc->conn_error = 1; + + /* After INIT reply is received other requests can go + out. So do (FUSE_MAX_OUTSTANDING - 1) number of + up()s on outstanding_sem. The last up() is done in + fuse_putback_request() */ + for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) + up(&fc->outstanding_sem); + } + if (putback) + fuse_putback_request(fc, req); +} + +/* + * Unfortunately request interruption not just solves the deadlock + * problem, it causes problems too. These stem from the fact, that an + * interrupted request is continued to be processed in userspace, + * while all the locks and object references (inode and file) held + * during the operation are released. + * + * To release the locks is exactly why there's a need to interrupt the + * request, so there's not a lot that can be done about this, except + * introduce additional locking in userspace. + * + * More important is to keep inode and file references until userspace + * has replied, otherwise FORGET and RELEASE could be sent while the + * inode/file is still used by the filesystem. + * + * For this reason the concept of "background" request is introduced. + * An interrupted request is backgrounded if it has been already sent + * to userspace. Backgrounding involves getting an extra reference to + * inode(s) or file used in the request, and adding the request to + * fc->background list. When a reply is received for a background + * request, the object references are released, and the request is + * removed from the list. If the filesystem is unmounted while there + * are still background requests, the list is walked and references + * are released as if a reply was received. + * + * There's one more use for a background request. The RELEASE message is + * always sent as background, since it doesn't return an error or + * data. + */ +static void background_request(struct fuse_conn *fc, struct fuse_req *req) +{ + req->background = 1; + list_add(&req->bg_entry, &fc->background); + if (req->inode) + req->inode = igrab(req->inode); + if (req->inode2) + req->inode2 = igrab(req->inode2); + if (req->file) + get_file(req->file); +} + +/* Called with fuse_lock held. Releases, and then reacquires it. */ +static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) +{ + sigset_t oldset; + + spin_unlock(&fuse_lock); + block_sigs(&oldset); + wait_event_interruptible(req->waitq, req->finished); + restore_sigs(&oldset); + spin_lock(&fuse_lock); + if (req->finished) + return; + + req->out.h.error = -EINTR; + req->interrupted = 1; + if (req->locked) { + /* This is uninterruptible sleep, because data is + being copied to/from the buffers of req. During + locked state, there mustn't be any filesystem + operation (e.g. page fault), since that could lead + to deadlock */ + spin_unlock(&fuse_lock); + wait_event(req->waitq, !req->locked); + spin_lock(&fuse_lock); + } + if (!req->sent && !list_empty(&req->list)) { + list_del(&req->list); + __fuse_put_request(req); + } else if (!req->finished && req->sent) + background_request(fc, req); +} + +static unsigned len_args(unsigned numargs, struct fuse_arg *args) +{ + unsigned nbytes = 0; + unsigned i; + + for (i = 0; i < numargs; i++) + nbytes += args[i].size; + + return nbytes; +} + +static void queue_request(struct fuse_conn *fc, struct fuse_req *req) +{ + fc->reqctr++; + /* zero is special */ + if (fc->reqctr == 0) + fc->reqctr = 1; + req->in.h.unique = fc->reqctr; + req->in.h.len = sizeof(struct fuse_in_header) + + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); + if (!req->preallocated) { + /* If request is not preallocated (either FORGET or + RELEASE), then still decrease outstanding_sem, so + user can't open infinite number of files while not + processing the RELEASE requests. However for + efficiency do it without blocking, so if down() + would block, just increase the debt instead */ + if (down_trylock(&fc->outstanding_sem)) + fc->outstanding_debt++; + } + list_add_tail(&req->list, &fc->pending); + wake_up(&fc->waitq); +} + +/* + * This can only be interrupted by a SIGKILL + */ +void request_send(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 1; + spin_lock(&fuse_lock); + if (!fc->connected) + req->out.h.error = -ENOTCONN; + else if (fc->conn_error) + req->out.h.error = -ECONNREFUSED; + else { + queue_request(fc, req); + /* acquire extra reference, since request is still needed + after request_end() */ + __fuse_get_request(req); + + request_wait_answer(fc, req); + } + spin_unlock(&fuse_lock); +} + +static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) +{ + spin_lock(&fuse_lock); + if (fc->connected) { + queue_request(fc, req); + spin_unlock(&fuse_lock); + } else { + req->out.h.error = -ENOTCONN; + request_end(fc, req); + } +} + +void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 0; + request_send_nowait(fc, req); +} + +void request_send_background(struct fuse_conn *fc, struct fuse_req *req) +{ + req->isreply = 1; + spin_lock(&fuse_lock); + background_request(fc, req); + spin_unlock(&fuse_lock); + request_send_nowait(fc, req); +} + +void fuse_send_init(struct fuse_conn *fc) +{ + /* This is called from fuse_read_super() so there's guaranteed + to be a request available */ + struct fuse_req *req = do_get_request(fc); + struct fuse_init_in_out *arg = &req->misc.init_in_out; + arg->major = FUSE_KERNEL_VERSION; + arg->minor = FUSE_KERNEL_MINOR_VERSION; + req->in.h.opcode = FUSE_INIT; + req->in.numargs = 1; + req->in.args[0].size = sizeof(*arg); + req->in.args[0].value = arg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(*arg); + req->out.args[0].value = arg; + request_send_background(fc, req); +} + +/* + * Lock the request. Up to the next unlock_request() there mustn't be + * anything that could cause a page-fault. If the request was already + * interrupted bail out. + */ +static inline int lock_request(struct fuse_req *req) +{ + int err = 0; + if (req) { + spin_lock(&fuse_lock); + if (req->interrupted) + err = -ENOENT; + else + req->locked = 1; + spin_unlock(&fuse_lock); + } + return err; +} + +/* + * Unlock request. If it was interrupted during being locked, the + * requester thread is currently waiting for it to be unlocked, so + * wake it up. + */ +static inline void unlock_request(struct fuse_req *req) +{ + if (req) { + spin_lock(&fuse_lock); + req->locked = 0; + if (req->interrupted) + wake_up(&req->waitq); + spin_unlock(&fuse_lock); + } +} + +struct fuse_copy_state { + int write; + struct fuse_req *req; + const struct iovec *iov; + unsigned long nr_segs; + unsigned long seglen; + unsigned long addr; + struct page *pg; + void *mapaddr; + void *buf; + unsigned len; +}; + +static void fuse_copy_init(struct fuse_copy_state *cs, int write, + struct fuse_req *req, const struct iovec *iov, + unsigned long nr_segs) +{ + memset(cs, 0, sizeof(*cs)); + cs->write = write; + cs->req = req; + cs->iov = iov; + cs->nr_segs = nr_segs; +} + +/* Unmap and put previous page of userspace buffer */ +static inline void fuse_copy_finish(struct fuse_copy_state *cs) +{ + if (cs->mapaddr) { + kunmap_atomic(cs->mapaddr, KM_USER0); + if (cs->write) { + flush_dcache_page(cs->pg); + set_page_dirty_lock(cs->pg); + } + put_page(cs->pg); + cs->mapaddr = NULL; + } +} + +/* + * Get another pagefull of userspace buffer, and map it to kernel + * address space, and lock request + */ +static int fuse_copy_fill(struct fuse_copy_state *cs) +{ + unsigned long offset; + int err; + + unlock_request(cs->req); + fuse_copy_finish(cs); + if (!cs->seglen) { + BUG_ON(!cs->nr_segs); + cs->seglen = cs->iov[0].iov_len; + cs->addr = (unsigned long) cs->iov[0].iov_base; + cs->iov ++; + cs->nr_segs --; + } + down_read(¤t->mm->mmap_sem); + err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, + &cs->pg, NULL); + up_read(¤t->mm->mmap_sem); + if (err < 0) + return err; + BUG_ON(err != 1); + offset = cs->addr % PAGE_SIZE; + cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); + cs->buf = cs->mapaddr + offset; + cs->len = min(PAGE_SIZE - offset, cs->seglen); + cs->seglen -= cs->len; + cs->addr += cs->len; + + return lock_request(cs->req); +} + +/* Do as much copy to/from userspace buffer as we can */ +static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, + unsigned *size) +{ + unsigned ncpy = min(*size, cs->len); + if (val) { + if (cs->write) + memcpy(cs->buf, *val, ncpy); + else + memcpy(*val, cs->buf, ncpy); + *val += ncpy; + } + *size -= ncpy; + cs->len -= ncpy; + cs->buf += ncpy; + return ncpy; +} + +/* + * Copy a page in the request to/from the userspace buffer. Must be + * done atomically + */ +static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, + unsigned offset, unsigned count, int zeroing) +{ + if (page && zeroing && count < PAGE_SIZE) { + void *mapaddr = kmap_atomic(page, KM_USER1); + memset(mapaddr, 0, PAGE_SIZE); + kunmap_atomic(mapaddr, KM_USER1); + } + while (count) { + int err; + if (!cs->len && (err = fuse_copy_fill(cs))) + return err; + if (page) { + void *mapaddr = kmap_atomic(page, KM_USER1); + void *buf = mapaddr + offset; + offset += fuse_copy_do(cs, &buf, &count); + kunmap_atomic(mapaddr, KM_USER1); + } else + offset += fuse_copy_do(cs, NULL, &count); + } + if (page && !cs->write) + flush_dcache_page(page); + return 0; +} + +/* Copy pages in the request to/from userspace buffer */ +static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, + int zeroing) +{ + unsigned i; + struct fuse_req *req = cs->req; + unsigned offset = req->page_offset; + unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); + + for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { + struct page *page = req->pages[i]; + int err = fuse_copy_page(cs, page, offset, count, zeroing); + if (err) + return err; + + nbytes -= count; + count = min(nbytes, (unsigned) PAGE_SIZE); + offset = 0; + } + return 0; +} + +/* Copy a single argument in the request to/from userspace buffer */ +static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) +{ + while (size) { + int err; + if (!cs->len && (err = fuse_copy_fill(cs))) + return err; + fuse_copy_do(cs, &val, &size); + } + return 0; +} + +/* Copy request arguments to/from userspace buffer */ +static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, + unsigned argpages, struct fuse_arg *args, + int zeroing) +{ + int err = 0; + unsigned i; + + for (i = 0; !err && i < numargs; i++) { + struct fuse_arg *arg = &args[i]; + if (i == numargs - 1 && argpages) + err = fuse_copy_pages(cs, arg->size, zeroing); + else + err = fuse_copy_one(cs, arg->value, arg->size); + } + return err; +} + +/* Wait until a request is available on the pending list */ +static void request_wait(struct fuse_conn *fc) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&fc->waitq, &wait); + while (fc->mounted && list_empty(&fc->pending)) { + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + break; + + spin_unlock(&fuse_lock); + schedule(); + spin_lock(&fuse_lock); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&fc->waitq, &wait); +} + +/* + * Read a single request into the userspace filesystem's buffer. This + * function waits until a request is available, then removes it from + * the pending list and copies request data to userspace buffer. If + * no reply is needed (FORGET) or request has been interrupted or + * there was an error during the copying then it's finished by calling + * request_end(). Otherwise add it to the processing list, and set + * the 'sent' flag. + */ +static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *off) +{ + int err; + struct fuse_conn *fc; + struct fuse_req *req; + struct fuse_in *in; + struct fuse_copy_state cs; + unsigned reqsize; + + spin_lock(&fuse_lock); + fc = file->private_data; + err = -EPERM; + if (!fc) + goto err_unlock; + request_wait(fc); + err = -ENODEV; + if (!fc->mounted) + goto err_unlock; + err = -ERESTARTSYS; + if (list_empty(&fc->pending)) + goto err_unlock; + + req = list_entry(fc->pending.next, struct fuse_req, list); + list_del_init(&req->list); + spin_unlock(&fuse_lock); + + in = &req->in; + reqsize = req->in.h.len; + fuse_copy_init(&cs, 1, req, iov, nr_segs); + err = -EINVAL; + if (iov_length(iov, nr_segs) >= reqsize) { + err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); + if (!err) + err = fuse_copy_args(&cs, in->numargs, in->argpages, + (struct fuse_arg *) in->args, 0); + } + fuse_copy_finish(&cs); + + spin_lock(&fuse_lock); + req->locked = 0; + if (!err && req->interrupted) + err = -ENOENT; + if (err) { + if (!req->interrupted) + req->out.h.error = -EIO; + request_end(fc, req); + return err; + } + if (!req->isreply) + request_end(fc, req); + else { + req->sent = 1; + list_add_tail(&req->list, &fc->processing); + spin_unlock(&fuse_lock); + } + return reqsize; + + err_unlock: + spin_unlock(&fuse_lock); + return err; +} + +static ssize_t fuse_dev_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *off) +{ + struct iovec iov; + iov.iov_len = nbytes; + iov.iov_base = buf; + return fuse_dev_readv(file, &iov, 1, off); +} + +/* Look up request on processing list by unique ID */ +static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) +{ + struct list_head *entry; + + list_for_each(entry, &fc->processing) { + struct fuse_req *req; + req = list_entry(entry, struct fuse_req, list); + if (req->in.h.unique == unique) + return req; + } + return NULL; +} + +static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, + unsigned nbytes) +{ + unsigned reqsize = sizeof(struct fuse_out_header); + + if (out->h.error) + return nbytes != reqsize ? -EINVAL : 0; + + reqsize += len_args(out->numargs, out->args); + + if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) + return -EINVAL; + else if (reqsize > nbytes) { + struct fuse_arg *lastarg = &out->args[out->numargs-1]; + unsigned diffsize = reqsize - nbytes; + if (diffsize > lastarg->size) + return -EINVAL; + lastarg->size -= diffsize; + } + return fuse_copy_args(cs, out->numargs, out->argpages, out->args, + out->page_zeroing); +} + +/* + * Write a single reply to a request. First the header is copied from + * the write buffer. The request is then searched on the processing + * list by the unique ID found in the header. If found, then remove + * it from the list and copy the rest of the buffer to the request. + * The request is finished by calling request_end() + */ +static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *off) +{ + int err; + unsigned nbytes = iov_length(iov, nr_segs); + struct fuse_req *req; + struct fuse_out_header oh; + struct fuse_copy_state cs; + struct fuse_conn *fc = fuse_get_conn(file); + if (!fc) + return -ENODEV; + + fuse_copy_init(&cs, 0, NULL, iov, nr_segs); + if (nbytes < sizeof(struct fuse_out_header)) + return -EINVAL; + + err = fuse_copy_one(&cs, &oh, sizeof(oh)); + if (err) + goto err_finish; + err = -EINVAL; + if (!oh.unique || oh.error <= -1000 || oh.error > 0 || + oh.len != nbytes) + goto err_finish; + + spin_lock(&fuse_lock); + req = request_find(fc, oh.unique); + err = -EINVAL; + if (!req) + goto err_unlock; + + list_del_init(&req->list); + if (req->interrupted) { + request_end(fc, req); + fuse_copy_finish(&cs); + return -ENOENT; + } + req->out.h = oh; + req->locked = 1; + cs.req = req; + spin_unlock(&fuse_lock); + + err = copy_out_args(&cs, &req->out, nbytes); + fuse_copy_finish(&cs); + + spin_lock(&fuse_lock); + req->locked = 0; + if (!err) { + if (req->interrupted) + err = -ENOENT; + } else if (!req->interrupted) + req->out.h.error = -EIO; + request_end(fc, req); + + return err ? err : nbytes; + + err_unlock: + spin_unlock(&fuse_lock); + err_finish: + fuse_copy_finish(&cs); + return err; +} + +static ssize_t fuse_dev_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *off) +{ + struct iovec iov; + iov.iov_len = nbytes; + iov.iov_base = (char __user *) buf; + return fuse_dev_writev(file, &iov, 1, off); +} + +static unsigned fuse_dev_poll(struct file *file, poll_table *wait) +{ + struct fuse_conn *fc = fuse_get_conn(file); + unsigned mask = POLLOUT | POLLWRNORM; + + if (!fc) + return -ENODEV; + + poll_wait(file, &fc->waitq, wait); + + spin_lock(&fuse_lock); + if (!list_empty(&fc->pending)) + mask |= POLLIN | POLLRDNORM; + spin_unlock(&fuse_lock); + + return mask; +} + +/* Abort all requests on the given list (pending or processing) */ +static void end_requests(struct fuse_conn *fc, struct list_head *head) +{ + while (!list_empty(head)) { + struct fuse_req *req; + req = list_entry(head->next, struct fuse_req, list); + list_del_init(&req->list); + req->out.h.error = -ECONNABORTED; + request_end(fc, req); + spin_lock(&fuse_lock); + } +} + +static int fuse_dev_release(struct inode *inode, struct file *file) +{ + struct fuse_conn *fc; + + spin_lock(&fuse_lock); + fc = file->private_data; + if (fc) { + fc->connected = 0; + end_requests(fc, &fc->pending); + end_requests(fc, &fc->processing); + fuse_release_conn(fc); + } + spin_unlock(&fuse_lock); + return 0; +} + +struct file_operations fuse_dev_operations = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = fuse_dev_read, + .readv = fuse_dev_readv, + .write = fuse_dev_write, + .writev = fuse_dev_writev, + .poll = fuse_dev_poll, + .release = fuse_dev_release, +}; + +static struct miscdevice fuse_miscdevice = { + .minor = FUSE_MINOR, + .name = "fuse", + .fops = &fuse_dev_operations, +}; + +int __init fuse_dev_init(void) +{ + int err = -ENOMEM; + fuse_req_cachep = kmem_cache_create("fuse_request", + sizeof(struct fuse_req), + 0, 0, NULL, NULL); + if (!fuse_req_cachep) + goto out; + + err = misc_register(&fuse_miscdevice); + if (err) + goto out_cache_clean; + + return 0; + + out_cache_clean: + kmem_cache_destroy(fuse_req_cachep); + out: + return err; +} + +void fuse_dev_cleanup(void) +{ + misc_deregister(&fuse_miscdevice); + kmem_cache_destroy(fuse_req_cachep); +} diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c new file mode 100644 index 00000000000..e79e49b3eec --- /dev/null +++ b/fs/fuse/dir.c @@ -0,0 +1,982 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/pagemap.h> +#include <linux/file.h> +#include <linux/gfp.h> +#include <linux/sched.h> +#include <linux/namei.h> + +static inline unsigned long time_to_jiffies(unsigned long sec, + unsigned long nsec) +{ + struct timespec ts = {sec, nsec}; + return jiffies + timespec_to_jiffies(&ts); +} + +static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, + struct dentry *entry, + struct fuse_entry_out *outarg) +{ + req->in.h.opcode = FUSE_LOOKUP; + req->in.h.nodeid = get_node_id(dir); + req->inode = dir; + req->in.numargs = 1; + req->in.args[0].size = entry->d_name.len + 1; + req->in.args[0].value = entry->d_name.name; + req->out.numargs = 1; + req->out.args[0].size = sizeof(struct fuse_entry_out); + req->out.args[0].value = outarg; +} + +static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) +{ + if (!entry->d_inode || is_bad_inode(entry->d_inode)) + return 0; + else if (time_after(jiffies, entry->d_time)) { + int err; + struct fuse_entry_out outarg; + struct inode *inode = entry->d_inode; + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return 0; + + fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); + request_send(fc, req); + err = req->out.h.error; + if (!err) { + if (outarg.nodeid != get_node_id(inode)) { + fuse_send_forget(fc, req, outarg.nodeid, 1); + return 0; + } + fi->nlookup ++; + } + fuse_put_request(fc, req); + if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + return 0; + + fuse_change_attributes(inode, &outarg.attr); + entry->d_time = time_to_jiffies(outarg.entry_valid, + outarg.entry_valid_nsec); + fi->i_time = time_to_jiffies(outarg.attr_valid, + outarg.attr_valid_nsec); + } + return 1; +} + +static struct dentry_operations fuse_dentry_operations = { + .d_revalidate = fuse_dentry_revalidate, +}; + +static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, + struct inode **inodep) +{ + int err; + struct fuse_entry_out outarg; + struct inode *inode = NULL; + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_req *req; + + if (entry->d_name.len > FUSE_NAME_MAX) + return -ENAMETOOLONG; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + fuse_lookup_init(req, dir, entry, &outarg); + request_send(fc, req); + err = req->out.h.error; + if (!err) { + inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, + &outarg.attr); + if (!inode) { + fuse_send_forget(fc, req, outarg.nodeid, 1); + return -ENOMEM; + } + } + fuse_put_request(fc, req); + if (err && err != -ENOENT) + return err; + + if (inode) { + struct fuse_inode *fi = get_fuse_inode(inode); + entry->d_time = time_to_jiffies(outarg.entry_valid, + outarg.entry_valid_nsec); + fi->i_time = time_to_jiffies(outarg.attr_valid, + outarg.attr_valid_nsec); + } + + entry->d_op = &fuse_dentry_operations; + *inodep = inode; + return 0; +} + +void fuse_invalidate_attr(struct inode *inode) +{ + get_fuse_inode(inode)->i_time = jiffies - 1; +} + +static void fuse_invalidate_entry(struct dentry *entry) +{ + d_invalidate(entry); + entry->d_time = jiffies - 1; +} + +static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, + struct inode *dir, struct dentry *entry, + int mode) +{ + struct fuse_entry_out outarg; + struct inode *inode; + struct fuse_inode *fi; + int err; + + req->in.h.nodeid = get_node_id(dir); + req->inode = dir; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + if (err) { + fuse_put_request(fc, req); + return err; + } + inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, + &outarg.attr); + if (!inode) { + fuse_send_forget(fc, req, outarg.nodeid, 1); + return -ENOMEM; + } + fuse_put_request(fc, req); + + /* Don't allow userspace to do really stupid things... */ + if ((inode->i_mode ^ mode) & S_IFMT) { + iput(inode); + return -EIO; + } + + entry->d_time = time_to_jiffies(outarg.entry_valid, + outarg.entry_valid_nsec); + + fi = get_fuse_inode(inode); + fi->i_time = time_to_jiffies(outarg.attr_valid, + outarg.attr_valid_nsec); + + d_instantiate(entry, inode); + fuse_invalidate_attr(dir); + return 0; +} + +static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, + dev_t rdev) +{ + struct fuse_mknod_in inarg; + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.mode = mode; + inarg.rdev = new_encode_dev(rdev); + req->in.h.opcode = FUSE_MKNOD; + req->in.numargs = 2; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->in.args[1].size = entry->d_name.len + 1; + req->in.args[1].value = entry->d_name.name; + return create_new_entry(fc, req, dir, entry, mode); +} + +static int fuse_create(struct inode *dir, struct dentry *entry, int mode, + struct nameidata *nd) +{ + return fuse_mknod(dir, entry, mode, 0); +} + +static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) +{ + struct fuse_mkdir_in inarg; + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.mode = mode; + req->in.h.opcode = FUSE_MKDIR; + req->in.numargs = 2; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->in.args[1].size = entry->d_name.len + 1; + req->in.args[1].value = entry->d_name.name; + return create_new_entry(fc, req, dir, entry, S_IFDIR); +} + +static int fuse_symlink(struct inode *dir, struct dentry *entry, + const char *link) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + unsigned len = strlen(link) + 1; + struct fuse_req *req; + + if (len > FUSE_SYMLINK_MAX) + return -ENAMETOOLONG; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->in.h.opcode = FUSE_SYMLINK; + req->in.numargs = 2; + req->in.args[0].size = entry->d_name.len + 1; + req->in.args[0].value = entry->d_name.name; + req->in.args[1].size = len; + req->in.args[1].value = link; + return create_new_entry(fc, req, dir, entry, S_IFLNK); +} + +static int fuse_unlink(struct inode *dir, struct dentry *entry) +{ + int err; + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->in.h.opcode = FUSE_UNLINK; + req->in.h.nodeid = get_node_id(dir); + req->inode = dir; + req->in.numargs = 1; + req->in.args[0].size = entry->d_name.len + 1; + req->in.args[0].value = entry->d_name.name; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) { + struct inode *inode = entry->d_inode; + + /* Set nlink to zero so the inode can be cleared, if + the inode does have more links this will be + discovered at the next lookup/getattr */ + inode->i_nlink = 0; + fuse_invalidate_attr(inode); + fuse_invalidate_attr(dir); + } else if (err == -EINTR) + fuse_invalidate_entry(entry); + return err; +} + +static int fuse_rmdir(struct inode *dir, struct dentry *entry) +{ + int err; + struct fuse_conn *fc = get_fuse_conn(dir); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->in.h.opcode = FUSE_RMDIR; + req->in.h.nodeid = get_node_id(dir); + req->inode = dir; + req->in.numargs = 1; + req->in.args[0].size = entry->d_name.len + 1; + req->in.args[0].value = entry->d_name.name; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) { + entry->d_inode->i_nlink = 0; + fuse_invalidate_attr(dir); + } else if (err == -EINTR) + fuse_invalidate_entry(entry); + return err; +} + +static int fuse_rename(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + int err; + struct fuse_rename_in inarg; + struct fuse_conn *fc = get_fuse_conn(olddir); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.newdir = get_node_id(newdir); + req->in.h.opcode = FUSE_RENAME; + req->in.h.nodeid = get_node_id(olddir); + req->inode = olddir; + req->inode2 = newdir; + req->in.numargs = 3; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->in.args[1].size = oldent->d_name.len + 1; + req->in.args[1].value = oldent->d_name.name; + req->in.args[2].size = newent->d_name.len + 1; + req->in.args[2].value = newent->d_name.name; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) { + fuse_invalidate_attr(olddir); + if (olddir != newdir) + fuse_invalidate_attr(newdir); + } else if (err == -EINTR) { + /* If request was interrupted, DEITY only knows if the + rename actually took place. If the invalidation + fails (e.g. some process has CWD under the renamed + directory), then there can be inconsistency between + the dcache and the real filesystem. Tough luck. */ + fuse_invalidate_entry(oldent); + if (newent->d_inode) + fuse_invalidate_entry(newent); + } + + return err; +} + +static int fuse_link(struct dentry *entry, struct inode *newdir, + struct dentry *newent) +{ + int err; + struct fuse_link_in inarg; + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.oldnodeid = get_node_id(inode); + req->in.h.opcode = FUSE_LINK; + req->inode2 = inode; + req->in.numargs = 2; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->in.args[1].size = newent->d_name.len + 1; + req->in.args[1].value = newent->d_name.name; + err = create_new_entry(fc, req, newdir, newent, inode->i_mode); + /* Contrary to "normal" filesystems it can happen that link + makes two "logical" inodes point to the same "physical" + inode. We invalidate the attributes of the old one, so it + will reflect changes in the backing inode (link count, + etc.) + */ + if (!err || err == -EINTR) + fuse_invalidate_attr(inode); + return err; +} + +int fuse_do_getattr(struct inode *inode) +{ + int err; + struct fuse_attr_out arg; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->in.h.opcode = FUSE_GETATTR; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->out.numargs = 1; + req->out.args[0].size = sizeof(arg); + req->out.args[0].value = &arg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) { + if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) { + make_bad_inode(inode); + err = -EIO; + } else { + struct fuse_inode *fi = get_fuse_inode(inode); + fuse_change_attributes(inode, &arg.attr); + fi->i_time = time_to_jiffies(arg.attr_valid, + arg.attr_valid_nsec); + } + } + return err; +} + +/* + * Calling into a user-controlled filesystem gives the filesystem + * daemon ptrace-like capabilities over the requester process. This + * means, that the filesystem daemon is able to record the exact + * filesystem operations performed, and can also control the behavior + * of the requester process in otherwise impossible ways. For example + * it can delay the operation for arbitrary length of time allowing + * DoS against the requester. + * + * For this reason only those processes can call into the filesystem, + * for which the owner of the mount has ptrace privilege. This + * excludes processes started by other users, suid or sgid processes. + */ +static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) +{ + if (fc->flags & FUSE_ALLOW_OTHER) + return 1; + + if (task->euid == fc->user_id && + task->suid == fc->user_id && + task->uid == fc->user_id && + task->egid == fc->group_id && + task->sgid == fc->group_id && + task->gid == fc->group_id) + return 1; + + return 0; +} + +static int fuse_revalidate(struct dentry *entry) +{ + struct inode *inode = entry->d_inode; + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + if (get_node_id(inode) != FUSE_ROOT_ID && + time_before_eq(jiffies, fi->i_time)) + return 0; + + return fuse_do_getattr(inode); +} + +static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { + int err = generic_permission(inode, mask, NULL); + + /* If permission is denied, try to refresh file + attributes. This is also needed, because the root + node will at first have no permissions */ + if (err == -EACCES) { + err = fuse_do_getattr(inode); + if (!err) + err = generic_permission(inode, mask, NULL); + } + + /* FIXME: Need some mechanism to revoke permissions: + currently if the filesystem suddenly changes the + file mode, we will not be informed about it, and + continue to allow access to the file/directory. + + This is actually not so grave, since the user can + simply keep access to the file/directory anyway by + keeping it open... */ + + return err; + } else { + int mode = inode->i_mode; + if ((mask & MAY_WRITE) && IS_RDONLY(inode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) + return -EACCES; + return 0; + } +} + +static int parse_dirfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir) +{ + while (nbytes >= FUSE_NAME_OFFSET) { + struct fuse_dirent *dirent = (struct fuse_dirent *) buf; + size_t reclen = FUSE_DIRENT_SIZE(dirent); + int over; + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, dirent->type); + if (over) + break; + + buf += reclen; + nbytes -= reclen; + file->f_pos = dirent->off; + } + + return 0; +} + +static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, + size_t count) +{ + return fuse_send_read_common(req, file, inode, pos, count, 1); +} + +static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) +{ + int err; + size_t nbytes; + struct page *page; + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + page = alloc_page(GFP_KERNEL); + if (!page) { + fuse_put_request(fc, req); + return -ENOMEM; + } + req->num_pages = 1; + req->pages[0] = page; + nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) + err = parse_dirfile(page_address(page), nbytes, file, dstbuf, + filldir); + + __free_page(page); + fuse_invalidate_attr(inode); /* atime changed */ + return err; +} + +static char *read_link(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = fuse_get_request(fc); + char *link; + + if (!req) + return ERR_PTR(-EINTR); + + link = (char *) __get_free_page(GFP_KERNEL); + if (!link) { + link = ERR_PTR(-ENOMEM); + goto out; + } + req->in.h.opcode = FUSE_READLINK; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->out.argvar = 1; + req->out.numargs = 1; + req->out.args[0].size = PAGE_SIZE - 1; + req->out.args[0].value = link; + request_send(fc, req); + if (req->out.h.error) { + free_page((unsigned long) link); + link = ERR_PTR(req->out.h.error); + } else + link[req->out.args[0].size] = '\0'; + out: + fuse_put_request(fc, req); + fuse_invalidate_attr(inode); /* atime changed */ + return link; +} + +static void free_link(char *link) +{ + if (!IS_ERR(link)) + free_page((unsigned long) link); +} + +static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + nd_set_link(nd, read_link(dentry)); + return NULL; +} + +static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) +{ + free_link(nd_get_link(nd)); +} + +static int fuse_dir_open(struct inode *inode, struct file *file) +{ + return fuse_open_common(inode, file, 1); +} + +static int fuse_dir_release(struct inode *inode, struct file *file) +{ + return fuse_release_common(inode, file, 1); +} + +static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) +{ + /* nfsd can call this with no file */ + return file ? fuse_fsync_common(file, de, datasync, 1) : 0; +} + +static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr) +{ + unsigned ivalid = iattr->ia_valid; + unsigned fvalid = 0; + + memset(fattr, 0, sizeof(*fattr)); + + if (ivalid & ATTR_MODE) + fvalid |= FATTR_MODE, fattr->mode = iattr->ia_mode; + if (ivalid & ATTR_UID) + fvalid |= FATTR_UID, fattr->uid = iattr->ia_uid; + if (ivalid & ATTR_GID) + fvalid |= FATTR_GID, fattr->gid = iattr->ia_gid; + if (ivalid & ATTR_SIZE) + fvalid |= FATTR_SIZE, fattr->size = iattr->ia_size; + /* You can only _set_ these together (they may change by themselves) */ + if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { + fvalid |= FATTR_ATIME | FATTR_MTIME; + fattr->atime = iattr->ia_atime.tv_sec; + fattr->mtime = iattr->ia_mtime.tv_sec; + } + + return fvalid; +} + +static int fuse_setattr(struct dentry *entry, struct iattr *attr) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_req *req; + struct fuse_setattr_in inarg; + struct fuse_attr_out outarg; + int err; + int is_truncate = 0; + + if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { + err = inode_change_ok(inode, attr); + if (err) + return err; + } + + if (attr->ia_valid & ATTR_SIZE) { + unsigned long limit; + is_truncate = 1; + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + } + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.valid = iattr_to_fattr(attr, &inarg.attr); + req->in.h.opcode = FUSE_SETATTR; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) { + if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + make_bad_inode(inode); + err = -EIO; + } else { + if (is_truncate) { + loff_t origsize = i_size_read(inode); + i_size_write(inode, outarg.attr.size); + if (origsize > outarg.attr.size) + vmtruncate(inode, outarg.attr.size); + } + fuse_change_attributes(inode, &outarg.attr); + fi->i_time = time_to_jiffies(outarg.attr_valid, + outarg.attr_valid_nsec); + } + } else if (err == -EINTR) + fuse_invalidate_attr(inode); + + return err; +} + +static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, + struct kstat *stat) +{ + struct inode *inode = entry->d_inode; + int err = fuse_revalidate(entry); + if (!err) + generic_fillattr(inode, stat); + + return err; +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + struct nameidata *nd) +{ + struct inode *inode; + int err = fuse_lookup_iget(dir, entry, &inode); + if (err) + return ERR_PTR(err); + if (inode && S_ISDIR(inode->i_mode)) { + /* Don't allow creating an alias to a directory */ + struct dentry *alias = d_find_alias(inode); + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { + dput(alias); + iput(inode); + return ERR_PTR(-EIO); + } + } + return d_splice_alias(inode, entry); +} + +static int fuse_setxattr(struct dentry *entry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_setxattr_in inarg; + int err; + + if (size > FUSE_XATTR_SIZE_MAX) + return -E2BIG; + + if (fc->no_setxattr) + return -EOPNOTSUPP; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.size = size; + inarg.flags = flags; + req->in.h.opcode = FUSE_SETXATTR; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 3; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->in.args[1].size = strlen(name) + 1; + req->in.args[1].value = name; + req->in.args[2].size = size; + req->in.args[2].value = value; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) { + fc->no_setxattr = 1; + err = -EOPNOTSUPP; + } + return err; +} + +static ssize_t fuse_getxattr(struct dentry *entry, const char *name, + void *value, size_t size) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_getxattr_in inarg; + struct fuse_getxattr_out outarg; + ssize_t ret; + + if (fc->no_getxattr) + return -EOPNOTSUPP; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.size = size; + req->in.h.opcode = FUSE_GETXATTR; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 2; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->in.args[1].size = strlen(name) + 1; + req->in.args[1].value = name; + /* This is really two different operations rolled into one */ + req->out.numargs = 1; + if (size) { + req->out.argvar = 1; + req->out.args[0].size = size; + req->out.args[0].value = value; + } else { + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + } + request_send(fc, req); + ret = req->out.h.error; + if (!ret) + ret = size ? req->out.args[0].size : outarg.size; + else { + if (ret == -ENOSYS) { + fc->no_getxattr = 1; + ret = -EOPNOTSUPP; + } + } + fuse_put_request(fc, req); + return ret; +} + +static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_getxattr_in inarg; + struct fuse_getxattr_out outarg; + ssize_t ret; + + if (fc->no_listxattr) + return -EOPNOTSUPP; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.size = size; + req->in.h.opcode = FUSE_LISTXATTR; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + /* This is really two different operations rolled into one */ + req->out.numargs = 1; + if (size) { + req->out.argvar = 1; + req->out.args[0].size = size; + req->out.args[0].value = list; + } else { + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + } + request_send(fc, req); + ret = req->out.h.error; + if (!ret) + ret = size ? req->out.args[0].size : outarg.size; + else { + if (ret == -ENOSYS) { + fc->no_listxattr = 1; + ret = -EOPNOTSUPP; + } + } + fuse_put_request(fc, req); + return ret; +} + +static int fuse_removexattr(struct dentry *entry, const char *name) +{ + struct inode *inode = entry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + int err; + + if (fc->no_removexattr) + return -EOPNOTSUPP; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->in.h.opcode = FUSE_REMOVEXATTR; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 1; + req->in.args[0].size = strlen(name) + 1; + req->in.args[0].value = name; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) { + fc->no_removexattr = 1; + err = -EOPNOTSUPP; + } + return err; +} + +static struct inode_operations fuse_dir_inode_operations = { + .lookup = fuse_lookup, + .mkdir = fuse_mkdir, + .symlink = fuse_symlink, + .unlink = fuse_unlink, + .rmdir = fuse_rmdir, + .rename = fuse_rename, + .link = fuse_link, + .setattr = fuse_setattr, + .create = fuse_create, + .mknod = fuse_mknod, + .permission = fuse_permission, + .getattr = fuse_getattr, + .setxattr = fuse_setxattr, + .getxattr = fuse_getxattr, + .listxattr = fuse_listxattr, + .removexattr = fuse_removexattr, +}; + +static struct file_operations fuse_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = fuse_readdir, + .open = fuse_dir_open, + .release = fuse_dir_release, + .fsync = fuse_dir_fsync, +}; + +static struct inode_operations fuse_common_inode_operations = { + .setattr = fuse_setattr, + .permission = fuse_permission, + .getattr = fuse_getattr, + .setxattr = fuse_setxattr, + .getxattr = fuse_getxattr, + .listxattr = fuse_listxattr, + .removexattr = fuse_removexattr, +}; + +static struct inode_operations fuse_symlink_inode_operations = { + .setattr = fuse_setattr, + .follow_link = fuse_follow_link, + .put_link = fuse_put_link, + .readlink = generic_readlink, + .getattr = fuse_getattr, + .setxattr = fuse_setxattr, + .getxattr = fuse_getxattr, + .listxattr = fuse_listxattr, + .removexattr = fuse_removexattr, +}; + +void fuse_init_common(struct inode *inode) +{ + inode->i_op = &fuse_common_inode_operations; +} + +void fuse_init_dir(struct inode *inode) +{ + inode->i_op = &fuse_dir_inode_operations; + inode->i_fop = &fuse_dir_operations; +} + +void fuse_init_symlink(struct inode *inode) +{ + inode->i_op = &fuse_symlink_inode_operations; +} diff --git a/fs/fuse/file.c b/fs/fuse/file.c new file mode 100644 index 00000000000..6454022b053 --- /dev/null +++ b/fs/fuse/file.c @@ -0,0 +1,555 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/kernel.h> + +static struct file_operations fuse_direct_io_file_operations; + +int fuse_open_common(struct inode *inode, struct file *file, int isdir) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_open_in inarg; + struct fuse_open_out outarg; + struct fuse_file *ff; + int err; + + err = generic_file_open(inode, file); + if (err) + return err; + + /* If opening the root node, no lookup has been performed on + it, so the attributes must be refreshed */ + if (get_node_id(inode) == FUSE_ROOT_ID) { + int err = fuse_do_getattr(inode); + if (err) + return err; + } + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + err = -ENOMEM; + ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); + if (!ff) + goto out_put_request; + + ff->release_req = fuse_request_alloc(); + if (!ff->release_req) { + kfree(ff); + goto out_put_request; + } + + memset(&inarg, 0, sizeof(inarg)); + inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + if (err) { + fuse_request_free(ff->release_req); + kfree(ff); + } else { + if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO)) + file->f_op = &fuse_direct_io_file_operations; + if (!(outarg.open_flags & FOPEN_KEEP_CACHE)) + invalidate_inode_pages(inode->i_mapping); + ff->fh = outarg.fh; + file->private_data = ff; + } + + out_put_request: + fuse_put_request(fc, req); + return err; +} + +int fuse_release_common(struct inode *inode, struct file *file, int isdir) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_req *req = ff->release_req; + struct fuse_release_in *inarg = &req->misc.release_in; + + inarg->fh = ff->fh; + inarg->flags = file->f_flags & ~O_EXCL; + req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->in.numargs = 1; + req->in.args[0].size = sizeof(struct fuse_release_in); + req->in.args[0].value = inarg; + request_send_background(fc, req); + kfree(ff); + + /* Return value is ignored by VFS */ + return 0; +} + +static int fuse_open(struct inode *inode, struct file *file) +{ + return fuse_open_common(inode, file, 0); +} + +static int fuse_release(struct inode *inode, struct file *file) +{ + return fuse_release_common(inode, file, 0); +} + +static int fuse_flush(struct file *file) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_req *req; + struct fuse_flush_in inarg; + int err; + + if (fc->no_flush) + return 0; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; + req->in.h.opcode = FUSE_FLUSH; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->file = file; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) { + fc->no_flush = 1; + err = 0; + } + return err; +} + +int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, + int isdir) +{ + struct inode *inode = de->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_req *req; + struct fuse_fsync_in inarg; + int err; + + if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) + return 0; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; + inarg.fsync_flags = datasync ? 1 : 0; + req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->file = file; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) { + if (isdir) + fc->no_fsyncdir = 1; + else + fc->no_fsync = 1; + err = 0; + } + return err; +} + +static int fuse_fsync(struct file *file, struct dentry *de, int datasync) +{ + return fuse_fsync_common(file, de, datasync, 0); +} + +size_t fuse_send_read_common(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count, + int isdir) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_read_in inarg; + + memset(&inarg, 0, sizeof(struct fuse_read_in)); + inarg.fh = ff->fh; + inarg.offset = pos; + inarg.size = count; + req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->file = file; + req->in.numargs = 1; + req->in.args[0].size = sizeof(struct fuse_read_in); + req->in.args[0].value = &inarg; + req->out.argpages = 1; + req->out.argvar = 1; + req->out.numargs = 1; + req->out.args[0].size = count; + request_send(fc, req); + return req->out.args[0].size; +} + +static inline size_t fuse_send_read(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, + size_t count) +{ + return fuse_send_read_common(req, file, inode, pos, count, 0); +} + +static int fuse_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT; + struct fuse_req *req = fuse_get_request(fc); + int err = -EINTR; + if (!req) + goto out; + + req->out.page_zeroing = 1; + req->num_pages = 1; + req->pages[0] = page; + fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) + SetPageUptodate(page); + fuse_invalidate_attr(inode); /* atime changed */ + out: + unlock_page(page); + return err; +} + +static int fuse_send_readpages(struct fuse_req *req, struct file *file, + struct inode *inode) +{ + loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT; + size_t count = req->num_pages << PAGE_CACHE_SHIFT; + unsigned i; + req->out.page_zeroing = 1; + fuse_send_read(req, file, inode, pos, count); + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + if (!req->out.h.error) + SetPageUptodate(page); + unlock_page(page); + } + return req->out.h.error; +} + +struct fuse_readpages_data { + struct fuse_req *req; + struct file *file; + struct inode *inode; +}; + +static int fuse_readpages_fill(void *_data, struct page *page) +{ + struct fuse_readpages_data *data = _data; + struct fuse_req *req = data->req; + struct inode *inode = data->inode; + struct fuse_conn *fc = get_fuse_conn(inode); + + if (req->num_pages && + (req->num_pages == FUSE_MAX_PAGES_PER_REQ || + (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || + req->pages[req->num_pages - 1]->index + 1 != page->index)) { + int err = fuse_send_readpages(req, data->file, inode); + if (err) { + unlock_page(page); + return err; + } + fuse_reset_request(req); + } + req->pages[req->num_pages] = page; + req->num_pages ++; + return 0; +} + +static int fuse_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_readpages_data data; + int err; + data.file = file; + data.inode = inode; + data.req = fuse_get_request(fc); + if (!data.req) + return -EINTR; + + err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); + if (!err && data.req->num_pages) + err = fuse_send_readpages(data.req, file, inode); + fuse_put_request(fc, data.req); + fuse_invalidate_attr(inode); /* atime changed */ + return err; +} + +static size_t fuse_send_write(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_write_in inarg; + struct fuse_write_out outarg; + + memset(&inarg, 0, sizeof(struct fuse_write_in)); + inarg.fh = ff->fh; + inarg.offset = pos; + inarg.size = count; + req->in.h.opcode = FUSE_WRITE; + req->in.h.nodeid = get_node_id(inode); + req->inode = inode; + req->file = file; + req->in.argpages = 1; + req->in.numargs = 2; + req->in.args[0].size = sizeof(struct fuse_write_in); + req->in.args[0].value = &inarg; + req->in.args[1].size = count; + req->out.numargs = 1; + req->out.args[0].size = sizeof(struct fuse_write_out); + req->out.args[0].value = &outarg; + request_send(fc, req); + return outarg.size; +} + +static int fuse_prepare_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + /* No op */ + return 0; +} + +static int fuse_commit_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + int err; + size_t nres; + unsigned count = to - offset; + struct inode *inode = page->mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset; + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->num_pages = 1; + req->pages[0] = page; + req->page_offset = offset; + nres = fuse_send_write(req, file, inode, pos, count); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err && nres != count) + err = -EIO; + if (!err) { + pos += count; + if (pos > i_size_read(inode)) + i_size_write(inode, pos); + + if (offset == 0 && to == PAGE_CACHE_SIZE) { + clear_page_dirty(page); + SetPageUptodate(page); + } + } + fuse_invalidate_attr(inode); + return err; +} + +static void fuse_release_user_pages(struct fuse_req *req, int write) +{ + unsigned i; + + for (i = 0; i < req->num_pages; i++) { + struct page *page = req->pages[i]; + if (write) + set_page_dirty_lock(page); + put_page(page); + } +} + +static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, + unsigned nbytes, int write) +{ + unsigned long user_addr = (unsigned long) buf; + unsigned offset = user_addr & ~PAGE_MASK; + int npages; + + /* This doesn't work with nfsd */ + if (!current->mm) + return -EPERM; + + nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); + npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + npages = min(npages, FUSE_MAX_PAGES_PER_REQ); + down_read(¤t->mm->mmap_sem); + npages = get_user_pages(current, current->mm, user_addr, npages, write, + 0, req->pages, NULL); + up_read(¤t->mm->mmap_sem); + if (npages < 0) + return npages; + + req->num_pages = npages; + req->page_offset = offset; + return 0; +} + +static ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + size_t nmax = write ? fc->max_write : fc->max_read; + loff_t pos = *ppos; + ssize_t res = 0; + struct fuse_req *req = fuse_get_request(fc); + if (!req) + return -EINTR; + + while (count) { + size_t tmp; + size_t nres; + size_t nbytes = min(count, nmax); + int err = fuse_get_user_pages(req, buf, nbytes, !write); + if (err) { + res = err; + break; + } + tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset; + nbytes = min(nbytes, tmp); + if (write) + nres = fuse_send_write(req, file, inode, pos, nbytes); + else + nres = fuse_send_read(req, file, inode, pos, nbytes); + fuse_release_user_pages(req, !write); + if (req->out.h.error) { + if (!res) + res = req->out.h.error; + break; + } else if (nres > nbytes) { + res = -EIO; + break; + } + count -= nres; + res += nres; + pos += nres; + buf += nres; + if (nres != nbytes) + break; + if (count) + fuse_reset_request(req); + } + fuse_put_request(fc, req); + if (res > 0) { + if (write && pos > i_size_read(inode)) + i_size_write(inode, pos); + *ppos = pos; + } + fuse_invalidate_attr(inode); + + return res; +} + +static ssize_t fuse_direct_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return fuse_direct_io(file, buf, count, ppos, 0); +} + +static ssize_t fuse_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode; + ssize_t res; + /* Don't allow parallel writes to the same file */ + down(&inode->i_sem); + res = fuse_direct_io(file, buf, count, ppos, 1); + up(&inode->i_sem); + return res; +} + +static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & VM_SHARED)) { + if ((vma->vm_flags & VM_WRITE)) + return -ENODEV; + else + vma->vm_flags &= ~VM_MAYWRITE; + } + return generic_file_mmap(file, vma); +} + +static int fuse_set_page_dirty(struct page *page) +{ + printk("fuse_set_page_dirty: should not happen\n"); + dump_stack(); + return 0; +} + +static struct file_operations fuse_file_operations = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .write = generic_file_write, + .mmap = fuse_file_mmap, + .open = fuse_open, + .flush = fuse_flush, + .release = fuse_release, + .fsync = fuse_fsync, + .sendfile = generic_file_sendfile, +}; + +static struct file_operations fuse_direct_io_file_operations = { + .llseek = generic_file_llseek, + .read = fuse_direct_read, + .write = fuse_direct_write, + .open = fuse_open, + .flush = fuse_flush, + .release = fuse_release, + .fsync = fuse_fsync, + /* no mmap and sendfile */ +}; + +static struct address_space_operations fuse_file_aops = { + .readpage = fuse_readpage, + .prepare_write = fuse_prepare_write, + .commit_write = fuse_commit_write, + .readpages = fuse_readpages, + .set_page_dirty = fuse_set_page_dirty, +}; + +void fuse_init_file_inode(struct inode *inode) +{ + inode->i_fop = &fuse_file_operations; + inode->i_data.a_ops = &fuse_file_aops; +} diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h new file mode 100644 index 00000000000..24d761518d8 --- /dev/null +++ b/fs/fuse/fuse_i.h @@ -0,0 +1,451 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include <linux/fuse.h> +#include <linux/fs.h> +#include <linux/wait.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/backing-dev.h> +#include <asm/semaphore.h> + +/** Max number of pages that can be used in a single read request */ +#define FUSE_MAX_PAGES_PER_REQ 32 + +/** If more requests are outstanding, then the operation will block */ +#define FUSE_MAX_OUTSTANDING 10 + +/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem + module will check permissions based on the file mode. Otherwise no + permission checking is done in the kernel */ +#define FUSE_DEFAULT_PERMISSIONS (1 << 0) + +/** If the FUSE_ALLOW_OTHER flag is given, then not only the user + doing the mount will be allowed to access the filesystem */ +#define FUSE_ALLOW_OTHER (1 << 1) + + +/** FUSE inode */ +struct fuse_inode { + /** Inode data */ + struct inode inode; + + /** Unique ID, which identifies the inode between userspace + * and kernel */ + u64 nodeid; + + /** Number of lookups on this inode */ + u64 nlookup; + + /** The request used for sending the FORGET message */ + struct fuse_req *forget_req; + + /** Time in jiffies until the file attributes are valid */ + unsigned long i_time; +}; + +/** FUSE specific file data */ +struct fuse_file { + /** Request reserved for flush and release */ + struct fuse_req *release_req; + + /** File handle used by userspace */ + u64 fh; +}; + +/** One input argument of a request */ +struct fuse_in_arg { + unsigned size; + const void *value; +}; + +/** The request input */ +struct fuse_in { + /** The request header */ + struct fuse_in_header h; + + /** True if the data for the last argument is in req->pages */ + unsigned argpages:1; + + /** Number of arguments */ + unsigned numargs; + + /** Array of arguments */ + struct fuse_in_arg args[3]; +}; + +/** One output argument of a request */ +struct fuse_arg { + unsigned size; + void *value; +}; + +/** The request output */ +struct fuse_out { + /** Header returned from userspace */ + struct fuse_out_header h; + + /** Last argument is variable length (can be shorter than + arg->size) */ + unsigned argvar:1; + + /** Last argument is a list of pages to copy data to */ + unsigned argpages:1; + + /** Zero partially or not copied pages */ + unsigned page_zeroing:1; + + /** Number or arguments */ + unsigned numargs; + + /** Array of arguments */ + struct fuse_arg args[3]; +}; + +struct fuse_req; +struct fuse_conn; + +/** + * A request to the client + */ +struct fuse_req { + /** This can be on either unused_list, pending or processing + lists in fuse_conn */ + struct list_head list; + + /** Entry on the background list */ + struct list_head bg_entry; + + /** refcount */ + atomic_t count; + + /** True if the request has reply */ + unsigned isreply:1; + + /** The request is preallocated */ + unsigned preallocated:1; + + /** The request was interrupted */ + unsigned interrupted:1; + + /** Request is sent in the background */ + unsigned background:1; + + /** Data is being copied to/from the request */ + unsigned locked:1; + + /** Request has been sent to userspace */ + unsigned sent:1; + + /** The request is finished */ + unsigned finished:1; + + /** The request input */ + struct fuse_in in; + + /** The request output */ + struct fuse_out out; + + /** Used to wake up the task waiting for completion of request*/ + wait_queue_head_t waitq; + + /** Data for asynchronous requests */ + union { + struct fuse_forget_in forget_in; + struct fuse_release_in release_in; + struct fuse_init_in_out init_in_out; + } misc; + + /** page vector */ + struct page *pages[FUSE_MAX_PAGES_PER_REQ]; + + /** number of pages in vector */ + unsigned num_pages; + + /** offset of data on first page */ + unsigned page_offset; + + /** Inode used in the request */ + struct inode *inode; + + /** Second inode used in the request (or NULL) */ + struct inode *inode2; + + /** File used in the request (or NULL) */ + struct file *file; +}; + +/** + * A Fuse connection. + * + * This structure is created, when the filesystem is mounted, and is + * destroyed, when the client device is closed and the filesystem is + * unmounted. + */ +struct fuse_conn { + /** Reference count */ + int count; + + /** The user id for this mount */ + uid_t user_id; + + /** The group id for this mount */ + gid_t group_id; + + /** The fuse mount flags for this mount */ + unsigned flags; + + /** Maximum read size */ + unsigned max_read; + + /** Maximum write size */ + unsigned max_write; + + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The list of pending requests */ + struct list_head pending; + + /** The list of requests being processed */ + struct list_head processing; + + /** Requests put in the background (RELEASE or any other + interrupted request) */ + struct list_head background; + + /** Controls the maximum number of outstanding requests */ + struct semaphore outstanding_sem; + + /** This counts the number of outstanding requests if + outstanding_sem would go negative */ + unsigned outstanding_debt; + + /** RW semaphore for exclusion with fuse_put_super() */ + struct rw_semaphore sbput_sem; + + /** The list of unused requests */ + struct list_head unused_list; + + /** The next unique request id */ + u64 reqctr; + + /** Mount is active */ + unsigned mounted : 1; + + /** Connection established */ + unsigned connected : 1; + + /** Connection failed (version mismatch) */ + unsigned conn_error : 1; + + /** Is fsync not implemented by fs? */ + unsigned no_fsync : 1; + + /** Is fsyncdir not implemented by fs? */ + unsigned no_fsyncdir : 1; + + /** Is flush not implemented by fs? */ + unsigned no_flush : 1; + + /** Is setxattr not implemented by fs? */ + unsigned no_setxattr : 1; + + /** Is getxattr not implemented by fs? */ + unsigned no_getxattr : 1; + + /** Is listxattr not implemented by fs? */ + unsigned no_listxattr : 1; + + /** Is removexattr not implemented by fs? */ + unsigned no_removexattr : 1; + + /** Backing dev info */ + struct backing_dev_info bdi; +}; + +static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb) +{ + return (struct fuse_conn **) &sb->s_fs_info; +} + +static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) +{ + return *get_fuse_conn_super_p(sb); +} + +static inline struct fuse_conn *get_fuse_conn(struct inode *inode) +{ + return get_fuse_conn_super(inode->i_sb); +} + +static inline struct fuse_inode *get_fuse_inode(struct inode *inode) +{ + return container_of(inode, struct fuse_inode, inode); +} + +static inline u64 get_node_id(struct inode *inode) +{ + return get_fuse_inode(inode)->nodeid; +} + +/** Device operations */ +extern struct file_operations fuse_dev_operations; + +/** + * This is the single global spinlock which protects FUSE's structures + * + * The following data is protected by this lock: + * + * - the private_data field of the device file + * - the s_fs_info field of the super block + * - unused_list, pending, processing lists in fuse_conn + * - background list in fuse_conn + * - the unique request ID counter reqctr in fuse_conn + * - the sb (super_block) field in fuse_conn + * - the file (device file) field in fuse_conn + */ +extern spinlock_t fuse_lock; + +/** + * Get a filled in inode + */ +struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, + int generation, struct fuse_attr *attr); + +/** + * Send FORGET command + */ +void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, + unsigned long nodeid, u64 nlookup); + +/** + * Send READ or READDIR request + */ +size_t fuse_send_read_common(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count, + int isdir); + +/** + * Send OPEN or OPENDIR request + */ +int fuse_open_common(struct inode *inode, struct file *file, int isdir); + +/** + * Send RELEASE or RELEASEDIR request + */ +int fuse_release_common(struct inode *inode, struct file *file, int isdir); + +/** + * Send FSYNC or FSYNCDIR request + */ +int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, + int isdir); + +/** + * Initialise file operations on a regular file + */ +void fuse_init_file_inode(struct inode *inode); + +/** + * Initialise inode operations on regular files and special files + */ +void fuse_init_common(struct inode *inode); + +/** + * Initialise inode and file operations on a directory + */ +void fuse_init_dir(struct inode *inode); + +/** + * Initialise inode operations on a symlink + */ +void fuse_init_symlink(struct inode *inode); + +/** + * Change attributes of an inode + */ +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr); + +/** + * Check if the connection can be released, and if yes, then free the + * connection structure + */ +void fuse_release_conn(struct fuse_conn *fc); + +/** + * Initialize the client device + */ +int fuse_dev_init(void); + +/** + * Cleanup the client device + */ +void fuse_dev_cleanup(void); + +/** + * Allocate a request + */ +struct fuse_req *fuse_request_alloc(void); + +/** + * Free a request + */ +void fuse_request_free(struct fuse_req *req); + +/** + * Reinitialize a request, the preallocated flag is left unmodified + */ +void fuse_reset_request(struct fuse_req *req); + +/** + * Reserve a preallocated request + */ +struct fuse_req *fuse_get_request(struct fuse_conn *fc); + +/** + * Decrement reference count of a request. If count goes to zero put + * on unused list (preallocated) or free reqest (not preallocated). + */ +void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); + +/** + * Send a request (synchronous) + */ +void request_send(struct fuse_conn *fc, struct fuse_req *req); + +/** + * Send a request with no reply + */ +void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); + +/** + * Send a request in the background + */ +void request_send_background(struct fuse_conn *fc, struct fuse_req *req); + +/** + * Release inodes and file assiciated with background request + */ +void fuse_release_background(struct fuse_req *req); + +/** + * Get the attributes of a file + */ +int fuse_do_getattr(struct inode *inode); + +/** + * Invalidate inode attributes + */ +void fuse_invalidate_attr(struct inode *inode); + +/** + * Send the INIT message + */ +void fuse_send_init(struct fuse_conn *fc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c new file mode 100644 index 00000000000..e69a546844d --- /dev/null +++ b/fs/fuse/inode.c @@ -0,0 +1,591 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/parser.h> +#include <linux/statfs.h> + +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); +MODULE_DESCRIPTION("Filesystem in Userspace"); +MODULE_LICENSE("GPL"); + +spinlock_t fuse_lock; +static kmem_cache_t *fuse_inode_cachep; + +#define FUSE_SUPER_MAGIC 0x65735546 + +struct fuse_mount_data { + int fd; + unsigned rootmode; + unsigned user_id; + unsigned group_id; + unsigned fd_present : 1; + unsigned rootmode_present : 1; + unsigned user_id_present : 1; + unsigned group_id_present : 1; + unsigned flags; + unsigned max_read; +}; + +static struct inode *fuse_alloc_inode(struct super_block *sb) +{ + struct inode *inode; + struct fuse_inode *fi; + + inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL); + if (!inode) + return NULL; + + fi = get_fuse_inode(inode); + fi->i_time = jiffies - 1; + fi->nodeid = 0; + fi->nlookup = 0; + fi->forget_req = fuse_request_alloc(); + if (!fi->forget_req) { + kmem_cache_free(fuse_inode_cachep, inode); + return NULL; + } + + return inode; +} + +static void fuse_destroy_inode(struct inode *inode) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + if (fi->forget_req) + fuse_request_free(fi->forget_req); + kmem_cache_free(fuse_inode_cachep, inode); +} + +static void fuse_read_inode(struct inode *inode) +{ + /* No op */ +} + +void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, + unsigned long nodeid, u64 nlookup) +{ + struct fuse_forget_in *inarg = &req->misc.forget_in; + inarg->nlookup = nlookup; + req->in.h.opcode = FUSE_FORGET; + req->in.h.nodeid = nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(struct fuse_forget_in); + req->in.args[0].value = inarg; + request_send_noreply(fc, req); +} + +static void fuse_clear_inode(struct inode *inode) +{ + if (inode->i_sb->s_flags & MS_ACTIVE) { + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup); + fi->forget_req = NULL; + } +} + +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) +{ + if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) + invalidate_inode_pages(inode->i_mapping); + + inode->i_ino = attr->ino; + inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777); + inode->i_nlink = attr->nlink; + inode->i_uid = attr->uid; + inode->i_gid = attr->gid; + i_size_write(inode, attr->size); + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = attr->blocks; + inode->i_atime.tv_sec = attr->atime; + inode->i_atime.tv_nsec = attr->atimensec; + inode->i_mtime.tv_sec = attr->mtime; + inode->i_mtime.tv_nsec = attr->mtimensec; + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = attr->ctimensec; +} + +static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) +{ + inode->i_mode = attr->mode & S_IFMT; + i_size_write(inode, attr->size); + if (S_ISREG(inode->i_mode)) { + fuse_init_common(inode); + fuse_init_file_inode(inode); + } else if (S_ISDIR(inode->i_mode)) + fuse_init_dir(inode); + else if (S_ISLNK(inode->i_mode)) + fuse_init_symlink(inode); + else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + fuse_init_common(inode); + init_special_inode(inode, inode->i_mode, + new_decode_dev(attr->rdev)); + } else { + /* Don't let user create weird files */ + inode->i_mode = S_IFREG; + fuse_init_common(inode); + fuse_init_file_inode(inode); + } +} + +static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +{ + unsigned long nodeid = *(unsigned long *) _nodeidp; + if (get_node_id(inode) == nodeid) + return 1; + else + return 0; +} + +static int fuse_inode_set(struct inode *inode, void *_nodeidp) +{ + unsigned long nodeid = *(unsigned long *) _nodeidp; + get_fuse_inode(inode)->nodeid = nodeid; + return 0; +} + +struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, + int generation, struct fuse_attr *attr) +{ + struct inode *inode; + struct fuse_inode *fi; + struct fuse_conn *fc = get_fuse_conn_super(sb); + int retried = 0; + + retry: + inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); + if (!inode) + return NULL; + + if ((inode->i_state & I_NEW)) { + inode->i_flags |= S_NOATIME|S_NOCMTIME; + inode->i_generation = generation; + inode->i_data.backing_dev_info = &fc->bdi; + fuse_init_inode(inode, attr); + unlock_new_inode(inode); + } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { + BUG_ON(retried); + /* Inode has changed type, any I/O on the old should fail */ + make_bad_inode(inode); + iput(inode); + retried = 1; + goto retry; + } + + fi = get_fuse_inode(inode); + fi->nlookup ++; + fuse_change_attributes(inode, attr); + return inode; +} + +static void fuse_put_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + down_write(&fc->sbput_sem); + while (!list_empty(&fc->background)) + fuse_release_background(list_entry(fc->background.next, + struct fuse_req, bg_entry)); + + spin_lock(&fuse_lock); + fc->mounted = 0; + fc->user_id = 0; + fc->group_id = 0; + fc->flags = 0; + /* Flush all readers on this fs */ + wake_up_all(&fc->waitq); + up_write(&fc->sbput_sem); + fuse_release_conn(fc); + spin_unlock(&fuse_lock); +} + +static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) +{ + stbuf->f_type = FUSE_SUPER_MAGIC; + stbuf->f_bsize = attr->bsize; + stbuf->f_blocks = attr->blocks; + stbuf->f_bfree = attr->bfree; + stbuf->f_bavail = attr->bavail; + stbuf->f_files = attr->files; + stbuf->f_ffree = attr->ffree; + stbuf->f_namelen = attr->namelen; + /* fsid is left zero */ +} + +static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct fuse_req *req; + struct fuse_statfs_out outarg; + int err; + + req = fuse_get_request(fc); + if (!req) + return -EINTR; + + req->in.numargs = 0; + req->in.h.opcode = FUSE_STATFS; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + if (!err) + convert_fuse_statfs(buf, &outarg.st); + fuse_put_request(fc, req); + return err; +} + +enum { + OPT_FD, + OPT_ROOTMODE, + OPT_USER_ID, + OPT_GROUP_ID, + OPT_DEFAULT_PERMISSIONS, + OPT_ALLOW_OTHER, + OPT_MAX_READ, + OPT_ERR +}; + +static match_table_t tokens = { + {OPT_FD, "fd=%u"}, + {OPT_ROOTMODE, "rootmode=%o"}, + {OPT_USER_ID, "user_id=%u"}, + {OPT_GROUP_ID, "group_id=%u"}, + {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, + {OPT_ALLOW_OTHER, "allow_other"}, + {OPT_MAX_READ, "max_read=%u"}, + {OPT_ERR, NULL} +}; + +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d) +{ + char *p; + memset(d, 0, sizeof(struct fuse_mount_data)); + d->max_read = ~0; + + while ((p = strsep(&opt, ",")) != NULL) { + int token; + int value; + substring_t args[MAX_OPT_ARGS]; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case OPT_FD: + if (match_int(&args[0], &value)) + return 0; + d->fd = value; + d->fd_present = 1; + break; + + case OPT_ROOTMODE: + if (match_octal(&args[0], &value)) + return 0; + d->rootmode = value; + d->rootmode_present = 1; + break; + + case OPT_USER_ID: + if (match_int(&args[0], &value)) + return 0; + d->user_id = value; + d->user_id_present = 1; + break; + + case OPT_GROUP_ID: + if (match_int(&args[0], &value)) + return 0; + d->group_id = value; + d->group_id_present = 1; + break; + + case OPT_DEFAULT_PERMISSIONS: + d->flags |= FUSE_DEFAULT_PERMISSIONS; + break; + + case OPT_ALLOW_OTHER: + d->flags |= FUSE_ALLOW_OTHER; + break; + + case OPT_MAX_READ: + if (match_int(&args[0], &value)) + return 0; + d->max_read = value; + break; + + default: + return 0; + } + } + + if (!d->fd_present || !d->rootmode_present || + !d->user_id_present || !d->group_id_present) + return 0; + + return 1; +} + +static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb); + + seq_printf(m, ",user_id=%u", fc->user_id); + seq_printf(m, ",group_id=%u", fc->group_id); + if (fc->flags & FUSE_DEFAULT_PERMISSIONS) + seq_puts(m, ",default_permissions"); + if (fc->flags & FUSE_ALLOW_OTHER) + seq_puts(m, ",allow_other"); + if (fc->max_read != ~0) + seq_printf(m, ",max_read=%u", fc->max_read); + return 0; +} + +static void free_conn(struct fuse_conn *fc) +{ + while (!list_empty(&fc->unused_list)) { + struct fuse_req *req; + req = list_entry(fc->unused_list.next, struct fuse_req, list); + list_del(&req->list); + fuse_request_free(req); + } + kfree(fc); +} + +/* Must be called with the fuse lock held */ +void fuse_release_conn(struct fuse_conn *fc) +{ + fc->count--; + if (!fc->count) + free_conn(fc); +} + +static struct fuse_conn *new_conn(void) +{ + struct fuse_conn *fc; + + fc = kmalloc(sizeof(*fc), GFP_KERNEL); + if (fc != NULL) { + int i; + memset(fc, 0, sizeof(*fc)); + init_waitqueue_head(&fc->waitq); + INIT_LIST_HEAD(&fc->pending); + INIT_LIST_HEAD(&fc->processing); + INIT_LIST_HEAD(&fc->unused_list); + INIT_LIST_HEAD(&fc->background); + sema_init(&fc->outstanding_sem, 0); + init_rwsem(&fc->sbput_sem); + for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) { + struct fuse_req *req = fuse_request_alloc(); + if (!req) { + free_conn(fc); + return NULL; + } + list_add(&req->list, &fc->unused_list); + } + fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + fc->bdi.unplug_io_fn = default_unplug_io_fn; + fc->reqctr = 0; + } + return fc; +} + +static struct fuse_conn *get_conn(struct file *file, struct super_block *sb) +{ + struct fuse_conn *fc; + + if (file->f_op != &fuse_dev_operations) + return ERR_PTR(-EINVAL); + fc = new_conn(); + if (fc == NULL) + return ERR_PTR(-ENOMEM); + spin_lock(&fuse_lock); + if (file->private_data) { + free_conn(fc); + fc = ERR_PTR(-EINVAL); + } else { + file->private_data = fc; + *get_fuse_conn_super_p(sb) = fc; + fc->mounted = 1; + fc->connected = 1; + fc->count = 2; + } + spin_unlock(&fuse_lock); + return fc; +} + +static struct inode *get_root_inode(struct super_block *sb, unsigned mode) +{ + struct fuse_attr attr; + memset(&attr, 0, sizeof(attr)); + + attr.mode = mode; + attr.ino = FUSE_ROOT_ID; + return fuse_iget(sb, 1, 0, &attr); +} + +static struct super_operations fuse_super_operations = { + .alloc_inode = fuse_alloc_inode, + .destroy_inode = fuse_destroy_inode, + .read_inode = fuse_read_inode, + .clear_inode = fuse_clear_inode, + .put_super = fuse_put_super, + .statfs = fuse_statfs, + .show_options = fuse_show_options, +}; + +static int fuse_fill_super(struct super_block *sb, void *data, int silent) +{ + struct fuse_conn *fc; + struct inode *root; + struct fuse_mount_data d; + struct file *file; + int err; + + if (!parse_fuse_opt((char *) data, &d)) + return -EINVAL; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = FUSE_SUPER_MAGIC; + sb->s_op = &fuse_super_operations; + sb->s_maxbytes = MAX_LFS_FILESIZE; + + file = fget(d.fd); + if (!file) + return -EINVAL; + + fc = get_conn(file, sb); + fput(file); + if (IS_ERR(fc)) + return PTR_ERR(fc); + + fc->flags = d.flags; + fc->user_id = d.user_id; + fc->group_id = d.group_id; + fc->max_read = d.max_read; + if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) + fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; + fc->max_write = FUSE_MAX_IN / 2; + + err = -ENOMEM; + root = get_root_inode(sb, d.rootmode); + if (root == NULL) + goto err; + + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + iput(root); + goto err; + } + fuse_send_init(fc); + return 0; + + err: + spin_lock(&fuse_lock); + fuse_release_conn(fc); + spin_unlock(&fuse_lock); + return err; +} + +static struct super_block *fuse_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data) +{ + return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super); +} + +static struct file_system_type fuse_fs_type = { + .owner = THIS_MODULE, + .name = "fuse", + .get_sb = fuse_get_sb, + .kill_sb = kill_anon_super, +}; + +static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, + unsigned long flags) +{ + struct inode * inode = foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(inode); +} + +static int __init fuse_fs_init(void) +{ + int err; + + err = register_filesystem(&fuse_fs_type); + if (err) + printk("fuse: failed to register filesystem\n"); + else { + fuse_inode_cachep = kmem_cache_create("fuse_inode", + sizeof(struct fuse_inode), + 0, SLAB_HWCACHE_ALIGN, + fuse_inode_init_once, NULL); + if (!fuse_inode_cachep) { + unregister_filesystem(&fuse_fs_type); + err = -ENOMEM; + } + } + + return err; +} + +static void fuse_fs_cleanup(void) +{ + unregister_filesystem(&fuse_fs_type); + kmem_cache_destroy(fuse_inode_cachep); +} + +static int __init fuse_init(void) +{ + int res; + + printk("fuse init (API version %i.%i)\n", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); + + spin_lock_init(&fuse_lock); + res = fuse_fs_init(); + if (res) + goto err; + + res = fuse_dev_init(); + if (res) + goto err_fs_cleanup; + + return 0; + + err_fs_cleanup: + fuse_fs_cleanup(); + err: + return res; +} + +static void __exit fuse_exit(void) +{ + printk(KERN_DEBUG "fuse exit\n"); + + fuse_fs_cleanup(); + fuse_dev_cleanup(); +} + +module_init(fuse_init); +module_exit(fuse_exit); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index b2d18200a00..59c5062cd63 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) static void hostfs_delete_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); if(HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); HOSTFS_I(inode)->fd = -1; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 38b1741fa53..e3d17e9ea6c 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode) void hpfs_delete_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); lock_kernel(); hpfs_remove_fnode(inode->i_sb, inode->i_ino); unlock_kernel(); diff --git a/fs/inode.c b/fs/inode.c index 71df1b1e8f7..f80a79ff156 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode) inodes_stat.nr_inodes--; spin_unlock(&inode_lock); - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); - security_inode_delete(inode); if (op->delete_inode) { void (*delete)(struct inode *) = op->delete_inode; if (!is_bad_inode(inode)) DQUOT_INIT(inode); - /* s_op->delete_inode internally recalls clear_inode() */ + /* Filesystems implementing their own + * s_op->delete_inode are required to call + * truncate_inode_pages and clear_inode() + * internally */ delete(inode); - } else + } else { + truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); + } spin_lock(&inode_lock); hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index c6ec66fd876..49bbc2be3d7 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1340,8 +1340,7 @@ int journal_stop(handle_t *handle) if (handle->h_sync) { do { old_handle_count = transaction->t_handle_count; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_uninterruptible(1); } while (old_handle_count != transaction->t_handle_count); } diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 777b90057b8..3dcc6d2162c 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1744,6 +1744,7 @@ jffs_delete_inode(struct inode *inode) D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n", inode->i_ino)); + truncate_inode_pages(&inode->i_data, 0); lock_kernel(); inode->i_size = 0; inode->i_blocks = 0; diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 456d7e6e29c..27f199e94cf 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino) { struct jffs_file *f; int i = ino % c->hash_len; - struct list_head *tmp; D3(printk("jffs_find_file(): ino: %u\n", ino)); - for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) { - f = list_entry(tmp, struct jffs_file, hash); + list_for_each_entry(f, &c->hash[i], hash) { if (ino != f->ino) continue; D3(printk("jffs_find_file(): Found file with ino " @@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *)) int result = 0; for (pos = 0; pos < c->hash_len; pos++) { - struct list_head *p, *next; - for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) { - /* We need a reference to the next file in the - list because `func' might remove the current - file `f'. */ - next = p->next; - r = func(list_entry(p, struct jffs_file, hash)); + struct jffs_file *f, *next; + + /* We must do _safe, because 'func' might remove the + current file 'f' from the list. */ + list_for_each_entry_safe(f, next, &c->hash[pos], hash) { + r = func(f); if (r < 0) return r; result += r; @@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c) printk("JFFS: Dumping the file system's hash table...\n"); for (i = 0; i < c->hash_len; i++) { - struct list_head *p; - for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) { - struct jffs_file *f=list_entry(p,struct jffs_file,hash); + struct jffs_file *f; + list_for_each_entry(f, &c->hash[i], hash) { printk("*** c->hash[%u]: \"%s\" " "(ino: %u, pino: %u)\n", i, (f->name ? f->name : ""), diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index e892dab40c2..461e4934ca7 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -23,6 +23,7 @@ #include <linux/quotaops.h> #include <linux/posix_acl_xattr.h> #include "jfs_incore.h" +#include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" @@ -75,7 +76,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) return acl; } -static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +static int jfs_set_acl(tid_t tid, struct inode *inode, int type, + struct posix_acl *acl) { char *ea_name; struct jfs_inode_info *ji = JFS_IP(inode); @@ -110,7 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) if (rc < 0) goto out; } - rc = __jfs_setxattr(inode, ea_name, value, size, 0); + rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0); out: kfree(value); @@ -143,7 +145,7 @@ int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) return generic_permission(inode, mask, jfs_check_acl); } -int jfs_init_acl(struct inode *inode, struct inode *dir) +int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; struct posix_acl *clone; @@ -159,7 +161,7 @@ int jfs_init_acl(struct inode *inode, struct inode *dir) if (acl) { if (S_ISDIR(inode->i_mode)) { - rc = jfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); if (rc) goto cleanup; } @@ -173,7 +175,8 @@ int jfs_init_acl(struct inode *inode, struct inode *dir) if (rc >= 0) { inode->i_mode = mode; if (rc > 0) - rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, + clone); } posix_acl_release(clone); cleanup: @@ -202,8 +205,15 @@ static int jfs_acl_chmod(struct inode *inode) return -ENOMEM; rc = posix_acl_chmod_masq(clone, inode->i_mode); - if (!rc) - rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + if (!rc) { + tid_t tid = txBegin(inode->i_sb, 0); + down(&JFS_IP(inode)->commit_sem); + rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone); + if (!rc) + rc = txCommit(tid, 1, &inode, 0); + txEnd(tid); + up(&JFS_IP(inode)->commit_sem); + } posix_acl_release(clone); return rc; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 767c7ecb429..0ec62d5310d 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -128,21 +128,23 @@ void jfs_delete_inode(struct inode *inode) { jfs_info("In jfs_delete_inode, inode = 0x%p", inode); - if (is_bad_inode(inode) || - (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I))) - return; + if (!is_bad_inode(inode) && + (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) { - if (test_cflag(COMMIT_Freewmap, inode)) - jfs_free_zero_link(inode); + truncate_inode_pages(&inode->i_data, 0); - diFree(inode); + if (test_cflag(COMMIT_Freewmap, inode)) + jfs_free_zero_link(inode); - /* - * Free the inode from the quota allocation. - */ - DQUOT_INIT(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); + diFree(inode); + + /* + * Free the inode from the quota allocation. + */ + DQUOT_INIT(inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + } clear_inode(inode); } diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index a3acd3eec05..a76293767c7 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -21,8 +21,16 @@ #ifdef CONFIG_JFS_POSIX_ACL int jfs_permission(struct inode *, int, struct nameidata *); -int jfs_init_acl(struct inode *, struct inode *); +int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); -#endif /* CONFIG_JFS_POSIX_ACL */ +#else + +static inline int jfs_init_acl(tid_t tid, struct inode *inode, + struct inode *dir) +{ + return 0; +} + +#endif #endif /* _H_JFS_ACL */ diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h index a1052f3f0be..25e9990bccd 100644 --- a/fs/jfs/jfs_xattr.h +++ b/fs/jfs/jfs_xattr.h @@ -52,8 +52,8 @@ struct jfs_ea_list { #define END_EALIST(ealist) \ ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) -extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, - int); +extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *, + size_t, int); extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, int); extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); @@ -61,4 +61,14 @@ extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); extern int jfs_removexattr(struct dentry *, const char *); +#ifdef CONFIG_JFS_SECURITY +extern int jfs_init_security(tid_t, struct inode *, struct inode *); +#else +static inline int jfs_init_security(tid_t tid, struct inode *inode, + struct inode *dir) +{ + return 0; +} +#endif + #endif /* H_JFS_XATTR */ diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 49ccde3937f..1abe7343f92 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -39,6 +39,24 @@ struct dentry_operations jfs_ci_dentry_operations; static s64 commitZeroLink(tid_t, struct inode *); /* + * NAME: free_ea_wmap(inode) + * + * FUNCTION: free uncommitted extended attributes from working map + * + */ +static inline void free_ea_wmap(struct inode *inode) +{ + dxd_t *ea = &JFS_IP(inode)->ea; + + if (ea->flag & DXD_EXTENT) { + /* free EA pages from cache */ + invalidate_dxd_metapages(inode, *ea); + dbFree(inode, addressDXD(ea), lengthDXD(ea)); + } + ea->flag = 0; +} + +/* * NAME: jfs_create(dip, dentry, mode) * * FUNCTION: create a regular file in the parent directory <dip> @@ -89,8 +107,19 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, down(&JFS_IP(dip)->commit_sem); down(&JFS_IP(ip)->commit_sem); + rc = jfs_init_acl(tid, ip, dip); + if (rc) + goto out3; + + rc = jfs_init_security(tid, ip, dip); + if (rc) { + txAbort(tid, 0); + goto out3; + } + if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { jfs_err("jfs_create: dtSearch returned %d", rc); + txAbort(tid, 0); goto out3; } @@ -139,6 +168,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, up(&JFS_IP(dip)->commit_sem); up(&JFS_IP(ip)->commit_sem); if (rc) { + free_ea_wmap(ip); ip->i_nlink = 0; iput(ip); } else @@ -147,11 +177,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, out2: free_UCSname(&dname); -#ifdef CONFIG_JFS_POSIX_ACL - if (rc == 0) - jfs_init_acl(ip, dip); -#endif - out1: jfs_info("jfs_create: rc:%d", rc); @@ -216,8 +241,19 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) down(&JFS_IP(dip)->commit_sem); down(&JFS_IP(ip)->commit_sem); + rc = jfs_init_acl(tid, ip, dip); + if (rc) + goto out3; + + rc = jfs_init_security(tid, ip, dip); + if (rc) { + txAbort(tid, 0); + goto out3; + } + if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { jfs_err("jfs_mkdir: dtSearch returned %d", rc); + txAbort(tid, 0); goto out3; } @@ -267,6 +303,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) up(&JFS_IP(dip)->commit_sem); up(&JFS_IP(ip)->commit_sem); if (rc) { + free_ea_wmap(ip); ip->i_nlink = 0; iput(ip); } else @@ -275,10 +312,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) out2: free_UCSname(&dname); -#ifdef CONFIG_JFS_POSIX_ACL - if (rc == 0) - jfs_init_acl(ip, dip); -#endif out1: @@ -885,6 +918,10 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, down(&JFS_IP(dip)->commit_sem); down(&JFS_IP(ip)->commit_sem); + rc = jfs_init_security(tid, ip, dip); + if (rc) + goto out3; + tblk = tid_to_tblock(tid); tblk->xflag |= COMMIT_CREATE; tblk->ino = ip->i_ino; @@ -1000,6 +1037,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, up(&JFS_IP(dip)->commit_sem); up(&JFS_IP(ip)->commit_sem); if (rc) { + free_ea_wmap(ip); ip->i_nlink = 0; iput(ip); } else @@ -1008,11 +1046,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, out2: free_UCSname(&dname); -#ifdef CONFIG_JFS_POSIX_ACL - if (rc == 0) - jfs_init_acl(ip, dip); -#endif - out1: jfs_info("jfs_symlink: rc:%d", rc); return rc; @@ -1328,8 +1361,20 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, down(&JFS_IP(dir)->commit_sem); down(&JFS_IP(ip)->commit_sem); - if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) + rc = jfs_init_acl(tid, ip, dir); + if (rc) + goto out3; + + rc = jfs_init_security(tid, ip, dir); + if (rc) { + txAbort(tid, 0); goto out3; + } + + if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) { + txAbort(tid, 0); + goto out3; + } tblk = tid_to_tblock(tid); tblk->xflag |= COMMIT_CREATE; @@ -1337,8 +1382,10 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, tblk->u.ixpxd = JFS_IP(ip)->ixpxd; ino = ip->i_ino; - if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) + if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) { + txAbort(tid, 0); goto out3; + } ip->i_op = &jfs_file_inode_operations; jfs_ip->dev = new_encode_dev(rdev); @@ -1360,6 +1407,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, up(&JFS_IP(ip)->commit_sem); up(&JFS_IP(dir)->commit_sem); if (rc) { + free_ea_wmap(ip); ip->i_nlink = 0; iput(ip); } else @@ -1368,11 +1416,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, out1: free_UCSname(&dname); -#ifdef CONFIG_JFS_POSIX_ACL - if (rc == 0) - jfs_init_acl(ip, dir); -#endif - out: jfs_info("jfs_mknod: returning %d", rc); return rc; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 554ec739e49..23aa5066b5a 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -21,6 +21,7 @@ #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> #include <linux/quotaops.h> +#include <linux/security.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -633,12 +634,12 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf) } } -static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size) +static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, + int new_size) { struct jfs_inode_info *ji = JFS_IP(inode); unsigned long old_blocks, new_blocks; int rc = 0; - tid_t tid; if (new_size == 0) { ea_release(inode, ea_buf); @@ -664,9 +665,6 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size) if (rc) return rc; - tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); - old_blocks = new_blocks = 0; if (ji->ea.flag & DXD_EXTENT) { @@ -695,11 +693,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size) DQUOT_FREE_BLOCK(inode, old_blocks); inode->i_ctime = CURRENT_TIME; - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - up(&ji->commit_sem); - return rc; + return 0; } /* @@ -810,8 +805,8 @@ static int can_set_xattr(struct inode *inode, const char *name, return permission(inode, MAY_WRITE, NULL); } -int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) +int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, + const void *value, size_t value_len, int flags) { struct jfs_ea_list *ealist; struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL; @@ -825,9 +820,6 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value, int rc; int length; - if ((rc = can_set_xattr(inode, name, value, value_len))) - return rc; - if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1, GFP_KERNEL); @@ -939,7 +931,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value, ealist->size = cpu_to_le32(new_size); - rc = ea_put(inode, &ea_buf, new_size); + rc = ea_put(tid, inode, &ea_buf, new_size); goto out; release: @@ -955,12 +947,29 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value, int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t value_len, int flags) { + struct inode *inode = dentry->d_inode; + struct jfs_inode_info *ji = JFS_IP(inode); + int rc; + tid_t tid; + + if ((rc = can_set_xattr(inode, name, value, value_len))) + return rc; + if (value == NULL) { /* empty EA, do not remove */ value = ""; value_len = 0; } - return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags); + tid = txBegin(inode->i_sb, 0); + down(&ji->commit_sem); + rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len, + flags); + if (!rc) + rc = txCommit(tid, 1, &inode, 0); + txEnd(tid); + up(&ji->commit_sem); + + return rc; } static int can_get_xattr(struct inode *inode, const char *name) @@ -1122,5 +1131,56 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) int jfs_removexattr(struct dentry *dentry, const char *name) { - return __jfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); + struct inode *inode = dentry->d_inode; + struct jfs_inode_info *ji = JFS_IP(inode); + int rc; + tid_t tid; + + if ((rc = can_set_xattr(inode, name, NULL, 0))) + return rc; + + tid = txBegin(inode->i_sb, 0); + down(&ji->commit_sem); + rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); + if (!rc) + rc = txCommit(tid, 1, &inode, 0); + txEnd(tid); + up(&ji->commit_sem); + + return rc; +} + +#ifdef CONFIG_JFS_SECURITY +int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir) +{ + int rc; + size_t len; + void *value; + char *suffix; + char *name; + + rc = security_inode_init_security(inode, dir, &suffix, &value, &len); + if (rc) { + if (rc == -EOPNOTSUPP) + return 0; + return rc; + } + name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix), + GFP_NOFS); + if (!name) { + rc = -ENOMEM; + goto kmalloc_failed; + } + strcpy(name, XATTR_SECURITY_PREFIX); + strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); + + rc = __jfs_setxattr(tid, inode, name, value, len, 0); + + kfree(name); +kmalloc_failed: + kfree(suffix); + kfree(value); + + return rc; } +#endif diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 14b3ce87fa2..87332f30141 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -299,8 +299,7 @@ nlmclnt_alloc_call(void) return call; } printk("nlmclnt_alloc_call: failed, waiting for memory\n"); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(5*HZ); + schedule_timeout_interruptible(5*HZ); } return NULL; } diff --git a/fs/locks.c b/fs/locks.c index 11956b6179f..c2c09b4798d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from) { struct files_struct *files = current->files; int i, j; + struct fdtable *fdt; if (from == files) return; lock_kernel(); j = 0; + fdt = files_fdtable(files); for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= files->max_fdset || i >= files->max_fds) + if (i >= fdt->max_fdset || i >= fdt->max_fds) break; - set = files->open_fds->fds_bits[j++]; + set = fdt->open_fds->fds_bits[j++]; while (set) { if (set & 1) { - struct file *file = files->fd[i]; + struct file *file = fdt->fd[i]; if (file) __steal_locks(file, from); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 3f18c21198d..790cc0d0e97 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data); static void minix_delete_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; minix_truncate(inode); minix_free_inode(inode); diff --git a/fs/namei.c b/fs/namei.c index 145e852c4bd..21d85f1ac83 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1316,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; DQUOT_INIT(dir); error = dir->i_op->create(dir, dentry, mode, nd); - if (!error) { + if (!error) fsnotify_create(dir, dentry->d_name.name); - security_inode_post_create(dir, dentry, mode); - } return error; } @@ -1635,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) DQUOT_INIT(dir); error = dir->i_op->mknod(dir, dentry, mode, dev); - if (!error) { + if (!error) fsnotify_create(dir, dentry->d_name.name); - security_inode_post_mknod(dir, dentry, mode, dev); - } return error; } @@ -1708,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) DQUOT_INIT(dir); error = dir->i_op->mkdir(dir, dentry, mode); - if (!error) { + if (!error) fsnotify_mkdir(dir, dentry->d_name.name); - security_inode_post_mkdir(dir,dentry, mode); - } return error; } @@ -1947,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i DQUOT_INIT(dir); error = dir->i_op->symlink(dir, dentry, oldname); - if (!error) { + if (!error) fsnotify_create(dir, dentry->d_name.name); - security_inode_post_symlink(dir, dentry, oldname); - } return error; } @@ -2020,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de DQUOT_INIT(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); up(&old_dentry->d_inode->i_sem); - if (!error) { + if (!error) fsnotify_create(dir, new_dentry->d_name.name); - security_inode_post_link(old_dentry, dir, new_dentry); - } return error; } @@ -2142,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, d_rehash(new_dentry); dput(new_dentry); } - if (!error) { + if (!error) d_move(old_dentry,new_dentry); - security_inode_post_rename(old_dir, old_dentry, - new_dir, new_dentry); - } return error; } @@ -2172,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, /* The following d_move() should become unconditional */ if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) d_move(old_dentry, new_dentry); - security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry); } if (target) up(&target->i_sem); diff --git a/fs/namespace.c b/fs/namespace.c index 34156260c9b..2fa9fdf7d6f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry) static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) { struct vfsmount *res, *p, *q, *r, *s; - struct list_head *h; struct nameidata nd; res = q = clone_mnt(mnt, dentry); @@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry) q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; - for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) { - r = list_entry(h, struct vfsmount, mnt_child); + list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) continue; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 44795d2f4b3..8c8839203cd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) static void ncp_delete_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); + if (S_ISDIR(inode->i_mode)) { DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 541b418327c..6922469d6fc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode) { dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + truncate_inode_pages(&inode->i_data, 0); + nfs_wb_all(inode); /* * The following should never happen... diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2681485cf2d..edc95514046 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX) break; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(NFS_JUKEBOX_RETRY_TIME); + schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!signalled()); rpc_clnt_sigunmask(clnt, &oldset); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0c5a308e496..9701ca8c942 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2418,14 +2418,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MAX; rpc_clnt_sigmask(clnt, &oldset); if (clnt->cl_intr) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(*timeout); + schedule_timeout_interruptible(*timeout); if (signalled()) res = -ERESTARTSYS; - } else { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(*timeout); - } + } else + schedule_timeout_uninterruptible(*timeout); rpc_clnt_sigunmask(clnt, &oldset); *timeout <<= 1; return res; @@ -2578,8 +2575,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(timeout); + schedule_timeout_interruptible(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 9eecc9939df..49eafbdb15c 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -22,6 +22,77 @@ ToDo/Notes: - Enable the code for setting the NT4 compatibility flag when we start making NTFS 1.2 specific modifications. +2.1.24 - Lots of bug fixes and support more clean journal states. + + - Support journals ($LogFile) which have been modified by chkdsk. This + means users can boot into Windows after we marked the volume dirty. + The Windows boot will run chkdsk and then reboot. The user can then + immediately boot into Linux rather than having to do a full Windows + boot first before rebooting into Linux and we will recognize such a + journal and empty it as it is clean by definition. + - Support journals ($LogFile) with only one restart page as well as + journals with two different restart pages. We sanity check both and + either use the only sane one or the more recent one of the two in the + case that both are valid. + - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to + ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and + hence cannot fail. + - Use ntfs_malloc_nofs_nofail() in the two critical regions in + fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer + need to panic() if the allocation fails as it now cannot fail. + - Fix two nasty runlist merging bugs that had gone unnoticed so far. + Thanks to Stefano Picerno for the bug report. + - Remove two bogus BUG_ON()s from fs/ntfs/mft.c. + - Fix handling of valid but empty mapping pairs array in + fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress(). + - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING + messages and include the inode number. Thanks to Yura Pakhuchiy for + pointing this out. + - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new + length is zero. + - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller + specified hole into a runlist. + - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned + index entry is in the index root, we forgot to set the @ir pointer in + the index context. Thanks to Yura Pakhuchiy for finding this bug. + - Remove bogus setting of PageError in ntfs_read_compressed_block(). + - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize(). + - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect + access to the allocated size in the ntfs inode with the size lock. + - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to + return LCN_ENOENT when there is no runlist and the allocated size is + zero. + - Fix load_attribute_list() to handle the case of a NULL runlist. + - Fix handling of sparse attributes in ntfs_attr_make_non_resident(). + - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set() + to ensure that these functions are never called for compressed or + encrypted attributes. + - Fix cluster (de)allocators to work when the runlist is NULL and more + importantly to take a locked runlist rather than them locking it + which leads to lock reversal. + - Truncate {a,c,m}time to the ntfs supported time granularity when + updating the times in the inode in ntfs_setattr(). + - Fixup handling of sparse, compressed, and encrypted attributes in + fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(), + fs/ntfs/aops.c::ntfs_{read,write}page(). + - Make ntfs_write_block() not instantiate sparse blocks if they contain + only zeroes. + - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page + lock protection over the buffer submission for i/o which allows the + removal of the get_bh()/put_bh() pairs for each buffer. + - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case + where a concurrent truncate has truncated the runlist under our feet. + - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c. + - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock + in the first buffer head instead of a driver global spin lock to + improve scalability. + - Minor fix to error handling and error message display in + fs/ntfs/aops.c::ntfs_prepare_nonresident_write(). + - Change the mount options {u,f,d}mask to always parse the number as + an octal number to conform to how chmod(1) works, too. Thanks to + Giuseppe Bilotta and Horst von Brand for pointing out the errors of + my ways. + 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index f083f27d8b6..894b2b876d3 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 78adad7a988..b6cc8cf2462 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -27,6 +27,7 @@ #include <linux/swap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/bit_spinlock.h> #include "aops.h" #include "attrib.h" @@ -55,9 +56,8 @@ */ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) { - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; - struct buffer_head *tmp; + struct buffer_head *first, *tmp; struct page *page; ntfs_inode *ni; int page_uptodate = 1; @@ -89,11 +89,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } } else { clear_buffer_uptodate(bh); + SetPageError(page); ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", (unsigned long long)bh->b_blocknr); - SetPageError(page); } - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -108,7 +110,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the @@ -141,7 +144,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unlock_page(page); return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } @@ -185,13 +189,15 @@ static int ntfs_read_block(struct page *page) blocksize_bits = VFS_I(ni)->i_blkbits; blocksize = 1 << blocksize_bits; - if (!page_has_buffers(page)) + if (!page_has_buffers(page)) { create_empty_buffers(page, blocksize, 0); - bh = head = page_buffers(page); - if (unlikely(!bh)) { - unlock_page(page); - return -ENOMEM; + if (unlikely(!page_has_buffers(page))) { + unlock_page(page); + return -ENOMEM; + } } + bh = head = page_buffers(page); + BUG_ON(!bh); iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); @@ -204,6 +210,7 @@ static int ntfs_read_block(struct page *page) nr = i = 0; do { u8 *kaddr; + int err; if (unlikely(buffer_uptodate(bh))) continue; @@ -211,6 +218,7 @@ static int ntfs_read_block(struct page *page) arr[nr++] = bh; continue; } + err = 0; bh->b_bdev = vol->sb->s_bdev; /* Is the block within the allowed limits? */ if (iblock < lblock) { @@ -252,7 +260,6 @@ lock_retry_remap: goto handle_hole; /* If first try and runlist unmapped, map and retry. */ if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { - int err; is_retry = TRUE; /* * Attempt to map runlist, dropping lock for @@ -263,20 +270,30 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); + /* + * If buffer is outside the runlist, treat it as a + * hole. This can happen due to concurrent truncate + * for example. + */ + if (err == -ENOENT || lcn == LCN_ENOENT) { + err = 0; + goto handle_hole; + } /* Hard error, zero out region. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; SetPageError(page); ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " "attribute type 0x%x, vcn 0x%llx, " "offset 0x%x because its location on " "disk could not be determined%s " - "(error code %lli).", ni->mft_no, + "(error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even after " - "retrying" : "", (long long)lcn); + "retrying" : "", err); } /* * Either iblock was outside lblock limits or @@ -289,9 +306,10 @@ handle_hole: handle_zblock: kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - set_buffer_uptodate(bh); + flush_dcache_page(page); + if (likely(!err)) + set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); /* Release the lock if we took it. */ @@ -367,31 +385,38 @@ retry_readpage: return 0; } ni = NTFS_I(page->mapping->host); - + /* + * Only $DATA attributes can be encrypted and only unnamed $DATA + * attributes can be compressed. Index root can have the flags set but + * this means to create compressed/encrypted files, not that the + * attribute is compressed/encrypted. + */ + if (ni->type != AT_INDEX_ROOT) { + /* If attribute is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + BUG_ON(ni->type != AT_DATA); + err = -EACCES; + goto err_out; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoNonResident(ni) && NInoCompressed(ni)) { + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + return ntfs_read_compressed_block(page); + } + } /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed or - * encrypted. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - err = -EACCES; - goto err_out; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) - return ntfs_read_compressed_block(page); - } - /* Normal data stream. */ + /* Normal, non-resident data stream. */ return ntfs_read_block(page); } /* * Attribute is resident, implying it is not compressed or encrypted. * This also means the attribute is smaller than an mft record and * hence smaller than a page, so can simply zero out any pages with - * index above 0. + * index above 0. Note the attribute can actually be marked compressed + * but if it is resident the actual data is not compressed so we are + * ok to ignore the compressed flag here. */ if (unlikely(page->index > 0)) { kaddr = kmap_atomic(page, KM_USER0); @@ -511,19 +536,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) BUG_ON(!PageUptodate(page)); create_empty_buffers(page, blocksize, (1 << BH_Uptodate) | (1 << BH_Dirty)); + if (unlikely(!page_has_buffers(page))) { + ntfs_warning(vol->sb, "Error allocating page " + "buffers. Redirtying page so we try " + "again later."); + /* + * Put the page back on mapping->dirty_pages, but leave + * its buffers' dirty state as-is. + */ + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } } bh = head = page_buffers(page); - if (unlikely(!bh)) { - ntfs_warning(vol->sb, "Error allocating page buffers. " - "Redirtying page so we try again later."); - /* - * Put the page back on mapping->dirty_pages, but leave its - * buffer's dirty state as-is. - */ - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } + BUG_ON(!bh); /* NOTE: Different naming scheme to ntfs_read_block()! */ @@ -670,6 +697,27 @@ lock_retry_remap: } /* It is a hole, need to instantiate it. */ if (lcn == LCN_HOLE) { + u8 *kaddr; + unsigned long *bpos, *bend; + + /* Check if the buffer is zero. */ + kaddr = kmap_atomic(page, KM_USER0); + bpos = (unsigned long *)(kaddr + bh_offset(bh)); + bend = (unsigned long *)((u8*)bpos + blocksize); + do { + if (unlikely(*bpos)) + break; + } while (likely(++bpos < bend)); + kunmap_atomic(kaddr, KM_USER0); + if (bpos == bend) { + /* + * Buffer is zero and sparse, no need to write + * it. + */ + bh->b_blocknr = -1; + clear_buffer_dirty(bh); + continue; + } // TODO: Instantiate the hole. // clear_buffer_new(bh); // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); @@ -690,20 +738,37 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); + /* + * If buffer is outside the runlist, truncate has cut it out + * of the runlist. Just clean and clear the buffer and set it + * uptodate so it can get discarded by the VM. + */ + if (err == -ENOENT || lcn == LCN_ENOENT) { + u8 *kaddr; + + bh->b_blocknr = -1; + clear_buffer_dirty(bh); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + bh_offset(bh), 0, blocksize); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + set_buffer_uptodate(bh); + err = 0; + continue; + } /* Failed to map the buffer, even after retrying. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " "attribute type 0x%x, vcn 0x%llx, offset 0x%x " "because its location on disk could not be " - "determined%s (error code %lli).", ni->mft_no, + "determined%s (error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even after " - "retrying" : "", (long long)lcn); - if (!err) - err = -EIO; + "retrying" : "", err); break; } while (block++, (bh = bh->b_this_page) != head); @@ -714,7 +779,7 @@ lock_retry_remap: /* For the error case, need to reset bh to the beginning. */ bh = head; - /* Just an optimization, so ->readpage() isn't called later. */ + /* Just an optimization, so ->readpage() is not called later. */ if (unlikely(!PageUptodate(page))) { int uptodate = 1; do { @@ -730,7 +795,6 @@ lock_retry_remap: /* Setup all mapped, dirty buffers for async write i/o. */ do { - get_bh(bh); if (buffer_mapped(bh) && buffer_dirty(bh)) { lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { @@ -768,14 +832,8 @@ lock_retry_remap: BUG_ON(PageWriteback(page)); set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ - unlock_page(page); - /* - * Submit the prepared buffers for i/o. Note the page is unlocked, - * and the async write i/o completion handler can end_page_writeback() - * at any time after the *first* submit_bh(). So the buffers can then - * disappear... - */ + /* Submit the prepared buffers for i/o. */ need_end_writeback = TRUE; do { struct buffer_head *next = bh->b_this_page; @@ -783,9 +841,9 @@ lock_retry_remap: submit_bh(WRITE, bh); need_end_writeback = FALSE; } - put_bh(bh); bh = next; } while (bh != head); + unlock_page(page); /* If no i/o was started, need to end_page_writeback(). */ if (unlikely(need_end_writeback)) @@ -860,7 +918,6 @@ static int ntfs_write_mst_block(struct page *page, sync = (wbc->sync_mode == WB_SYNC_ALL); /* Make sure we have mapped buffers. */ - BUG_ON(!page_has_buffers(page)); bh = head = page_buffers(page); BUG_ON(!bh); @@ -1280,38 +1337,42 @@ retry_writepage: ntfs_debug("Write outside i_size - truncated?"); return 0; } + /* + * Only $DATA attributes can be encrypted and only unnamed $DATA + * attributes can be compressed. Index root can have the flags set but + * this means to create compressed/encrypted files, not that the + * attribute is compressed/encrypted. + */ + if (ni->type != AT_INDEX_ROOT) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + unlock_page(page); + BUG_ON(ni->type != AT_DATA); + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoNonResident(ni) && NInoCompressed(ni)) { + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + // TODO: Implement and replace this with + // return ntfs_write_compressed_block(page); + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to compressed files is " + "not supported yet. Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoNonResident(ni) && NInoSparse(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to sparse files is not " + "supported yet. Sorry."); + return -EOPNOTSUPP; + } + } /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed, encrypted, - * and/or sparse. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - unlock_page(page); - ntfs_debug("Denying write access to encrypted " - "file."); - return -EACCES; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) { - // TODO: Implement and replace this check with - // return ntfs_write_compressed_block(page); - unlock_page(page); - ntfs_error(vi->i_sb, "Writing to compressed " - "files is not supported yet. " - "Sorry."); - return -EOPNOTSUPP; - } - // TODO: Implement and remove this check. - if (NInoSparse(ni)) { - unlock_page(page); - ntfs_error(vi->i_sb, "Writing to sparse files " - "is not supported yet. Sorry."); - return -EOPNOTSUPP; - } - } /* We have to zero every time due to mmap-at-end-of-file. */ if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ @@ -1324,14 +1385,16 @@ retry_writepage: /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) return ntfs_write_mst_block(page, wbc); - /* Normal data stream. */ + /* Normal, non-resident data stream. */ return ntfs_write_block(page, wbc); } /* - * Attribute is resident, implying it is not compressed, encrypted, - * sparse, or mst protected. This also means the attribute is smaller - * than an mft record and hence smaller than a page, so can simply - * return error on any pages with index above 0. + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. This also means the attribute is smaller than an mft + * record and hence smaller than a page, so can simply return error on + * any pages with index above 0. Note the attribute can actually be + * marked compressed but if it is resident the actual data is not + * compressed so we are ok to ignore the compressed flag here. */ BUG_ON(page_has_buffers(page)); BUG_ON(!PageUptodate(page)); @@ -1380,30 +1443,14 @@ retry_writepage: BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); - /* - * Here, we don't need to zero the out of bounds area everytime because - * the below memcpy() already takes care of the mmap-at-end-of-file - * requirements. If the file is converted to a non-resident one, then - * the code path use is switched to the non-resident one where the - * zeroing happens on each ntfs_writepage() invocation. - * - * The above also applies nicely when i_size is decreased. - * - * When i_size is increased, the memory between the old and new i_size - * _must_ be zeroed (or overwritten with new data). Otherwise we will - * expose data to userspace/disk which should never have been exposed. - * - * FIXME: Ensure that i_size increases do the zeroing/overwriting and - * if we cannot guarantee that, then enable the zeroing below. If the - * zeroing below is enabled, we MUST move the unlock_page() from above - * to after the kunmap_atomic(), i.e. just before the - * end_page_writeback(). - * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size - * increases for resident attributes so those are ok. - * TODO: ntfs_truncate(), others? + * Here, we do not need to zero the out of bounds area everytime + * because the below memcpy() already takes care of the + * mmap-at-end-of-file requirements. If the file is converted to a + * non-resident one, then the code path use is switched to the + * non-resident one where the zeroing happens on each ntfs_writepage() + * invocation. */ - attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); i_size = i_size_read(vi); if (unlikely(attr_len > i_size)) { @@ -1681,27 +1728,25 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); /* * Failed to map the buffer, even after * retrying. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; ntfs_error(vol->sb, "Failed to write to inode " "0x%lx, attribute type 0x%x, " "vcn 0x%llx, offset 0x%x " "because its location on disk " "could not be determined%s " - "(error code %lli).", + "(error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even " - "after retrying" : "", - (long long)lcn); - if (!err) - err = -EIO; + "after retrying" : "", err); goto err_out; } /* We now have a successful remap, i.e. lcn >= 0. */ @@ -2357,6 +2402,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { buffers_to_free = bh; } bh = head = page_buffers(page); + BUG_ON(!bh); do { bh_ofs = bh_offset(bh); if (bh_ofs + bh_size <= ofs) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index cd0f9e740b1..3f9a4ff42ee 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -43,6 +43,9 @@ * which is not an error as such. This is -ENOENT. It means that @vcn is out * of bounds of the runlist. * + * Note the runlist can be NULL after this function returns if @vcn is zero and + * the attribute has zero allocated size, i.e. there simply is no runlist. + * * Locking: - The runlist must be locked for writing. * - This function modifies the runlist. */ @@ -54,6 +57,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) ATTR_RECORD *a; ntfs_attr_search_ctx *ctx; runlist_element *rl; + unsigned long flags; int err = 0; ntfs_debug("Mapping runlist part containing vcn 0x%llx.", @@ -85,8 +89,11 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) * ntfs_mapping_pairs_decompress() fails. */ end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; - if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) + if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) { + read_lock_irqsave(&ni->size_lock, flags); end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; + read_unlock_irqrestore(&ni->size_lock, flags); + } if (unlikely(vcn >= end_vcn)) { err = -ENOENT; goto err_out; @@ -165,6 +172,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, const BOOL write_locked) { LCN lcn; + unsigned long flags; BOOL is_retry = FALSE; ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", @@ -173,6 +181,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); + if (!ni->runlist.rl) { + read_lock_irqsave(&ni->size_lock, flags); + if (!ni->allocated_size) { + read_unlock_irqrestore(&ni->size_lock, flags); + return LCN_ENOENT; + } + read_unlock_irqrestore(&ni->size_lock, flags); + } retry_remap: /* Convert vcn to lcn. If that fails map the runlist and retry once. */ lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); @@ -255,6 +271,7 @@ retry_remap: runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, const BOOL write_locked) { + unsigned long flags; runlist_element *rl; int err = 0; BOOL is_retry = FALSE; @@ -265,6 +282,14 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); + if (!ni->runlist.rl) { + read_lock_irqsave(&ni->size_lock, flags); + if (!ni->allocated_size) { + read_unlock_irqrestore(&ni->size_lock, flags); + return ERR_PTR(-ENOENT); + } + read_unlock_irqrestore(&ni->size_lock, flags); + } retry_remap: rl = ni->runlist.rl; if (likely(rl && vcn >= rl[0].vcn)) { @@ -528,6 +553,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start, block_size_bits = sb->s_blocksize_bits; down_read(&runlist->lock); rl = runlist->rl; + if (!rl) { + ntfs_error(sb, "Cannot read attribute list since runlist is " + "missing."); + goto err_out; + } /* Read all clusters specified by the runlist one run at a time. */ while (rl->length) { lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); @@ -1247,6 +1277,46 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size) } /** + * ntfs_resident_attr_value_resize - resize the value of a resident attribute + * @m: mft record containing attribute record + * @a: attribute record whose value to resize + * @new_size: new size in bytes to which to resize the attribute value of @a + * + * Resize the value of the attribute @a in the mft record @m to @new_size bytes. + * If the value is made bigger, the newly allocated space is cleared. + * + * Return 0 on success and -errno on error. The following error codes are + * defined: + * -ENOSPC - Not enough space in the mft record @m to perform the resize. + * + * Note: On error, no modifications have been performed whatsoever. + * + * Warning: If you make a record smaller without having copied all the data you + * are interested in the data may be overwritten. + */ +int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, + const u32 new_size) +{ + u32 old_size; + + /* Resize the resident part of the attribute record. */ + if (ntfs_attr_record_resize(m, a, + le16_to_cpu(a->data.resident.value_offset) + new_size)) + return -ENOSPC; + /* + * The resize succeeded! If we made the attribute value bigger, clear + * the area between the old size and @new_size. + */ + old_size = le32_to_cpu(a->data.resident.value_length); + if (new_size > old_size) + memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) + + old_size, 0, new_size - old_size); + /* Finally update the length of the attribute value. */ + a->data.resident.value_length = cpu_to_le32(new_size); + return 0; +} + +/** * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute * @ni: ntfs inode describing the attribute to convert * @@ -1302,6 +1372,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) return err; } /* + * FIXME: Compressed and encrypted attributes are not supported when + * writing and we should never have gotten here for them. + */ + BUG_ON(NInoCompressed(ni)); + BUG_ON(NInoEncrypted(ni)); + /* * The size needs to be aligned to a cluster boundary for allocation * purposes. */ @@ -1377,10 +1453,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) BUG_ON(a->non_resident); /* * Calculate new offsets for the name and the mapping pairs array. - * We assume the attribute is not compressed or sparse. */ - name_ofs = (offsetof(ATTR_REC, - data.non_resident.compressed_size) + 7) & ~7; + if (NInoSparse(ni) || NInoCompressed(ni)) + name_ofs = (offsetof(ATTR_REC, + data.non_resident.compressed_size) + + sizeof(a->data.non_resident.compressed_size) + + 7) & ~7; + else + name_ofs = (offsetof(ATTR_REC, + data.non_resident.compressed_size) + 7) & ~7; mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; /* * Determine the size of the resident part of the now non-resident @@ -1419,24 +1500,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), a->name_length * sizeof(ntfschar)); a->name_offset = cpu_to_le16(name_ofs); - /* - * FIXME: For now just clear all of these as we do not support them - * when writing. - */ - a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE | - ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK)); /* Setup the fields specific to non-resident attributes. */ a->data.non_resident.lowest_vcn = 0; a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> vol->cluster_size_bits); a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); - a->data.non_resident.compression_unit = 0; memset(&a->data.non_resident.reserved, 0, sizeof(a->data.non_resident.reserved)); a->data.non_resident.allocated_size = cpu_to_sle64(new_size); a->data.non_resident.data_size = a->data.non_resident.initialized_size = cpu_to_sle64(attr_size); + if (NInoSparse(ni) || NInoCompressed(ni)) { + a->data.non_resident.compression_unit = 4; + a->data.non_resident.compressed_size = + a->data.non_resident.allocated_size; + } else + a->data.non_resident.compression_unit = 0; /* Generate the mapping pairs array into the attribute record. */ err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, arec_size - mp_ofs, rl, 0, -1, NULL); @@ -1446,16 +1526,19 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) goto undo_err_out; } /* Setup the in-memory attribute structure to be non-resident. */ - /* - * FIXME: For now just clear all of these as we do not support them - * when writing. - */ - NInoClearSparse(ni); - NInoClearEncrypted(ni); - NInoClearCompressed(ni); ni->runlist.rl = rl; write_lock_irqsave(&ni->size_lock, flags); ni->allocated_size = new_size; + if (NInoSparse(ni) || NInoCompressed(ni)) { + ni->itype.compressed.size = ni->allocated_size; + ni->itype.compressed.block_size = 1U << + (a->data.non_resident.compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype.compressed.block_size) - 1; + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident.compression_unit; + } write_unlock_irqrestore(&ni->size_lock, flags); /* * This needs to be last since the address space operations ->readpage @@ -1603,6 +1686,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) BUG_ON(cnt < 0); if (!cnt) goto done; + /* + * FIXME: Compressed and encrypted attributes are not supported when + * writing and we should never have gotten here for them. + */ + BUG_ON(NInoCompressed(ni)); + BUG_ON(NInoEncrypted(ni)); mapping = VFS_I(ni)->i_mapping; /* Work out the starting index and page offset. */ idx = ofs >> PAGE_CACHE_SHIFT; diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h index 0e4ac6d3c0e..0618ed6fd7b 100644 --- a/fs/ntfs/attrib.h +++ b/fs/ntfs/attrib.h @@ -99,6 +99,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type); extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); +extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, + const u32 new_size); extern int ntfs_attr_make_non_resident(ntfs_inode *ni); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 6d265cfd49a..25d24106f89 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page) if (unlikely(!pages || !bhs)) { kfree(bhs); kfree(pages); - SetPageError(page); unlock_page(page); ntfs_error(vol->sb, "Failed to allocate internal buffers."); return -ENOMEM; @@ -871,9 +870,6 @@ lock_retry_remap: for (; prev_cur_page < cur_page; prev_cur_page++) { page = pages[prev_cur_page]; if (page) { - if (prev_cur_page == xpage && - !xpage_done) - SetPageError(page); flush_dcache_page(page); kunmap(page); unlock_page(page); @@ -904,8 +900,6 @@ lock_retry_remap: "Terminating them with extreme " "prejudice. Inode 0x%lx, page index " "0x%lx.", ni->mft_no, page->index); - if (cur_page == xpage && !xpage_done) - SetPageError(page); flush_dcache_page(page); kunmap(page); unlock_page(page); @@ -953,8 +947,6 @@ err_out: for (i = cur_page; i < max_page; i++) { page = pages[i]; if (page) { - if (i == xpage && !xpage_done) - SetPageError(page); flush_dcache_page(page); kunmap(page); unlock_page(page); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 46779471c54..795c3d1930f 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, ie->key.file_name.file_name_length, &name, NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); if (name_len <= 0) { - ntfs_debug("Skipping unrepresentable file."); + ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.", + (long long)MREF_LE(ie->data.dir.indexed_file)); return 0; } if (ie->key.file_name.file_attributes & diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index e0f530ce6b9..be9fd1dd423 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* - * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. + * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -94,6 +94,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, if (!datasync || !NInoNonResident(NTFS_I(vi))) ret = ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); + /* + * NOTE: If we were to use mapping->private_list (see ext2 and + * fs/buffer.c) for dirty blocks then we could optimize the below to be + * sync_mapping_buffers(vi->i_mapping). + */ err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) ret = err; diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 11fd5307d78..8f2d5727546 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len, &ie->key, key_len)) { ir_done: ictx->is_in_root = TRUE; + ictx->ir = ir; ictx->actx = actx; ictx->base_ni = base_ni; ictx->ia = NULL; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 886214a77f9..dc4bbe3acf5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1013,41 +1013,50 @@ skip_large_dir_stuff: } a = ctx->attr; /* Setup the state. */ - if (a->non_resident) { - NInoSetNonResident(ni); - if (a->flags & (ATTR_COMPRESSION_MASK | - ATTR_IS_SPARSE)) { - if (a->flags & ATTR_COMPRESSION_MASK) { - NInoSetCompressed(ni); - if (vol->cluster_size > 4096) { - ntfs_error(vi->i_sb, "Found " + if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { + if (a->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " "compressed data but " "compression is " "disabled due to " "cluster size (%i) > " "4kiB.", vol->cluster_size); - goto unm_err_out; - } - if ((a->flags & ATTR_COMPRESSION_MASK) - != ATTR_IS_COMPRESSED) { - ntfs_error(vi->i_sb, "Found " - "unknown compression " - "method or corrupt " - "file."); - goto unm_err_out; - } + goto unm_err_out; + } + if ((a->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method " + "or corrupt file."); + goto unm_err_out; } - if (a->flags & ATTR_IS_SPARSE) - NInoSetSparse(ni); + } + if (a->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + } + if (a->flags & ATTR_IS_ENCRYPTED) { + if (NInoCompressed(ni)) { + ntfs_error(vi->i_sb, "Found encrypted and " + "compressed data."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (a->non_resident) { + NInoSetNonResident(ni); + if (NInoCompressed(ni) || NInoSparse(ni)) { if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found " - "nonstandard compression unit " - "(%u instead of 4). Cannot " - "handle this.", - a->data.non_resident. - compression_unit); + "nonstandard " + "compression unit (%u " + "instead of 4). " + "Cannot handle this.", + a->data.non_resident. + compression_unit); err = -EOPNOTSUPP; goto unm_err_out; } @@ -1065,14 +1074,6 @@ skip_large_dir_stuff: a->data.non_resident. compressed_size); } - if (a->flags & ATTR_IS_ENCRYPTED) { - if (a->flags & ATTR_COMPRESSION_MASK) { - ntfs_error(vi->i_sb, "Found encrypted " - "and compressed data."); - goto unm_err_out; - } - NInoSetEncrypted(ni); - } if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of $DATA " "attribute has non zero " @@ -1212,6 +1213,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) if (unlikely(err)) goto unm_err_out; a = ctx->attr; + if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { + if (a->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if ((ni->type != AT_DATA) || (ni->type == AT_DATA && + ni->name_len)) { + ntfs_error(vi->i_sb, "Found compressed " + "non-data or named data " + "attribute. Please report " + "you saw this message to " + "linux-ntfs-dev@lists." + "sourceforge.net"); + goto unm_err_out; + } + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found compressed " + "attribute but compression is " + "disabled due to cluster size " + "(%i) > 4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((a->flags & ATTR_COMPRESSION_MASK) != + ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method."); + goto unm_err_out; + } + } + /* + * The encryption flag set in an index root just means to + * compress all files. + */ + if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { + ntfs_error(vi->i_sb, "Found mst protected attribute " + "but the attribute is %s. Please " + "report you saw this message to " + "linux-ntfs-dev@lists.sourceforge.net", + NInoCompressed(ni) ? "compressed" : + "sparse"); + goto unm_err_out; + } + if (a->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + } + if (a->flags & ATTR_IS_ENCRYPTED) { + if (NInoCompressed(ni)) { + ntfs_error(vi->i_sb, "Found encrypted and compressed " + "data."); + goto unm_err_out; + } + /* + * The encryption flag set in an index root just means to + * encrypt all files. + */ + if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { + ntfs_error(vi->i_sb, "Found mst protected attribute " + "but the attribute is encrypted. " + "Please report you saw this message " + "to linux-ntfs-dev@lists.sourceforge." + "net"); + goto unm_err_out; + } + if (ni->type != AT_DATA) { + ntfs_error(vi->i_sb, "Found encrypted non-data " + "attribute."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } if (!a->non_resident) { /* Ensure the attribute name is placed before the value. */ if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= @@ -1220,11 +1290,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) "the attribute value."); goto unm_err_out; } - if (NInoMstProtected(ni) || a->flags) { + if (NInoMstProtected(ni)) { ntfs_error(vi->i_sb, "Found mst protected attribute " - "or attribute with non-zero flags but " - "the attribute is resident. Please " - "report you saw this message to " + "but the attribute is resident. " + "Please report you saw this message to " "linux-ntfs-dev@lists.sourceforge.net"); goto unm_err_out; } @@ -1250,50 +1319,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) "the mapping pairs array."); goto unm_err_out; } - if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { - if (a->flags & ATTR_COMPRESSION_MASK) { - NInoSetCompressed(ni); - if ((ni->type != AT_DATA) || (ni->type == - AT_DATA && ni->name_len)) { - ntfs_error(vi->i_sb, "Found compressed " - "non-data or named " - "data attribute. " - "Please report you " - "saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - if (vol->cluster_size > 4096) { - ntfs_error(vi->i_sb, "Found compressed " - "attribute but " - "compression is " - "disabled due to " - "cluster size (%i) > " - "4kiB.", - vol->cluster_size); - goto unm_err_out; - } - if ((a->flags & ATTR_COMPRESSION_MASK) != - ATTR_IS_COMPRESSED) { - ntfs_error(vi->i_sb, "Found unknown " - "compression method."); - goto unm_err_out; - } - } - if (NInoMstProtected(ni)) { - ntfs_error(vi->i_sb, "Found mst protected " - "attribute but the attribute " - "is %s. Please report you " - "saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net", - NInoCompressed(ni) ? - "compressed" : "sparse"); - goto unm_err_out; - } - if (a->flags & ATTR_IS_SPARSE) - NInoSetSparse(ni); + if ((NInoCompressed(ni) || NInoSparse(ni)) && + ni->type != AT_INDEX_ROOT) { if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found nonstandard " "compression unit (%u instead " @@ -1313,23 +1340,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ni->itype.compressed.size = sle64_to_cpu( a->data.non_resident.compressed_size); } - if (a->flags & ATTR_IS_ENCRYPTED) { - if (a->flags & ATTR_COMPRESSION_MASK) { - ntfs_error(vi->i_sb, "Found encrypted and " - "compressed data."); - goto unm_err_out; - } - if (NInoMstProtected(ni)) { - ntfs_error(vi->i_sb, "Found mst protected " - "attribute but the attribute " - "is encrypted. Please report " - "you saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - NInoSetEncrypted(ni); - } if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of attribute has " "non-zero lowest_vcn."); @@ -1348,12 +1358,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) vi->i_mapping->a_ops = &ntfs_mst_aops; else vi->i_mapping->a_ops = &ntfs_aops; - if (NInoCompressed(ni) || NInoSparse(ni)) + if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT) vi->i_blocks = ni->itype.compressed.size >> 9; else vi->i_blocks = ni->allocated_size >> 9; /* - * Make sure the base inode doesn't go away and attach it to the + * Make sure the base inode does not go away and attach it to the * attribute inode. */ igrab(base_vi); @@ -1480,7 +1490,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) "after the attribute value."); goto unm_err_out; } - /* Compressed/encrypted/sparse index root is not allowed. */ + /* + * Compressed/encrypted/sparse index root is not allowed, except for + * directories of course but those are not dealt with here. + */ if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | ATTR_IS_SPARSE)) { ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " @@ -2430,16 +2443,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) * We skipped the truncate but must still update * timestamps. */ - ia_valid |= ATTR_MTIME|ATTR_CTIME; + ia_valid |= ATTR_MTIME | ATTR_CTIME; } } - if (ia_valid & ATTR_ATIME) - vi->i_atime = attr->ia_atime; + vi->i_atime = timespec_trunc(attr->ia_atime, + vi->i_sb->s_time_gran); if (ia_valid & ATTR_MTIME) - vi->i_mtime = attr->ia_mtime; + vi->i_mtime = timespec_trunc(attr->ia_mtime, + vi->i_sb->s_time_gran); if (ia_valid & ATTR_CTIME) - vi->i_ctime = attr->ia_ctime; + vi->i_ctime = timespec_trunc(attr->ia_ctime, + vi->i_sb->s_time_gran); mark_inode_dirty(vi); out: return err; diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index a4bc07616e5..7b593429068 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, int ret = 0; ntfs_debug("Entering."); + if (!rl) + return 0; for (; rl->length; rl++) { int err; @@ -163,17 +165,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, BUG_ON(zone < FIRST_ZONE); BUG_ON(zone > LAST_ZONE); - /* Return empty runlist if @count == 0 */ - // FIXME: Do we want to just return NULL instead? (AIA) - if (!count) { - rl = ntfs_malloc_nofs(PAGE_SIZE); - if (!rl) - return ERR_PTR(-ENOMEM); - rl[0].vcn = start_vcn; - rl[0].lcn = LCN_RL_NOT_MAPPED; - rl[0].length = 0; - return rl; - } + /* Return NULL if @count is zero. */ + if (!count) + return NULL; /* Take the lcnbmp lock for writing. */ down_write(&vol->lcnbmp_lock); /* @@ -788,7 +782,8 @@ out: * @vi: vfs inode whose runlist describes the clusters to free * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters - * @is_rollback: if TRUE this is a rollback operation + * @write_locked: true if the runlist is locked for writing + * @is_rollback: true if this is a rollback operation * * Free @count clusters starting at the cluster @start_vcn in the runlist * described by the vfs inode @vi. @@ -806,17 +801,17 @@ out: * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * - * Locking: - The runlist described by @vi must be unlocked on entry and is - * unlocked on return. - * - This function takes the runlist lock of @vi for reading and - * sometimes for writing and sometimes modifies the runlist. + * Locking: - The runlist described by @vi must be locked on entry and is + * locked on return. Note if the runlist is locked for reading the + * lock may be dropped and reacquired. Note the runlist may be + * modified when needed runlist fragments need to be mapped. * - The volume lcn bitmap must be unlocked on entry and is unlocked * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. */ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, - const BOOL is_rollback) + const BOOL write_locked, const BOOL is_rollback) { s64 delta, to_free, total_freed, real_freed; ntfs_inode *ni; @@ -848,8 +843,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, total_freed = real_freed = 0; - down_read(&ni->runlist.lock); - rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE); + rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked); if (IS_ERR(rl)) { if (!is_rollback) ntfs_error(vol->sb, "Failed to find first runlist " @@ -903,7 +897,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, /* Attempt to map runlist. */ vcn = rl->vcn; - rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); + rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked); if (IS_ERR(rl)) { err = PTR_ERR(rl); if (!is_rollback) @@ -950,7 +944,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, /* Update the total done clusters. */ total_freed += to_free; } - up_read(&ni->runlist.lock); if (likely(!is_rollback)) up_write(&vol->lcnbmp_lock); @@ -960,7 +953,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, ntfs_debug("Done."); return real_freed; err_out: - up_read(&ni->runlist.lock); if (is_rollback) return err; /* If no real clusters were freed, no need to rollback. */ @@ -973,7 +965,8 @@ err_out: * If rollback fails, set the volume errors flag, emit an error * message, and return the error code. */ - delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); + delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked, + TRUE); if (delta < 0) { ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " "inconsistent metadata! Unmount and run " diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h index 4cac1c024af..e4d7fb98d68 100644 --- a/fs/ntfs/lcnalloc.h +++ b/fs/ntfs/lcnalloc.h @@ -43,13 +43,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const NTFS_CLUSTER_ALLOCATION_ZONES zone); extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, - s64 count, const BOOL is_rollback); + s64 count, const BOOL write_locked, const BOOL is_rollback); /** * ntfs_cluster_free - free clusters on an ntfs volume * @vi: vfs inode whose runlist describes the clusters to free * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters + * @write_locked: true if the runlist is locked for writing * * Free @count clusters starting at the cluster @start_vcn in the runlist * described by the vfs inode @vi. @@ -64,19 +65,19 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * - * Locking: - The runlist described by @vi must be unlocked on entry and is - * unlocked on return. - * - This function takes the runlist lock of @vi for reading and - * sometimes for writing and sometimes modifies the runlist. + * Locking: - The runlist described by @vi must be locked on entry and is + * locked on return. Note if the runlist is locked for reading the + * lock may be dropped and reacquired. Note the runlist may be + * modified when needed runlist fragments need to be mapped. * - The volume lcn bitmap must be unlocked on entry and is unlocked * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. */ static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, - s64 count) + s64 count, const BOOL write_locked) { - return __ntfs_cluster_free(vi, start_vcn, count, FALSE); + return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE); } extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, @@ -93,8 +94,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, * * Return 0 on success and -errno on error. * - * Locking: This function takes the volume lcn bitmap lock for writing and - * modifies the bitmap contents. + * Locking: - This function takes the volume lcn bitmap lock for writing and + * modifies the bitmap contents. + * - The caller must have locked the runlist @rl for reading or + * writing. */ static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, const runlist_element *rl) diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 8edb8e20fb0..0173e95500d 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -121,7 +121,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, */ if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { ntfs_error(vi->i_sb, "$LogFile restart page is not modified " - "chkdsk but a chkdsk LSN is specified."); + "by chkdsk but a chkdsk LSN is specified."); return FALSE; } ntfs_debug("Done."); @@ -312,10 +312,12 @@ err_out: * @vi: $LogFile inode to which the restart page belongs * @rp: restart page to check * @pos: position in @vi at which the restart page resides - * @wrp: copy of the multi sector transfer deprotected restart page + * @wrp: [OUT] copy of the multi sector transfer deprotected restart page + * @lsn: [OUT] set to the current logfile lsn on success * - * Check the restart page @rp for consistency and return TRUE if it is - * consistent and FALSE otherwise. + * Check the restart page @rp for consistency and return 0 if it is consistent + * and -errno otherwise. The restart page may have been modified by chkdsk in + * which case its magic is CHKD instead of RSTR. * * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not * require the full restart page. @@ -323,25 +325,33 @@ err_out: * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a * copy of the complete multi sector transfer deprotected page. On failure, * *@wrp is undefined. + * + * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current + * logfile lsn according to this restart page. On failure, *@lsn is undefined. + * + * The following error codes are defined: + * -EINVAL - The restart page is inconsistent. + * -ENOMEM - Not enough memory to load the restart page. + * -EIO - Failed to reading from $LogFile. */ -static BOOL ntfs_check_and_load_restart_page(struct inode *vi, - RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp) +static int ntfs_check_and_load_restart_page(struct inode *vi, + RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp, + LSN *lsn) { RESTART_AREA *ra; RESTART_PAGE_HEADER *trp; - int size; - BOOL ret; + int size, err; ntfs_debug("Entering."); /* Check the restart page header for consistency. */ if (!ntfs_check_restart_page_header(vi, rp, pos)) { /* Error output already done inside the function. */ - return FALSE; + return -EINVAL; } /* Check the restart area for consistency. */ if (!ntfs_check_restart_area(vi, rp)) { /* Error output already done inside the function. */ - return FALSE; + return -EINVAL; } ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); /* @@ -352,7 +362,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, if (!trp) { ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " "restart page buffer."); - return FALSE; + return -ENOMEM; } /* * Read the whole of the restart page into the buffer. If it fits @@ -379,6 +389,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, if (IS_ERR(page)) { ntfs_error(vi->i_sb, "Error mapping $LogFile " "page (index %lu).", idx); + err = PTR_ERR(page); + if (err != -EIO && err != -ENOMEM) + err = -EIO; goto err_out; } size = min_t(int, to_read, PAGE_CACHE_SIZE); @@ -392,29 +405,57 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, /* Perform the multi sector transfer deprotection on the buffer. */ if (post_read_mst_fixup((NTFS_RECORD*)trp, le32_to_cpu(rp->system_page_size))) { - ntfs_error(vi->i_sb, "Multi sector transfer error detected in " - "$LogFile restart page."); - goto err_out; + /* + * A multi sector tranfer error was detected. We only need to + * abort if the restart page contents exceed the multi sector + * transfer fixup of the first sector. + */ + if (le16_to_cpu(rp->restart_area_offset) + + le16_to_cpu(ra->restart_area_length) > + NTFS_BLOCK_SIZE - sizeof(u16)) { + ntfs_error(vi->i_sb, "Multi sector transfer error " + "detected in $LogFile restart page."); + err = -EINVAL; + goto err_out; + } + } + /* + * If the restart page is modified by chkdsk or there are no active + * logfile clients, the logfile is consistent. Otherwise, need to + * check the log client records for consistency, too. + */ + err = 0; + if (ntfs_is_rstr_record(rp->magic) && + ra->client_in_use_list != LOGFILE_NO_CLIENT) { + if (!ntfs_check_log_client_array(vi, trp)) { + err = -EINVAL; + goto err_out; + } + } + if (lsn) { + if (ntfs_is_rstr_record(rp->magic)) + *lsn = sle64_to_cpu(ra->current_lsn); + else /* if (ntfs_is_chkd_record(rp->magic)) */ + *lsn = sle64_to_cpu(rp->chkdsk_lsn); } - /* Check the log client records for consistency. */ - ret = ntfs_check_log_client_array(vi, trp); - if (ret && wrp) - *wrp = trp; - else - ntfs_free(trp); ntfs_debug("Done."); - return ret; + if (wrp) + *wrp = trp; + else { err_out: - ntfs_free(trp); - return FALSE; + ntfs_free(trp); + } + return err; } /** * ntfs_check_logfile - check the journal for consistency * @log_vi: struct inode of loaded journal $LogFile to check + * @rp: [OUT] on success this is a copy of the current restart page * * Check the $LogFile journal for consistency and return TRUE if it is - * consistent and FALSE if not. + * consistent and FALSE if not. On success, the current restart page is + * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it. * * At present we only check the two restart pages and ignore the log record * pages. @@ -424,19 +465,18 @@ err_out: * if the $LogFile was created on a system with a different page size to ours * yet and mst deprotection would fail if our page size is smaller. */ -BOOL ntfs_check_logfile(struct inode *log_vi) +BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) { - s64 size, pos, rstr1_pos, rstr2_pos; + s64 size, pos; + LSN rstr1_lsn, rstr2_lsn; ntfs_volume *vol = NTFS_SB(log_vi->i_sb); struct address_space *mapping = log_vi->i_mapping; struct page *page = NULL; u8 *kaddr = NULL; RESTART_PAGE_HEADER *rstr1_ph = NULL; RESTART_PAGE_HEADER *rstr2_ph = NULL; - int log_page_size, log_page_mask, ofs; + int log_page_size, log_page_mask, err; BOOL logfile_is_empty = TRUE; - BOOL rstr1_found = FALSE; - BOOL rstr2_found = FALSE; u8 log_page_bits; ntfs_debug("Entering."); @@ -491,7 +531,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi) if (IS_ERR(page)) { ntfs_error(vol->sb, "Error mapping $LogFile " "page (index %lu).", idx); - return FALSE; + goto err_out; } } kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); @@ -510,99 +550,95 @@ BOOL ntfs_check_logfile(struct inode *log_vi) */ if (ntfs_is_rcrd_recordp((le32*)kaddr)) break; - /* - * A modified by chkdsk restart page means we cannot handle - * this log file. - */ - if (ntfs_is_chkd_recordp((le32*)kaddr)) { - ntfs_error(vol->sb, "$LogFile has been modified by " - "chkdsk. Mount this volume in " - "Windows."); - goto err_out; - } - /* If not a restart page, continue. */ - if (!ntfs_is_rstr_recordp((le32*)kaddr)) { - /* Skip to the minimum page size for the next one. */ + /* If not a (modified by chkdsk) restart page, continue. */ + if (!ntfs_is_rstr_recordp((le32*)kaddr) && + !ntfs_is_chkd_recordp((le32*)kaddr)) { if (!pos) pos = NTFS_BLOCK_SIZE >> 1; continue; } - /* We now know we have a restart page. */ - if (!pos) { - rstr1_found = TRUE; - rstr1_pos = pos; - } else { - if (rstr2_found) { - ntfs_error(vol->sb, "Found more than two " - "restart pages in $LogFile."); - goto err_out; - } - rstr2_found = TRUE; - rstr2_pos = pos; - } /* - * Check the restart page for consistency and get a copy of the - * complete multi sector transfer deprotected restart page. + * Check the (modified by chkdsk) restart page for consistency + * and get a copy of the complete multi sector transfer + * deprotected restart page. */ - if (!ntfs_check_and_load_restart_page(log_vi, + err = ntfs_check_and_load_restart_page(log_vi, (RESTART_PAGE_HEADER*)kaddr, pos, - !pos ? &rstr1_ph : &rstr2_ph)) { - /* Error output already done inside the function. */ - goto err_out; + !rstr1_ph ? &rstr1_ph : &rstr2_ph, + !rstr1_ph ? &rstr1_lsn : &rstr2_lsn); + if (!err) { + /* + * If we have now found the first (modified by chkdsk) + * restart page, continue looking for the second one. + */ + if (!pos) { + pos = NTFS_BLOCK_SIZE >> 1; + continue; + } + /* + * We have now found the second (modified by chkdsk) + * restart page, so we can stop looking. + */ + break; } /* - * We have a valid restart page. The next one must be after - * a whole system page size as specified by the valid restart - * page. + * Error output already done inside the function. Note, we do + * not abort if the restart page was invalid as we might still + * find a valid one further in the file. */ + if (err != -EINVAL) { + ntfs_unmap_page(page); + goto err_out; + } + /* Continue looking. */ if (!pos) - pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1; + pos = NTFS_BLOCK_SIZE >> 1; } - if (page) { + if (page) ntfs_unmap_page(page); - page = NULL; - } if (logfile_is_empty) { NVolSetLogFileEmpty(vol); is_empty: ntfs_debug("Done. ($LogFile is empty.)"); return TRUE; } - if (!rstr1_found || !rstr2_found) { - ntfs_error(vol->sb, "Did not find two restart pages in " - "$LogFile."); - goto err_out; + if (!rstr1_ph) { + BUG_ON(rstr2_ph); + ntfs_error(vol->sb, "Did not find any restart pages in " + "$LogFile and it was not empty."); + return FALSE; + } + /* If both restart pages were found, use the more recent one. */ + if (rstr2_ph) { + /* + * If the second restart area is more recent, switch to it. + * Otherwise just throw it away. + */ + if (rstr2_lsn > rstr1_lsn) { + ntfs_free(rstr1_ph); + rstr1_ph = rstr2_ph; + /* rstr1_lsn = rstr2_lsn; */ + } else + ntfs_free(rstr2_ph); + rstr2_ph = NULL; } - /* - * The two restart areas must be identical except for the update - * sequence number. - */ - ofs = le16_to_cpu(rstr1_ph->usa_ofs); - if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16), - memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs, - le32_to_cpu(rstr1_ph->system_page_size) - ofs))) { - ntfs_error(vol->sb, "The two restart pages in $LogFile do not " - "match."); - goto err_out; - } - ntfs_free(rstr1_ph); - ntfs_free(rstr2_ph); /* All consistency checks passed. */ + if (rp) + *rp = rstr1_ph; + else + ntfs_free(rstr1_ph); ntfs_debug("Done."); return TRUE; err_out: - if (page) - ntfs_unmap_page(page); if (rstr1_ph) ntfs_free(rstr1_ph); - if (rstr2_ph) - ntfs_free(rstr2_ph); return FALSE; } /** * ntfs_is_logfile_clean - check in the journal if the volume is clean * @log_vi: struct inode of loaded journal $LogFile to check + * @rp: copy of the current restart page * * Analyze the $LogFile journal and return TRUE if it indicates the volume was * shutdown cleanly and FALSE if not. @@ -619,11 +655,9 @@ err_out: * is empty this function requires that NVolLogFileEmpty() is true otherwise an * empty volume will be reported as dirty. */ -BOOL ntfs_is_logfile_clean(struct inode *log_vi) +BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) { ntfs_volume *vol = NTFS_SB(log_vi->i_sb); - struct page *page; - RESTART_PAGE_HEADER *rp; RESTART_AREA *ra; ntfs_debug("Entering."); @@ -632,24 +666,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi) ntfs_debug("Done. ($LogFile is empty.)"); return TRUE; } - /* - * Read the first restart page. It will be possibly incomplete and - * will not be multi sector transfer deprotected but we only need the - * first NTFS_BLOCK_SIZE bytes so it does not matter. - */ - page = ntfs_map_page(log_vi->i_mapping, 0); - if (IS_ERR(page)) { - ntfs_error(vol->sb, "Error mapping $LogFile page (index 0)."); + BUG_ON(!rp); + if (!ntfs_is_rstr_record(rp->magic) && + !ntfs_is_chkd_record(rp->magic)) { + ntfs_error(vol->sb, "Restart page buffer is invalid. This is " + "probably a bug in that the $LogFile should " + "have been consistency checked before calling " + "this function."); return FALSE; } - rp = (RESTART_PAGE_HEADER*)page_address(page); - if (!ntfs_is_rstr_record(rp->magic)) { - ntfs_error(vol->sb, "No restart page found at offset zero in " - "$LogFile. This is probably a bug in that " - "the $LogFile should have been consistency " - "checked before calling this function."); - goto err_out; - } ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); /* * If the $LogFile has active clients, i.e. it is open, and we do not @@ -659,15 +684,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi) if (ra->client_in_use_list != LOGFILE_NO_CLIENT && !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { ntfs_debug("Done. $LogFile indicates a dirty shutdown."); - goto err_out; + return FALSE; } - ntfs_unmap_page(page); /* $LogFile indicates a clean shutdown. */ ntfs_debug("Done. $LogFile indicates a clean shutdown."); return TRUE; -err_out: - ntfs_unmap_page(page); - return FALSE; } /** diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index 4ee4378de06..42388f95ea6 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -2,7 +2,7 @@ * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of * the Linux-NTFS project. * - * Copyright (c) 2000-2004 Anton Altaparmakov + * Copyright (c) 2000-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -296,9 +296,11 @@ typedef struct { /* sizeof() = 160 (0xa0) bytes */ } __attribute__ ((__packed__)) LOG_CLIENT_RECORD; -extern BOOL ntfs_check_logfile(struct inode *log_vi); +extern BOOL ntfs_check_logfile(struct inode *log_vi, + RESTART_PAGE_HEADER **rp); -extern BOOL ntfs_is_logfile_clean(struct inode *log_vi); +extern BOOL ntfs_is_logfile_clean(struct inode *log_vi, + const RESTART_PAGE_HEADER *rp); extern BOOL ntfs_empty_logfile(struct inode *log_vi); diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index fac5944df6d..3288bcc2c4a 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -27,27 +27,63 @@ #include <linux/highmem.h> /** - * ntfs_malloc_nofs - allocate memory in multiples of pages - * @size number of bytes to allocate + * __ntfs_malloc - allocate memory in multiples of pages + * @size: number of bytes to allocate + * @gfp_mask: extra flags for the allocator + * + * Internal function. You probably want ntfs_malloc_nofs()... * * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and * returns a pointer to the allocated memory. * * If there was insufficient memory to complete the request, return NULL. + * Depending on @gfp_mask the allocation may be guaranteed to succeed. */ -static inline void *ntfs_malloc_nofs(unsigned long size) +static inline void *__ntfs_malloc(unsigned long size, + unsigned int __nocast gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); /* kmalloc() has per-CPU caches so is faster for now. */ - return kmalloc(PAGE_SIZE, GFP_NOFS); - /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */ + return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); + /* return (void *)__get_free_page(gfp_mask); */ } if (likely(size >> PAGE_SHIFT < num_physpages)) - return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, gfp_mask, PAGE_KERNEL); return NULL; } +/** + * ntfs_malloc_nofs - allocate memory in multiples of pages + * @size: number of bytes to allocate + * + * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and + * returns a pointer to the allocated memory. + * + * If there was insufficient memory to complete the request, return NULL. + */ +static inline void *ntfs_malloc_nofs(unsigned long size) +{ + return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM); +} + +/** + * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages + * @size: number of bytes to allocate + * + * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and + * returns a pointer to the allocated memory. + * + * This function guarantees that the allocation will succeed. It will sleep + * for as long as it takes to complete the allocation. + * + * If there was insufficient memory to complete the request, return NULL. + */ +static inline void *ntfs_malloc_nofs_nofail(unsigned long size) +{ + return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL); +} + static inline void ntfs_free(void *addr) { if (likely(((unsigned long)addr < VMALLOC_START) || diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 317f7c679fd..2c32b84385a 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -511,7 +511,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, } while (bh); tail->b_this_page = head; attach_page_buffers(page, head); - BUG_ON(!page_has_buffers(page)); } bh = head = page_buffers(page); BUG_ON(!bh); @@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) */ if (!NInoTestClearDirty(ni)) goto done; - BUG_ON(!page_has_buffers(page)); bh = head = page_buffers(page); BUG_ON(!bh); rl = NULL; @@ -1955,7 +1953,7 @@ restore_undo_alloc: a = ctx->attr; a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); undo_alloc: - if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { + if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) { ntfs_error(vol->sb, "Failed to free clusters from mft data " "attribute.%s", es); NVolSetErrors(vol); diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 758855b0414..f5b2ac92908 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src, int size) { if (likely((dst != src) && (size > 0))) - memmove(base + dst, base + src, size * sizeof (*base)); + memmove(base + dst, base + src, size * sizeof(*base)); } /** @@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl, } /** + * ntfs_rl_realloc_nofail - Reallocate memory for runlists + * @rl: original runlist + * @old_size: number of runlist elements in the original runlist @rl + * @new_size: number of runlist elements we need space for + * + * As the runlists grow, more memory will be required. To prevent the + * kernel having to allocate and reallocate large numbers of small bits of + * memory, this function returns an entire page of memory. + * + * This function guarantees that the allocation will succeed. It will sleep + * for as long as it takes to complete the allocation. + * + * It is up to the caller to serialize access to the runlist @rl. + * + * N.B. If the new allocation doesn't require a different number of pages in + * memory, the function will return the original pointer. + * + * On success, return a pointer to the newly allocated, or recycled, memory. + * On error, return -errno. The following error codes are defined: + * -ENOMEM - Not enough memory to allocate runlist array. + * -EINVAL - Invalid parameters were passed in. + */ +static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl, + int old_size, int new_size) +{ + runlist_element *new_rl; + + old_size = PAGE_ALIGN(old_size * sizeof(*rl)); + new_size = PAGE_ALIGN(new_size * sizeof(*rl)); + if (old_size == new_size) + return rl; + + new_rl = ntfs_malloc_nofs_nofail(new_size); + BUG_ON(!new_rl); + + if (likely(rl != NULL)) { + if (unlikely(old_size > new_size)) + old_size = new_size; + memcpy(new_rl, rl, old_size); + ntfs_free(rl); + } + return new_rl; +} + +/** * ntfs_are_rl_mergeable - test if two runlists can be joined together * @dst: original runlist * @src: new runlist to test for mergeability with @dst @@ -497,6 +542,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, /* Scan to the end of the source runlist. */ for (dend = 0; likely(drl[dend].length); dend++) ; + dend++; drl = ntfs_rl_realloc(drl, dend, dend + 1); if (IS_ERR(drl)) return drl; @@ -566,8 +612,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ (srl[send - 1].vcn + srl[send - 1].length))); - /* Or we'll lose an end marker */ - if (start && finish && (drl[dins].length == 0)) + /* Or we will lose an end marker. */ + if (finish && !drl[dins].length) ss++; if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) finish = FALSE; @@ -621,11 +667,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { /* Add an unmapped runlist element. */ if (!slots) { - /* FIXME/TODO: We need to have the - * extra memory already! (AIA) */ - drl = ntfs_rl_realloc(drl, ds, ds + 2); - if (!drl) - goto critical_error; + drl = ntfs_rl_realloc_nofail(drl, ds, + ds + 2); slots = 2; } ds++; @@ -640,13 +683,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, drl[ds].length = marker_vcn - drl[ds].vcn; /* Finally add the ENOENT terminator. */ ds++; - if (!slots) { - /* FIXME/TODO: We need to have the extra - * memory already! (AIA) */ - drl = ntfs_rl_realloc(drl, ds, ds + 1); - if (!drl) - goto critical_error; - } + if (!slots) + drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1); drl[ds].vcn = marker_vcn; drl[ds].lcn = LCN_ENOENT; drl[ds].length = (s64)0; @@ -659,11 +697,6 @@ finished: ntfs_debug("Merged runlist:"); ntfs_debug_dump_runlist(drl); return drl; - -critical_error: - /* Critical error! We cannot afford to fail here. */ - ntfs_error(NULL, "Critical error! Not enough memory."); - panic("NTFS: Cannot continue."); } /** @@ -727,6 +760,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol, ntfs_error(vol->sb, "Corrupt attribute."); return ERR_PTR(-EIO); } + /* If the mapping pairs array is valid but empty, nothing to do. */ + if (!vcn && !*buf) + return old_rl; /* Current position in runlist array. */ rlpos = 0; /* Allocate first page and set current runlist size to one page. */ @@ -1419,6 +1455,7 @@ err_out: /** * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn + * @vol: ntfs volume (needed for error output) * @runlist: runlist to truncate * @new_length: the new length of the runlist in VCNs * @@ -1426,12 +1463,16 @@ err_out: * holding the runlist elements to a length of @new_length VCNs. * * If @new_length lies within the runlist, the runlist elements with VCNs of - * @new_length and above are discarded. + * @new_length and above are discarded. As a special case if @new_length is + * zero, the runlist is discarded and set to NULL. * * If @new_length lies beyond the runlist, a sparse runlist element is added to * the end of the runlist @runlist or if the last runlist element is a sparse * one already, this is extended. * + * Note, no checking is done for unmapped runlist elements. It is assumed that + * the caller has mapped any elements that need to be mapped already. + * * Return 0 on success and -errno on error. * * Locking: The caller must hold @runlist->lock for writing. @@ -1446,6 +1487,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, BUG_ON(!runlist); BUG_ON(new_length < 0); rl = runlist->rl; + if (!new_length) { + ntfs_debug("Freeing runlist."); + runlist->rl = NULL; + if (rl) + ntfs_free(rl); + return 0; + } if (unlikely(!rl)) { /* * Create a runlist consisting of a sparse runlist element of @@ -1553,4 +1601,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, return 0; } +/** + * ntfs_rl_punch_nolock - punch a hole into a runlist + * @vol: ntfs volume (needed for error output) + * @runlist: runlist to punch a hole into + * @start: starting VCN of the hole to be created + * @length: size of the hole to be created in units of clusters + * + * Punch a hole into the runlist @runlist starting at VCN @start and of size + * @length clusters. + * + * Return 0 on success and -errno on error, in which case @runlist has not been + * modified. + * + * If @start and/or @start + @length are outside the runlist return error code + * -ENOENT. + * + * If the runlist contains unmapped or error elements between @start and @start + * + @length return error code -EINVAL. + * + * Locking: The caller must hold @runlist->lock for writing. + */ +int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, + const VCN start, const s64 length) +{ + const VCN end = start + length; + s64 delta; + runlist_element *rl, *rl_end, *rl_real_end, *trl; + int old_size; + BOOL lcn_fixup = FALSE; + + ntfs_debug("Entering for start 0x%llx, length 0x%llx.", + (long long)start, (long long)length); + BUG_ON(!runlist); + BUG_ON(start < 0); + BUG_ON(length < 0); + BUG_ON(end < 0); + rl = runlist->rl; + if (unlikely(!rl)) { + if (likely(!start && !length)) + return 0; + return -ENOENT; + } + /* Find @start in the runlist. */ + while (likely(rl->length && start >= rl[1].vcn)) + rl++; + rl_end = rl; + /* Find @end in the runlist. */ + while (likely(rl_end->length && end >= rl_end[1].vcn)) { + /* Verify there are no unmapped or error elements. */ + if (unlikely(rl_end->lcn < LCN_HOLE)) + return -EINVAL; + rl_end++; + } + /* Check the last element. */ + if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE)) + return -EINVAL; + /* This covers @start being out of bounds, too. */ + if (!rl_end->length && end > rl_end->vcn) + return -ENOENT; + if (!length) + return 0; + if (!rl->length) + return -ENOENT; + rl_real_end = rl_end; + /* Determine the runlist size. */ + while (likely(rl_real_end->length)) + rl_real_end++; + old_size = rl_real_end - runlist->rl + 1; + /* If @start is in a hole simply extend the hole. */ + if (rl->lcn == LCN_HOLE) { + /* + * If both @start and @end are in the same sparse run, we are + * done. + */ + if (end <= rl[1].vcn) { + ntfs_debug("Done (requested hole is already sparse)."); + return 0; + } +extend_hole: + /* Extend the hole. */ + rl->length = end - rl->vcn; + /* If @end is in a hole, merge it with the current one. */ + if (rl_end->lcn == LCN_HOLE) { + rl_end++; + rl->length = rl_end->vcn - rl->vcn; + } + /* We have done the hole. Now deal with the remaining tail. */ + rl++; + /* Cut out all runlist elements up to @end. */ + if (rl < rl_end) + memmove(rl, rl_end, (rl_real_end - rl_end + 1) * + sizeof(*rl)); + /* Adjust the beginning of the tail if necessary. */ + if (end > rl->vcn) { + s64 delta = end - rl->vcn; + rl->vcn = end; + rl->length -= delta; + /* Only adjust the lcn if it is real. */ + if (rl->lcn >= 0) + rl->lcn += delta; + } +shrink_allocation: + /* Reallocate memory if the allocation changed. */ + if (rl < rl_end) { + rl = ntfs_rl_realloc(runlist->rl, old_size, + old_size - (rl_end - rl)); + if (IS_ERR(rl)) + ntfs_warning(vol->sb, "Failed to shrink " + "runlist buffer. This just " + "wastes a bit of memory " + "temporarily so we ignore it " + "and return success."); + else + runlist->rl = rl; + } + ntfs_debug("Done (extend hole)."); + return 0; + } + /* + * If @start is at the beginning of a run things are easier as there is + * no need to split the first run. + */ + if (start == rl->vcn) { + /* + * @start is at the beginning of a run. + * + * If the previous run is sparse, extend its hole. + * + * If @end is not in the same run, switch the run to be sparse + * and extend the newly created hole. + * + * Thus both of these cases reduce the problem to the above + * case of "@start is in a hole". + */ + if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) { + rl--; + goto extend_hole; + } + if (end >= rl[1].vcn) { + rl->lcn = LCN_HOLE; + goto extend_hole; + } + /* + * The final case is when @end is in the same run as @start. + * For this need to split the run into two. One run for the + * sparse region between the beginning of the old run, i.e. + * @start, and @end and one for the remaining non-sparse + * region, i.e. between @end and the end of the old run. + */ + trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1); + if (IS_ERR(trl)) + goto enomem_out; + old_size++; + if (runlist->rl != trl) { + rl = trl + (rl - runlist->rl); + rl_end = trl + (rl_end - runlist->rl); + rl_real_end = trl + (rl_real_end - runlist->rl); + runlist->rl = trl; + } +split_end: + /* Shift all the runs up by one. */ + memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl)); + /* Finally, setup the two split runs. */ + rl->lcn = LCN_HOLE; + rl->length = length; + rl++; + rl->vcn += length; + /* Only adjust the lcn if it is real. */ + if (rl->lcn >= 0 || lcn_fixup) + rl->lcn += length; + rl->length -= length; + ntfs_debug("Done (split one)."); + return 0; + } + /* + * @start is neither in a hole nor at the beginning of a run. + * + * If @end is in a hole, things are easier as simply truncating the run + * @start is in to end at @start - 1, deleting all runs after that up + * to @end, and finally extending the beginning of the run @end is in + * to be @start is all that is needed. + */ + if (rl_end->lcn == LCN_HOLE) { + /* Truncate the run containing @start. */ + rl->length = start - rl->vcn; + rl++; + /* Cut out all runlist elements up to @end. */ + if (rl < rl_end) + memmove(rl, rl_end, (rl_real_end - rl_end + 1) * + sizeof(*rl)); + /* Extend the beginning of the run @end is in to be @start. */ + rl->vcn = start; + rl->length = rl[1].vcn - start; + goto shrink_allocation; + } + /* + * If @end is not in a hole there are still two cases to distinguish. + * Either @end is or is not in the same run as @start. + * + * The second case is easier as it can be reduced to an already solved + * problem by truncating the run @start is in to end at @start - 1. + * Then, if @end is in the next run need to split the run into a sparse + * run followed by a non-sparse run (already covered above) and if @end + * is not in the next run switching it to be sparse, again reduces the + * problem to the already covered case of "@start is in a hole". + */ + if (end >= rl[1].vcn) { + /* + * If @end is not in the next run, reduce the problem to the + * case of "@start is in a hole". + */ + if (rl[1].length && end >= rl[2].vcn) { + /* Truncate the run containing @start. */ + rl->length = start - rl->vcn; + rl++; + rl->vcn = start; + rl->lcn = LCN_HOLE; + goto extend_hole; + } + trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1); + if (IS_ERR(trl)) + goto enomem_out; + old_size++; + if (runlist->rl != trl) { + rl = trl + (rl - runlist->rl); + rl_end = trl + (rl_end - runlist->rl); + rl_real_end = trl + (rl_real_end - runlist->rl); + runlist->rl = trl; + } + /* Truncate the run containing @start. */ + rl->length = start - rl->vcn; + rl++; + /* + * @end is in the next run, reduce the problem to the case + * where "@start is at the beginning of a run and @end is in + * the same run as @start". + */ + delta = rl->vcn - start; + rl->vcn = start; + if (rl->lcn >= 0) { + rl->lcn -= delta; + /* Need this in case the lcn just became negative. */ + lcn_fixup = TRUE; + } + rl->length += delta; + goto split_end; + } + /* + * The first case from above, i.e. @end is in the same run as @start. + * We need to split the run into three. One run for the non-sparse + * region between the beginning of the old run and @start, one for the + * sparse region between @start and @end, and one for the remaining + * non-sparse region, i.e. between @end and the end of the old run. + */ + trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2); + if (IS_ERR(trl)) + goto enomem_out; + old_size += 2; + if (runlist->rl != trl) { + rl = trl + (rl - runlist->rl); + rl_end = trl + (rl_end - runlist->rl); + rl_real_end = trl + (rl_real_end - runlist->rl); + runlist->rl = trl; + } + /* Shift all the runs up by two. */ + memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl)); + /* Finally, setup the three split runs. */ + rl->length = start - rl->vcn; + rl++; + rl->vcn = start; + rl->lcn = LCN_HOLE; + rl->length = length; + rl++; + delta = end - rl->vcn; + rl->vcn = end; + rl->lcn += delta; + rl->length -= delta; + ntfs_debug("Done (split both)."); + return 0; +enomem_out: + ntfs_error(vol->sb, "Not enough memory to extend runlist buffer."); + return -ENOMEM; +} + #endif /* NTFS_RW */ diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h index aa0ee6540e7..47728fbb610 100644 --- a/fs/ntfs/runlist.h +++ b/fs/ntfs/runlist.h @@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, const s64 new_length); +int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, + const VCN start, const s64 length); + #endif /* NTFS_RW */ #endif /* _LINUX_NTFS_RUNLIST_H */ diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 41aa8eb6755..453d0d51ea4 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -126,6 +126,14 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) if (*v) \ goto needs_val; \ } +#define NTFS_GETOPT_OCTAL(option, variable) \ + if (!strcmp(p, option)) { \ + if (!v || !*v) \ + goto needs_arg; \ + variable = simple_strtoul(ov = v, &v, 8); \ + if (*v) \ + goto needs_val; \ + } #define NTFS_GETOPT_BOOL(option, variable) \ if (!strcmp(p, option)) { \ BOOL val; \ @@ -157,9 +165,9 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) *v++ = 0; NTFS_GETOPT("uid", uid) else NTFS_GETOPT("gid", gid) - else NTFS_GETOPT("umask", fmask = dmask) - else NTFS_GETOPT("fmask", fmask) - else NTFS_GETOPT("dmask", dmask) + else NTFS_GETOPT_OCTAL("umask", fmask = dmask) + else NTFS_GETOPT_OCTAL("fmask", fmask) + else NTFS_GETOPT_OCTAL("dmask", dmask) else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier) else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) @@ -1133,7 +1141,8 @@ mft_unmap_out: * * Return TRUE on success or FALSE on error. */ -static BOOL load_and_check_logfile(ntfs_volume *vol) +static BOOL load_and_check_logfile(ntfs_volume *vol, + RESTART_PAGE_HEADER **rp) { struct inode *tmp_ino; @@ -1145,7 +1154,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol) /* Caller will display error message. */ return FALSE; } - if (!ntfs_check_logfile(tmp_ino)) { + if (!ntfs_check_logfile(tmp_ino, rp)) { iput(tmp_ino); /* ntfs_check_logfile() will have displayed error output. */ return FALSE; @@ -1689,6 +1698,7 @@ static BOOL load_system_files(ntfs_volume *vol) VOLUME_INFORMATION *vi; ntfs_attr_search_ctx *ctx; #ifdef NTFS_RW + RESTART_PAGE_HEADER *rp; int err; #endif /* NTFS_RW */ @@ -1841,8 +1851,9 @@ get_ctx_vol_failed: * Get the inode for the logfile, check it and determine if the volume * was shutdown cleanly. */ - if (!load_and_check_logfile(vol) || - !ntfs_is_logfile_clean(vol->logfile_ino)) { + rp = NULL; + if (!load_and_check_logfile(vol, &rp) || + !ntfs_is_logfile_clean(vol->logfile_ino, rp)) { static const char *es1a = "Failed to load $LogFile"; static const char *es1b = "$LogFile is not clean"; static const char *es2 = ". Mount in Windows."; @@ -1857,6 +1868,10 @@ get_ctx_vol_failed: "continue nor on_errors=" "remount-ro was specified%s", es1, es2); + if (vol->logfile_ino) { + BUG_ON(!rp); + ntfs_free(rp); + } goto iput_logfile_err_out; } sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; @@ -1867,6 +1882,7 @@ get_ctx_vol_failed: /* This will prevent a read-write remount. */ NVolSetErrors(vol); } + ntfs_free(rp); #endif /* NTFS_RW */ /* Get the root directory inode so we can do path lookups. */ vol->root_ino = ntfs_iget(sb, FILE_root); diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 19c42e231b4..a389a5a16c8 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -372,7 +372,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, return -EINVAL; conversion_err: ntfs_error(vol->sb, "Unicode name contains characters that cannot be " - "converted to character set %s.", nls->charset); + "converted to character set %s. You might want to " + "try to use the mount option nls=utf8.", nls->charset); if (ns != *outs) kfree(ns); if (wc != -ENAMETOOLONG) diff --git a/fs/open.c b/fs/open.c index 4ee2dcc31c2..2fac58c5191 100644 --- a/fs/open.c +++ b/fs/open.c @@ -24,6 +24,7 @@ #include <linux/personality.h> #include <linux/pagemap.h> #include <linux/syscalls.h> +#include <linux/rcupdate.h> #include <asm/unistd.h> @@ -842,14 +843,16 @@ int get_unused_fd(void) { struct files_struct * files = current->files; int fd, error; + struct fdtable *fdt; error = -EMFILE; spin_lock(&files->file_lock); repeat: - fd = find_next_zero_bit(files->open_fds->fds_bits, - files->max_fdset, - files->next_fd); + fdt = files_fdtable(files); + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fdset, + fdt->next_fd); /* * N.B. For clone tasks sharing a files structure, this test @@ -872,14 +875,14 @@ repeat: goto repeat; } - FD_SET(fd, files->open_fds); - FD_CLR(fd, files->close_on_exec); - files->next_fd = fd + 1; + FD_SET(fd, fdt->open_fds); + FD_CLR(fd, fdt->close_on_exec); + fdt->next_fd = fd + 1; #if 1 /* Sanity check */ - if (files->fd[fd] != NULL) { + if (fdt->fd[fd] != NULL) { printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); - files->fd[fd] = NULL; + fdt->fd[fd] = NULL; } #endif error = fd; @@ -893,9 +896,10 @@ EXPORT_SYMBOL(get_unused_fd); static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) { - __FD_CLR(fd, files->open_fds); - if (fd < files->next_fd) - files->next_fd = fd; + struct fdtable *fdt = files_fdtable(files); + __FD_CLR(fd, fdt->open_fds); + if (fd < fdt->next_fd) + fdt->next_fd = fd; } void fastcall put_unused_fd(unsigned int fd) @@ -924,10 +928,11 @@ EXPORT_SYMBOL(put_unused_fd); void fastcall fd_install(unsigned int fd, struct file * file) { struct files_struct *files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); - if (unlikely(files->fd[fd] != NULL)) - BUG(); - files->fd[fd] = file; + fdt = files_fdtable(files); + BUG_ON(fdt->fd[fd] != NULL); + rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); } @@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd) { struct file * filp; struct files_struct *files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); - if (fd >= files->max_fds) + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) goto out_unlock; - filp = files->fd[fd]; + filp = fdt->fd[fd]; if (!filp) goto out_unlock; - files->fd[fd] = NULL; - FD_CLR(fd, files->close_on_exec); + rcu_assign_pointer(fdt->fd[fd], NULL); + FD_CLR(fd, fdt->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); return filp_close(filp, files); diff --git a/fs/pipe.c b/fs/pipe.c index 2c7a23dde2d..66aa0b938d6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode) { DEFINE_WAIT(wait); - prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE); + /* + * Pipes are system-local resources, so sleeping on them + * is considered a noninteractive wait: + */ + prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); up(PIPE_SEM(*inode)); schedule(); finish_wait(PIPE_WAIT(*inode), &wait); diff --git a/fs/proc/array.c b/fs/proc/array.c index 37668fe998a..d88d518d30f 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) { struct group_info *group_info; int g; + struct fdtable *fdt = NULL; read_lock(&tasklist_lock); buffer += sprintf(buffer, @@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer) p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); task_lock(p); + if (p->files) + fdt = files_fdtable(p->files); buffer += sprintf(buffer, "FDSize:\t%d\n" "Groups:\t", - p->files ? p->files->max_fds : 0); + fdt ? fdt->max_fds : 0); group_info = p->group_info; get_group_info(group_info); diff --git a/fs/proc/base.c b/fs/proc/base.c index 84751f3f52d..23db452ab42 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -62,6 +62,7 @@ #include <linux/namespace.h> #include <linux/mm.h> #include <linux/smp_lock.h> +#include <linux/rcupdate.h> #include <linux/kallsyms.h> #include <linux/mount.h> #include <linux/security.h> @@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { *mnt = mntget(file->f_vfsmnt); *dentry = dget(file->f_dentry); - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); return 0; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } return -ENOENT; @@ -1039,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) int retval; char buf[NUMBUF]; struct files_struct * files; + struct fdtable *fdt; retval = -ENOENT; if (!pid_alive(p)) @@ -1061,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) files = get_files_struct(p); if (!files) goto out; - spin_lock(&files->file_lock); + rcu_read_lock(); + fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < files->max_fds; + fd < fdt->max_fds; fd++, filp->f_pos++) { unsigned int i,j; if (!fcheck_files(files, fd)) continue; - spin_unlock(&files->file_lock); + rcu_read_unlock(); j = NUMBUF; i = fd; @@ -1081,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ino = fake_ino(tid, PROC_TID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { - spin_lock(&files->file_lock); + rcu_read_lock(); break; } - spin_lock(&files->file_lock); + rcu_read_lock(); } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } out: @@ -1261,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); if (fcheck_files(files, fd)) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { inode->i_uid = task->euid; @@ -1275,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) security_task_to_inode(task, inode); return 1; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } d_drop(dentry); @@ -1367,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (!files) goto out_unlock; inode->i_mode = S_IFLNK; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (!file) goto out_unlock2; @@ -1375,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, inode->i_mode |= S_IRUSR | S_IXUSR; if (file->f_mode & 2) inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -1385,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, return NULL; out_unlock2: - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); out_unlock: iput(inode); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 133c2868510..effa6c0c467 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode) struct proc_dir_entry *de; struct task_struct *tsk; + truncate_inode_pages(&inode->i_data, 0); + /* Let go of any associated process */ tsk = PROC_I(inode)->task; if (tsk) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index b79162a3547..80f32911c0c 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode) static void qnx4_delete_inode(struct inode *inode) { QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); + truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; qnx4_truncate(inode); lock_kernel(); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ff291c973a5..1a8a1bf2154 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode) 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); struct reiserfs_transaction_handle th; + truncate_inode_pages(&inode->i_data, 0); + reiserfs_write_lock(inode->i_sb); /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index a8e29e9bbbd..4b15761434b 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id) struct reiserfs_journal *journal = SB_JOURNAL(sb); unsigned long bcount = journal->j_bcount; while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); + schedule_timeout_uninterruptible(1); journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; while ((atomic_read(&journal->j_wcount) > 0 || atomic_read(&journal->j_jlock)) && diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 6951c35755b..44b02fc02eb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1934,8 +1934,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (SB_AP_BITMAP(s)) brelse(SB_AP_BITMAP(s)[j].bh); } - if (SB_AP_BITMAP(s)) - vfree(SB_AP_BITMAP(s)); + vfree(SB_AP_BITMAP(s)); } if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); diff --git a/fs/select.c b/fs/select.c index b80e7eb0ac0..f10a10317d5 100644 --- a/fs/select.c +++ b/fs/select.c @@ -22,6 +22,7 @@ #include <linux/personality.h> /* for STICKY_TIMEOUTS */ #include <linux/file.h> #include <linux/fs.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> @@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) unsigned long *open_fds; unsigned long set; int max; + struct fdtable *fdt; /* handle last in-complete long-word first */ set = ~(~0UL << (n & (__NFDBITS-1))); n /= __NFDBITS; - open_fds = current->files->open_fds->fds_bits+n; + fdt = files_fdtable(current->files); + open_fds = fdt->open_fds->fds_bits+n; max = 0; if (set) { set &= BITS(fds, n); @@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout) int retval, i; long __timeout = *timeout; - spin_lock(¤t->files->file_lock); + rcu_read_lock(); retval = max_select_fd(n, fds); - spin_unlock(¤t->files->file_lock); + rcu_read_unlock(); if (retval < 0) return retval; @@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s char *bits; long timeout; int ret, size, max_fdset; + struct fdtable *fdt; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s goto out_nofds; /* max_fdset can increase, so grab it once to avoid race */ - max_fdset = current->files->max_fdset; + rcu_read_lock(); + fdt = files_fdtable(current->files); + max_fdset = fdt->max_fdset; + rcu_read_unlock(); if (n > max_fdset) n = max_fdset; @@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti unsigned int i; struct poll_list *head; struct poll_list *walk; + struct fdtable *fdt; + int max_fdset; /* Do a sanity check on nfds ... */ - if (nfds > current->files->max_fdset && nfds > OPEN_MAX) + rcu_read_lock(); + fdt = files_fdtable(current->files); + max_fdset = fdt->max_fdset; + rcu_read_unlock(); + if (nfds > max_fdset && nfds > OPEN_MAX) return -EINVAL; if (timeout) { diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 4765aaac9fd..10b994428fe 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -331,6 +331,7 @@ static void smb_delete_inode(struct inode *ino) { DEBUG1("ino=%ld\n", ino->i_ino); + truncate_inode_pages(&ino->i_data, 0); lock_kernel(); if (smb_close(ino)) PARANOIA("could not close inode %ld\n", ino->i_ino); diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 220babe91ef..38ab558835c 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir, if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) { /* a damn Win95 bug - sometimes it clags if you ask it too fast */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/5); + schedule_timeout_interruptible(msecs_to_jiffies(200)); continue; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 0530077d9dd..fa33eceb001 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode) static void sysv_delete_inode(struct inode *inode) { + truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; sysv_truncate(inode); lock_kernel(); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 3d68de39fad..b83890beaaa 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); */ void udf_delete_inode(struct inode * inode) { + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) goto no_delete; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 718627ca8b5..55f4aa16e3f 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode) void ufs_delete_inode (struct inode * inode) { + truncate_inode_pages(&inode->i_data, 0); /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ lock_kernel(); mark_inode_dirty(inode); diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index c92306f0fdc..8e8f32dabe5 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,5 +1,3 @@ -menu "XFS support" - config XFS_FS tristate "XFS filesystem support" select EXPORTFS if NFSD!=n @@ -22,27 +20,11 @@ config XFS_FS config XFS_EXPORT bool - default y if XFS_FS && EXPORTFS - -config XFS_RT - bool "Realtime support (EXPERIMENTAL)" - depends on XFS_FS && EXPERIMENTAL - help - If you say Y here you will be able to mount and use XFS filesystems - which contain a realtime subvolume. The realtime subvolume is a - separate area of disk space where only file data is stored. The - realtime subvolume is designed to provide very deterministic - data rates suitable for media streaming applications. - - See the xfs man page in section 5 for a bit more information. - - This feature is unsupported at this time, is not yet fully - functional, and may cause serious problems. - - If unsure, say N. + depends on XFS_FS && EXPORTFS + default y config XFS_QUOTA - bool "Quota support" + tristate "XFS Quota support" depends on XFS_FS help If you say Y here, you will be able to set limits for disk usage on @@ -59,7 +41,7 @@ config XFS_QUOTA they are completely independent subsystems. config XFS_SECURITY - bool "Security Label support" + bool "XFS Security Label support" depends on XFS_FS help Security labels support alternative access control models @@ -71,7 +53,7 @@ config XFS_SECURITY extended attributes for inode security labels, say N. config XFS_POSIX_ACL - bool "POSIX ACL support" + bool "XFS POSIX ACL support" depends on XFS_FS help POSIX Access Control Lists (ACLs) support permissions for users and @@ -82,4 +64,19 @@ config XFS_POSIX_ACL If you don't know what Access Control Lists are, say N. -endmenu +config XFS_RT + bool "XFS Realtime support (EXPERIMENTAL)" + depends on XFS_FS && EXPERIMENTAL + help + If you say Y here you will be able to mount and use XFS filesystems + which contain a realtime subvolume. The realtime subvolume is a + separate area of disk space where only file data is stored. The + realtime subvolume is designed to provide very deterministic + data rates suitable for media streaming applications. + + See the xfs man page in section 5 for a bit more information. + + This feature is unsupported at this time, is not yet fully + functional, and may cause serious problems. + + If unsure, say N. diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index fbfcbe5a7cd..d8c87fa21ad 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -1,5 +1,5 @@ # -# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of version 2 of the GNU General Public License as @@ -55,7 +55,18 @@ ifeq ($(CONFIG_XFS_TRACE),y) endif obj-$(CONFIG_XFS_FS) += xfs.o -obj-$(CONFIG_XFS_QUOTA) += quota/ + +xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ + xfs_dquot.o \ + xfs_dquot_item.o \ + xfs_trans_dquot.o \ + xfs_qm_syscalls.o \ + xfs_qm_bhv.o \ + xfs_qm.o) + +ifeq ($(CONFIG_XFS_QUOTA),y) +xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o +endif xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h index 6c6fd0faa8e..b0d2873ab27 100644 --- a/fs/xfs/linux-2.6/time.h +++ b/fs/xfs/linux-2.6/time.h @@ -39,8 +39,7 @@ typedef struct timespec timespec_t; static inline void delay(long ticks) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ticks); + schedule_timeout_uninterruptible(ticks); } static inline void nanotime(struct timespec *tvp) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 655bf4a78af..e82cf72ac59 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1780,10 +1780,10 @@ xfsbufd( xfsbufd_force_sleep = 0; } - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100); + schedule_timeout_interruptible + (xfs_buf_timer_centisecs * msecs_to_jiffies(10)); - age = (xfs_buf_age_centisecs * HZ) / 100; + age = xfs_buf_age_centisecs * msecs_to_jiffies(10); spin_lock(&pbd_delwrite_lock); list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 0da87bfc999..2302454d8d4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -467,7 +467,7 @@ xfs_flush_inode( igrab(inode); xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work); - delay(HZ/2); + delay(msecs_to_jiffies(500)); } /* @@ -492,7 +492,7 @@ xfs_flush_device( igrab(inode); xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work); - delay(HZ/2); + delay(msecs_to_jiffies(500)); xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } @@ -520,10 +520,9 @@ xfssyncd( struct vfs_sync_work *work, *n; LIST_HEAD (tmp); - timeleft = (xfs_syncd_centisecs * HZ) / 100; + timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - timeleft = schedule_timeout(timeleft); + timeleft = schedule_timeout_interruptible(timeleft); /* swsusp */ try_to_freeze(); if (kthread_should_stop()) @@ -537,7 +536,8 @@ xfssyncd( */ if (!timeleft || list_empty(&vfsp->vfs_sync_list)) { if (!timeleft) - timeleft = (xfs_syncd_centisecs * HZ) / 100; + timeleft = xfs_syncd_centisecs * + msecs_to_jiffies(10); INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list); list_add_tail(&vfsp->vfs_sync_work.w_list, &vfsp->vfs_sync_list); diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6 index 8b7b676718b..93e60e83935 100644 --- a/fs/xfs/quota/Makefile-linux-2.6 +++ b/fs/xfs/quota/Makefile-linux-2.6 @@ -41,13 +41,13 @@ ifeq ($(CONFIG_XFS_TRACE),y) EXTRA_CFLAGS += -DXFS_VNODE_TRACE endif -obj-$(CONFIG_XFS_QUOTA) += xfs_quota.o - -xfs_quota-y += xfs_dquot.o \ +xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_dquot_item.o \ xfs_trans_dquot.o \ xfs_qm_syscalls.o \ xfs_qm_bhv.o \ xfs_qm.o -xfs_quota-$(CONFIG_PROC_FS) += xfs_qm_stats.o +ifeq ($(CONFIG_XFS_QUOTA),y) +xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o +endif diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 3dae14c8c55..fa8394f9437 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -170,7 +170,7 @@ ktrace_enter( void *val14, void *val15) { - static lock_t wrap_lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(wrap_lock); unsigned long flags; int index; ktrace_entry_t *ktep; diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index ae35189b3d7..5ab0dd885b1 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -40,22 +40,28 @@ #include <asm/byteorder.h> -#ifdef __LITTLE_ENDIAN -# define __BYTE_ORDER __LITTLE_ENDIAN -#endif #ifdef __BIG_ENDIAN -# define __BYTE_ORDER __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +#else /* __KERNEL__ */ + +#if __BYTE_ORDER == __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST #endif #endif /* __KERNEL__ */ /* do we need conversion? */ - #define ARCH_NOCONVERT 1 -#if __BYTE_ORDER == __LITTLE_ENDIAN -# define ARCH_CONVERT 0 -#else +#ifdef XFS_NATIVE_HOST # define ARCH_CONVERT ARCH_NOCONVERT +#else +# define ARCH_CONVERT 0 #endif /* generic swapping macros */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 09c413576ba..09a77b17565 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2017,7 +2017,7 @@ xfs_bmbt_get_state( ext_flag); } -#if __BYTE_ORDER != __BIG_ENDIAN +#ifndef XFS_NATIVE_HOST /* Endian flipping versions of the bmbt extraction functions */ void xfs_bmbt_disk_get_all( @@ -2087,7 +2087,7 @@ xfs_bmbt_disk_get_state( return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r), ext_flag); } -#endif +#endif /* XFS_NATIVE_HOST */ /* @@ -2531,7 +2531,7 @@ xfs_bmbt_set_allf( #endif /* XFS_BIG_BLKNOS */ } -#if __BYTE_ORDER != __BIG_ENDIAN +#ifndef XFS_NATIVE_HOST /* * Set all the fields in a bmap extent record from the uncompressed form. */ @@ -2617,7 +2617,7 @@ xfs_bmbt_disk_set_allf( } #endif /* XFS_BIG_BLKNOS */ } -#endif +#endif /* XFS_NATIVE_HOST */ /* * Set the blockcount field in a bmap extent record. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0a40cf126c2..2cf4fe45cbc 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -62,7 +62,7 @@ typedef struct xfs_bmdr_block * l1:0-20 are blockcount. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#ifndef XFS_NATIVE_HOST #define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */ #define BMBT_EXNTFLAG_BITOFF 0 @@ -87,7 +87,7 @@ typedef struct xfs_bmdr_block #define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */ #define BMBT_BLOCKCOUNT_BITLEN 21 -#endif +#endif /* XFS_NATIVE_HOST */ #define BMBT_USE_64 1 @@ -505,7 +505,7 @@ xfs_exntst_t xfs_bmbt_get_state( xfs_bmbt_rec_t *r); -#if __BYTE_ORDER != __BIG_ENDIAN +#ifndef XFS_NATIVE_HOST void xfs_bmbt_disk_get_all( xfs_bmbt_rec_t *r, @@ -538,7 +538,7 @@ xfs_bmbt_disk_get_startoff( xfs_bmbt_get_blockcount(r) #define xfs_bmbt_disk_get_startoff(r) \ xfs_bmbt_get_startoff(r) -#endif +#endif /* XFS_NATIVE_HOST */ int xfs_bmbt_increment( @@ -623,7 +623,7 @@ xfs_bmbt_set_state( xfs_bmbt_rec_t *r, xfs_exntst_t v); -#if __BYTE_ORDER != __BIG_ENDIAN +#ifndef XFS_NATIVE_HOST void xfs_bmbt_disk_set_all( xfs_bmbt_rec_t *r, @@ -641,7 +641,7 @@ xfs_bmbt_disk_set_allf( xfs_bmbt_set_all(r, s) #define xfs_bmbt_disk_set_allf(r, o, b, c, v) \ xfs_bmbt_set_allf(r, o, b, c, v) -#endif +#endif /* XFS_NATIVE_HOST */ void xfs_bmbt_to_bmdr( diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h index dd423ce1bc8..480bffc1f29 100644 --- a/fs/xfs/xfs_dir_leaf.h +++ b/fs/xfs/xfs_dir_leaf.h @@ -127,13 +127,13 @@ typedef union { * Watch the order here (endian-ness dependent). */ struct { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#ifndef XFS_NATIVE_HOST xfs_dahash_t h; /* hash value */ __uint32_t be; /* block and entry */ -#else /* __BYTE_ORDER == __BIG_ENDIAN */ +#else __uint32_t be; /* block and entry */ xfs_dahash_t h; /* hash value */ -#endif /* __BYTE_ORDER == __BIG_ENDIAN */ +#endif /* XFS_NATIVE_HOST */ } s; } xfs_dircook_t; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 276ec70eb7f..50e2cadf909 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -341,7 +341,7 @@ xfs_inode_item_format( nrecs = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(nrecs > 0); -#if __BYTE_ORDER == __BIG_ENDIAN +#ifdef XFS_NATIVE_HOST if (nrecs == ip->i_d.di_nextents) { /* * There are no delayed allocation @@ -473,7 +473,7 @@ xfs_inode_item_format( #endif ASSERT(nrecs > 0); ASSERT(nrecs == ip->i_d.di_anextents); -#if __BYTE_ORDER == __BIG_ENDIAN +#ifdef XFS_NATIVE_HOST /* * There are not delayed allocation extents * for attributes, so just point at the array. diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index eb7fdc6ebc3..a884cea82fc 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -112,7 +112,7 @@ struct xfs_mount; * this has endian issues, of course. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#ifndef XFS_NATIVE_HOST #define GET_CLIENT_ID(i,arch) \ ((i) & 0xff) #else @@ -414,14 +414,10 @@ typedef struct xlog_op_header { #define XLOG_FMT_IRIX_BE 3 /* our fmt */ -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define XLOG_FMT XLOG_FMT_LINUX_LE -#else -#if __BYTE_ORDER == __BIG_ENDIAN +#ifdef XFS_NATIVE_HOST #define XLOG_FMT XLOG_FMT_LINUX_BE #else -#error unknown byte order -#endif +#define XLOG_FMT XLOG_FMT_LINUX_LE #endif typedef struct xlog_rec_header { |