aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/auth_x.c53
-rw-r--r--fs/ceph/caps.c73
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/inode.c16
-rw-r--r--fs/ceph/mds_client.c43
-rw-r--r--fs/ceph/messenger.c19
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c29
-rw-r--r--fs/ceph/osd_client.h2
-rw-r--r--fs/ceph/osdmap.c17
-rw-r--r--fs/ceph/snap.c6
-rw-r--r--fs/ext3/ialloc.c4
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/super.c29
-rw-r--r--fs/fat/namei_vfat.c6
-rw-r--r--fs/fscache/object.c6
-rw-r--r--fs/fscache/operation.c4
-rw-r--r--fs/logfs/dev_bdev.c9
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/journal.c7
-rw-r--r--fs/logfs/logfs.h1
-rw-r--r--fs/logfs/readwrite.c13
-rw-r--r--fs/logfs/segment.c54
-rw-r--r--fs/logfs/super.c15
-rw-r--r--fs/namei.c18
-rw-r--r--fs/nilfs2/segbuf.c8
-rw-r--r--fs/nilfs2/segment.c15
-rw-r--r--fs/ocfs2/acl.c77
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c4
-rw-r--r--fs/ocfs2/inode.c15
-rw-r--r--fs/ocfs2/localalloc.c10
-rw-r--r--fs/ocfs2/locks.c2
-rw-r--r--fs/ocfs2/namei.c28
-rw-r--r--fs/ocfs2/ocfs2.h14
-rw-r--r--fs/ocfs2/refcounttree.c1
-rw-r--r--fs/ocfs2/suballoc.c129
-rw-r--r--fs/ocfs2/suballoc.h5
-rw-r--r--fs/ocfs2/xattr.c12
-rw-r--r--fs/proc/base.c5
42 files changed, 530 insertions, 248 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 23bb0ceabe3..ce8ef610772 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode,
/*
* We are only allowed to write into/dirty the page if the page is
* clean, or already dirty within the same snap context.
+ *
+ * called with page locked.
+ * return success with page locked,
+ * or any failure (incl -EAGAIN) with page unlocked.
*/
static int ceph_update_writeable_page(struct file *file,
loff_t pos, unsigned len,
@@ -961,9 +965,11 @@ retry_locked:
snapc = ceph_get_snap_context((void *)page->private);
unlock_page(page);
ceph_queue_writeback(inode);
- wait_event_interruptible(ci->i_cap_wq,
+ r = wait_event_interruptible(ci->i_cap_wq,
context_is_writeable_or_written(inode, snapc));
ceph_put_snap_context(snapc);
+ if (r == -ERESTARTSYS)
+ return r;
return -EAGAIN;
}
@@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
int r;
do {
- /* get a page*/
+ /* get a page */
page = grab_cache_page_write_begin(mapping, index, 0);
if (!page)
return -ENOMEM;
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index f0318427b6d..8d8a8496476 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
return (ac->want_keys & xi->have_keys) == ac->want_keys;
}
+static int ceph_x_encrypt_buflen(int ilen)
+{
+ return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
+ sizeof(u32);
+}
+
static int ceph_x_encrypt(struct ceph_crypto_key *secret,
void *ibuf, int ilen, void *obuf, size_t olen)
{
@@ -150,6 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
struct timespec validity;
struct ceph_crypto_key old_key;
void *tp, *tpend;
+ struct ceph_timespec new_validity;
+ struct ceph_crypto_key new_session_key;
+ struct ceph_buffer *new_ticket_blob;
+ unsigned long new_expires, new_renew_after;
+ u64 new_secret_id;
ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
@@ -182,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
goto bad;
memcpy(&old_key, &th->session_key, sizeof(old_key));
- ret = ceph_crypto_key_decode(&th->session_key, &dp, dend);
+ ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
if (ret)
goto out;
- ceph_decode_copy(&dp, &th->validity, sizeof(th->validity));
- ceph_decode_timespec(&validity, &th->validity);
- th->expires = get_seconds() + validity.tv_sec;
- th->renew_after = th->expires - (validity.tv_sec / 4);
- dout(" expires=%lu renew_after=%lu\n", th->expires,
- th->renew_after);
+ ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+ ceph_decode_timespec(&validity, &new_validity);
+ new_expires = get_seconds() + validity.tv_sec;
+ new_renew_after = new_expires - (validity.tv_sec / 4);
+ dout(" expires=%lu renew_after=%lu\n", new_expires,
+ new_renew_after);
/* ticket blob for service */
ceph_decode_8_safe(&p, end, is_enc, bad);
@@ -216,10 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
dout(" ticket blob is %d bytes\n", dlen);
ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
struct_v = ceph_decode_8(&tp);
- th->secret_id = ceph_decode_64(&tp);
- ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend);
+ new_secret_id = ceph_decode_64(&tp);
+ ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
if (ret)
goto out;
+
+ /* all is well, update our ticket */
+ ceph_crypto_key_destroy(&th->session_key);
+ if (th->ticket_blob)
+ ceph_buffer_put(th->ticket_blob);
+ th->session_key = new_session_key;
+ th->ticket_blob = new_ticket_blob;
+ th->validity = new_validity;
+ th->secret_id = new_secret_id;
+ th->expires = new_expires;
+ th->renew_after = new_renew_after;
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
type, ceph_entity_type_name(type), th->secret_id,
(int)th->ticket_blob->vec.iov_len);
@@ -242,7 +264,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
struct ceph_x_ticket_handler *th,
struct ceph_x_authorizer *au)
{
- int len;
+ int maxlen;
struct ceph_x_authorize_a *msg_a;
struct ceph_x_authorize_b msg_b;
void *p, *end;
@@ -253,15 +275,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
dout("build_authorizer for %s %p\n",
ceph_entity_type_name(th->service), au);
- len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) +
- ticket_blob_len + 16;
- dout(" need len %d\n", len);
- if (au->buf && au->buf->alloc_len < len) {
+ maxlen = sizeof(*msg_a) + sizeof(msg_b) +
+ ceph_x_encrypt_buflen(ticket_blob_len);
+ dout(" need len %d\n", maxlen);
+ if (au->buf && au->buf->alloc_len < maxlen) {
ceph_buffer_put(au->buf);
au->buf = NULL;
}
if (!au->buf) {
- au->buf = ceph_buffer_new(len, GFP_NOFS);
+ au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
if (!au->buf)
return -ENOMEM;
}
@@ -296,6 +318,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
(int)au->buf->vec.iov_len);
+ BUG_ON(au->buf->vec.iov_len > maxlen);
return 0;
out_buf:
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb357b..7d0a0d0adc1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session)
+ __releases(session->s_mutex)
{
struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_cap *cap;
int file_wanted, used;
int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
- int drop_session_lock = session ? 0 : 1;
int issued, implemented, want, retain, revoking, flushing = 0;
int mds = -1; /* keep track of how far we've gone through i_caps list
to avoid an infinite loop on retry */
@@ -1639,7 +1639,7 @@ ack:
if (queue_invalidate)
ceph_queue_invalidate(inode);
- if (session && drop_session_lock)
+ if (session)
mutex_unlock(&session->s_mutex);
if (took_snap_rwsem)
up_read(&mdsc->snap_rwsem);
@@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
* Handle a cap GRANT message from the MDS. (Note that a GRANT may
* actually be a revocation if it specifies a smaller cap set.)
*
- * caller holds s_mutex.
+ * caller holds s_mutex and i_lock, we drop both.
+ *
* return value:
* 0 - ok
* 1 - check_caps on auth cap only (writeback)
* 2 - check_caps (ack revoke)
*/
-static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
- struct ceph_mds_session *session,
- struct ceph_cap *cap,
- struct ceph_buffer *xattr_buf)
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap,
+ struct ceph_buffer *xattr_buf)
__releases(inode->i_lock)
-
+ __releases(session->s_mutex)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
u64 size = le64_to_cpu(grant->size);
u64 max_size = le64_to_cpu(grant->max_size);
struct timespec mtime, atime, ctime;
- int reply = 0;
+ int check_caps = 0;
int wake = 0;
int writeback = 0;
int revoked_rdcache = 0;
@@ -2329,11 +2330,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
writeback = 1; /* will delay ack */
else if (dirty & ~newcaps)
- reply = 1; /* initiate writeback in check_caps */
+ check_caps = 1; /* initiate writeback in check_caps */
else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
revoked_rdcache)
- reply = 2; /* send revoke ack in check_caps */
+ check_caps = 2; /* send revoke ack in check_caps */
cap->issued = newcaps;
+ cap->implemented |= newcaps;
} else if (cap->issued == newcaps) {
dout("caps unchanged: %s -> %s\n",
ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2348,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
* pending revocation */
wake = 1;
}
+ BUG_ON(cap->issued & ~cap->implemented);
spin_unlock(&inode->i_lock);
if (writeback)
@@ -2359,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
ceph_queue_invalidate(inode);
if (wake)
wake_up(&ci->i_cap_wq);
- return reply;
+
+ if (check_caps == 1)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+ session);
+ else if (check_caps == 2)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+ else
+ mutex_unlock(&session->s_mutex);
}
/*
@@ -2548,9 +2558,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
ci->i_cap_exporting_issued = cap->issued;
}
__ceph_remove_cap(cap);
- } else {
- WARN_ON(!cap);
}
+ /* else, we already released it */
spin_unlock(&inode->i_lock);
}
@@ -2621,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u64 cap_id;
u64 size, max_size;
u64 tid;
- int check_caps = 0;
void *snaptrace;
- int r;
dout("handle_caps from mds%d\n", mds);
@@ -2668,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session,
snaptrace, le32_to_cpu(h->snap_trace_len));
- check_caps = 1; /* we may have sent a RELEASE to the old auth */
- goto done;
+ ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
+ session);
+ goto done_unlocked;
}
/* the rest require a cap */
@@ -2686,16 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
switch (op) {
case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT:
- r = handle_cap_grant(inode, h, session, cap, msg->middle);
- if (r == 1)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
- session);
- else if (r == 2)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_NODELAY,
- session);
- break;
+ handle_cap_grant(inode, h, session, cap, msg->middle);
+ goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2713,9 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
done:
mutex_unlock(&session->s_mutex);
-
- if (check_caps)
- ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
+done_unlocked:
if (inode)
iput(inode);
return;
@@ -2838,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
int ret = 0;
-
- dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
- mds, ceph_cap_string(drop), ceph_cap_string(unless));
+ int used = 0;
spin_lock(&inode->i_lock);
+ used = __ceph_caps_used(ci);
+
+ dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
+ mds, ceph_cap_string(used), ceph_cap_string(drop),
+ ceph_cap_string(unless));
+
+ /* only drop unused caps */
+ drop &= ~used;
+
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
if (force ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 5107384ee02..8a9116e15b7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -288,8 +288,10 @@ more:
CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
/* discard old result, if any */
- if (fi->last_readdir)
+ if (fi->last_readdir) {
ceph_mdsc_put_request(fi->last_readdir);
+ fi->last_readdir = NULL;
+ }
/* requery frag tree, as the frag topology may have changed */
frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7abe1aed819..aca82d55cc5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode)
ceph_queue_caps_release(inode);
+ /*
+ * we may still have a snap_realm reference if there are stray
+ * caps in i_cap_exporting_issued or i_snap_caps.
+ */
+ if (ci->i_snap_realm) {
+ struct ceph_mds_client *mdsc =
+ &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_snap_realm *realm = ci->i_snap_realm;
+
+ dout(" dropping residual ref to snap realm %p\n", realm);
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_del_init(&ci->i_snap_realm_item);
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_put_snap_realm(mdsc, realm);
+ }
+
kfree(ci->i_symlink);
while ((n = rb_first(&ci->i_fragtree)) != NULL) {
frag = rb_entry(n, struct ceph_inode_frag, node);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a2600101ec2..5c7920be642 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
struct ceph_mds_session *s;
s = kzalloc(sizeof(*s), GFP_NOFS);
+ if (!s)
+ return ERR_PTR(-ENOMEM);
s->s_mdsc = mdsc;
s->s_mds = mds;
s->s_state = CEPH_MDS_SESSION_NEW;
@@ -529,7 +531,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
rb_erase(&req->r_node, &mdsc->request_tree);
- ceph_mdsc_put_request(req);
+ RB_CLEAR_NODE(&req->r_node);
if (req->r_unsafe_dir) {
struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
@@ -538,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock);
}
+
+ ceph_mdsc_put_request(req);
}
/*
@@ -862,6 +866,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
if (time_after_eq(jiffies, session->s_cap_ttl) &&
time_after_eq(session->s_cap_ttl, session->s_renew_requested))
pr_info("mds%d caps stale\n", session->s_mds);
+ session->s_renew_requested = jiffies;
/* do not try to renew caps until a recovering mds has reconnected
* with its clients. */
@@ -874,7 +879,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
ceph_mds_state_name(state));
- session->s_renew_requested = jiffies;
msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
++session->s_renew_seq);
if (IS_ERR(msg))
@@ -1566,8 +1570,13 @@ static int __do_request(struct ceph_mds_client *mdsc,
/* get, open session */
session = __ceph_lookup_mds_session(mdsc, mds);
- if (!session)
+ if (!session) {
session = register_session(mdsc, mds);
+ if (IS_ERR(session)) {
+ err = PTR_ERR(session);
+ goto finish;
+ }
+ }
dout("do_request mds%d session %p state %s\n", mds, session,
session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
@@ -1770,7 +1779,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
dout("handle_reply %p\n", req);
/* correct session? */
- if (!req->r_session && req->r_session != session) {
+ if (req->r_session != session) {
pr_err("mdsc_handle_reply got %llu on session mds%d"
" not mds%d\n", tid, session->s_mds,
req->r_session ? req->r_session->s_mds : -1);
@@ -2682,29 +2691,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
*/
static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
{
- struct ceph_mds_request *req = NULL;
+ struct ceph_mds_request *req = NULL, *nextreq;
struct rb_node *n;
mutex_lock(&mdsc->mutex);
dout("wait_unsafe_requests want %lld\n", want_tid);
+restart:
req = __get_oldest_req(mdsc);
while (req && req->r_tid <= want_tid) {
+ /* find next request */
+ n = rb_next(&req->r_node);
+ if (n)
+ nextreq = rb_entry(n, struct ceph_mds_request, r_node);
+ else
+ nextreq = NULL;
if ((req->r_op & CEPH_MDS_OP_WRITE)) {
/* write op */
ceph_mdsc_get_request(req);
+ if (nextreq)
+ ceph_mdsc_get_request(nextreq);
mutex_unlock(&mdsc->mutex);
dout("wait_unsafe_requests wait on %llu (want %llu)\n",
req->r_tid, want_tid);
wait_for_completion(&req->r_safe_completion);
mutex_lock(&mdsc->mutex);
- n = rb_next(&req->r_node);
ceph_mdsc_put_request(req);
- } else {
- n = rb_next(&req->r_node);
+ if (!nextreq)
+ break; /* next dne before, so we're done! */
+ if (RB_EMPTY_NODE(&nextreq->r_node)) {
+ /* next request was removed from tree */
+ ceph_mdsc_put_request(nextreq);
+ goto restart;
+ }
+ ceph_mdsc_put_request(nextreq); /* won't go away */
}
- if (!n)
- break;
- req = rb_entry(n, struct ceph_mds_request, r_node);
+ req = nextreq;
}
mutex_unlock(&mdsc->mutex);
dout("wait_unsafe_requests done\n");
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 781656a49bf..a32f0f896d9 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
}
/*
+ * return true if this connection ever successfully opened
+ */
+bool ceph_con_opened(struct ceph_connection *con)
+{
+ return con->connect_seq > 0;
+}
+
+/*
* generic get/put
*/
struct ceph_connection *ceph_con_get(struct ceph_connection *con)
@@ -830,13 +838,6 @@ static void prepare_read_connect(struct ceph_connection *con)
con->in_base_pos = 0;
}
-static void prepare_read_connect_retry(struct ceph_connection *con)
-{
- dout("prepare_read_connect_retry %p\n", con);
- con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr)
- + sizeof(con->peer_addr_for_me);
-}
-
static void prepare_read_ack(struct ceph_connection *con)
{
dout("prepare_read_ack %p\n", con);
@@ -1146,7 +1147,7 @@ static int process_connect(struct ceph_connection *con)
}
con->auth_retry = 1;
prepare_write_connect(con->msgr, con, 0);
- prepare_read_connect_retry(con);
+ prepare_read_connect(con);
break;
case CEPH_MSGR_TAG_RESETSESSION:
@@ -1843,8 +1844,6 @@ static void ceph_fault(struct ceph_connection *con)
goto out;
}
- clear_bit(BUSY, &con->state); /* to avoid an improbable race */
-
mutex_lock(&con->mutex);
if (test_bit(CLOSED, &con->state))
goto out_unlock;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 4caaa591111..a343dae73cd 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr,
struct ceph_connection *con);
extern void ceph_con_open(struct ceph_connection *con,
struct ceph_entity_addr *addr);
+extern bool ceph_con_opened(struct ceph_connection *con);
extern void ceph_con_close(struct ceph_connection *con);
extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dbe63db9762..c7b4dedaace 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
*/
static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{
+ struct ceph_osd_request *req;
int ret = 0;
dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
if (list_empty(&osd->o_requests)) {
__remove_osd(osdc, osd);
+ } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
+ &osd->o_con.peer_addr,
+ sizeof(osd->o_con.peer_addr)) == 0 &&
+ !ceph_con_opened(&osd->o_con)) {
+ dout(" osd addr hasn't changed and connection never opened,"
+ " letting msgr retry");
+ /* touch each r_stamp for handle_timeout()'s benfit */
+ list_for_each_entry(req, &osd->o_requests, r_osd_item)
+ req->r_stamp = jiffies;
+ ret = -EAGAIN;
} else {
ceph_con_close(&osd->o_con);
ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc,
reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */
reqhead->reassert_version = req->r_reassert_version;
- req->r_sent_stamp = jiffies;
+ req->r_stamp = jiffies;
list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
ceph_msg_get(req->r_request); /* send consumes a ref */
@@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work)
unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
unsigned long keepalive =
osdc->client->mount_args->osd_keepalive_timeout * HZ;
- unsigned long last_sent = 0;
+ unsigned long last_stamp = 0;
struct rb_node *p;
struct list_head slow_osds;
@@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work)
req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
r_req_lru_item);
- if (time_before(jiffies, req->r_sent_stamp + timeout))
+ if (time_before(jiffies, req->r_stamp + timeout))
break;
- BUG_ON(req == last_req && req->r_sent_stamp == last_sent);
+ BUG_ON(req == last_req && req->r_stamp == last_stamp);
last_req = req;
- last_sent = req->r_sent_stamp;
+ last_stamp = req->r_stamp;
osd = req->r_osd;
BUG_ON(!osd);
@@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work)
*/
INIT_LIST_HEAD(&slow_osds);
list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
- if (time_before(jiffies, req->r_sent_stamp + keepalive))
+ if (time_before(jiffies, req->r_stamp + keepalive))
break;
osd = req->r_osd;
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc,
dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
if (kickosd) {
- __reset_osd(osdc, kickosd);
+ err = __reset_osd(osdc, kickosd);
+ if (err == -EAGAIN)
+ return 1;
} else {
for (p = rb_first(&osdc->osds); p; p = n) {
struct ceph_osd *osd =
@@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc,
kick:
dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
- req->r_osd->o_osd);
+ req->r_osd ? req->r_osd->o_osd : -1);
req->r_flags |= CEPH_OSD_FLAG_RETRY;
err = __send_request(osdc, req);
if (err) {
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 1b1a3ca43af..b0759911e7c 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -70,7 +70,7 @@ struct ceph_osd_request {
char r_oid[40]; /* object name */
int r_oid_len;
- unsigned long r_sent_stamp;
+ unsigned long r_stamp; /* send OR check time */
bool r_resend; /* msg send failed, needs retry */
struct ceph_file_layout r_file_layout;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index b83f2692b83..d82fe87c2a6 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
return NULL;
}
+void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
+{
+ ceph_decode_copy(p, &pi->v, sizeof(pi->v));
+ calc_pg_masks(pi);
+ *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
+ *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
+}
+
/*
* decode a full map.
*/
@@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ev, CEPH_PG_POOL_VERSION);
goto bad;
}
- ceph_decode_copy(p, &pi->v, sizeof(pi->v));
+ __decode_pool(p, pi);
__insert_pg_pool(&map->pg_pools, pi);
- calc_pg_masks(pi);
- *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
- *p += le32_to_cpu(pi->v.num_removed_snap_intervals)
- * sizeof(u64) * 2;
}
ceph_decode_32_safe(p, end, map->pool_max, bad);
@@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
pi->id = pool;
__insert_pg_pool(&map->pg_pools, pi);
}
- ceph_decode_copy(p, &pi->v, sizeof(pi->v));
- calc_pg_masks(pi);
+ __decode_pool(p, pi);
}
/* old_pool */
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index bf2a5f3846a..df04e210a05 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
because we rebuild_snap_realms() works _downward_ in
hierarchy after each update.) */
if (realm->cached_context &&
- realm->cached_context->seq <= realm->seq &&
+ realm->cached_context->seq == realm->seq &&
(!parent ||
- realm->cached_context->seq <= parent->cached_context->seq)) {
+ realm->cached_context->seq >= parent->cached_context->seq)) {
dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
" (unchanged)\n",
realm->ino, realm, realm->cached_context,
@@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
* queued (again) by ceph_update_snap_trace()
* below. Queue it _now_, under the old context.
*/
+ spin_lock(&realm->inodes_with_caps_lock);
list_del_init(&ci->i_snap_realm_item);
+ spin_unlock(&realm->inodes_with_caps_lock);
spin_unlock(&inode->i_lock);
ceph_queue_cap_snap(ci,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index ef9008b885b..0d0e97ed3ff 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -582,7 +582,9 @@ got:
inode->i_generation = sbi->s_next_generation++;
spin_unlock(&sbi->s_next_gen_lock);
- ei->i_state = EXT3_STATE_NEW;
+ ei->i_state_flags = 0;
+ ext3_set_inode_state(inode, EXT3_STATE_NEW);
+
ei->i_extra_isize =
(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 7f920b7263a..ea33bdf0a30 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
- ei->i_state = 0;
+ ei->i_state_flags = 0;
ei->i_dir_start_lookup = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 361c0b9962a..57f6eef6ccd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -263,7 +263,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ext4_group_t f;
f = ext4_flex_group(sbi, block_group);
- atomic_dec(&sbi->s_flex_groups[f].free_inodes);
+ atomic_dec(&sbi->s_flex_groups[f].used_dirs);
}
}
@@ -773,7 +773,7 @@ static int ext4_claim_inode(struct super_block *sb,
if (sbi->s_log_groups_per_flex) {
ext4_group_t f = ext4_flex_group(sbi, group);
- atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+ atomic_inc(&sbi->s_flex_groups[f].used_dirs);
}
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 986120f3006..11119e07233 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1035,7 +1035,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
sector_t lblock)
{
struct ext4_inode_info *ei = EXT4_I(inode);
- int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+ sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
int blk_bits;
if (lblock < EXT4_NDIR_BLOCKS)
@@ -1050,7 +1050,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
}
ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
ei->i_da_metadata_calc_len = 1;
- blk_bits = roundup_pow_of_two(lblock + 1);
+ blk_bits = order_base_2(lblock);
return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba191dae873..e14d22c170d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -68,7 +68,21 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
+static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data, struct vfsmount *mnt);
+#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+static struct file_system_type ext3_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ext3",
+ .get_sb = ext4_get_sb,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#else
+#define IS_EXT3_SB(sb) (0)
+#endif
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
@@ -2539,7 +2553,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* enable delayed allocation by default
* Use -o nodelalloc to turn it off
*/
- set_opt(sbi->s_mount_opt, DELALLOC);
+ if (!IS_EXT3_SB(sb))
+ set_opt(sbi->s_mount_opt, DELALLOC);
if (!parse_options((char *) data, sb, &journal_devnum,
&journal_ioprio, NULL, 0))
@@ -4068,7 +4083,7 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
}
-#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
@@ -4095,15 +4110,7 @@ static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
#endif
-#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static struct file_system_type ext3_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext3",
- .get_sb = ext4_get_sb,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
+#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext3(void)
{
int err = register_filesystem(&ext3_fs_type);
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c1ef5015486..6fcc7e71fba 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -309,7 +309,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
{
struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options;
wchar_t *ip, *ext_start, *end, *name_start;
- unsigned char base[9], ext[4], buf[8], *p;
+ unsigned char base[9], ext[4], buf[5], *p;
unsigned char charbuf[NLS_MAX_CHARSET_SIZE];
int chl, chi;
int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen;
@@ -467,7 +467,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
return 0;
}
- i = jiffies & 0xffff;
+ i = jiffies;
sz = (jiffies >> 16) & 0x7;
if (baselen > 2) {
baselen = numtail2_baselen;
@@ -476,7 +476,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
name_res[baselen + 4] = '~';
name_res[baselen + 5] = '1' + sz;
while (1) {
- sprintf(buf, "%04X", i);
+ snprintf(buf, sizeof(buf), "%04X", i & 0xffff);
memcpy(&name_res[baselen], buf, 4);
if (vfat_find_form(dir, name_res) < 0)
break;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index e513ac599c8..0b589a9b4ff 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -53,7 +53,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
static void fscache_object_slow_work_put_ref(struct slow_work *);
static int fscache_object_slow_work_get_ref(struct slow_work *);
static void fscache_object_slow_work_execute(struct slow_work *);
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
#endif
static void fscache_initialise_object(struct fscache_object *);
@@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = {
.get_ref = fscache_object_slow_work_get_ref,
.put_ref = fscache_object_slow_work_put_ref,
.execute = fscache_object_slow_work_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
.desc = fscache_object_slow_work_desc,
#endif
};
@@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
/*
* describe an object for slow-work debugging
*/
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
static void fscache_object_slow_work_desc(struct slow_work *work,
struct seq_file *m)
{
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 313e79a1426..9f6c928d458 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -500,7 +500,7 @@ static void fscache_op_execute(struct slow_work *work)
/*
* describe an operation for slow-work debugging
*/
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
{
struct fscache_operation *op =
@@ -517,7 +517,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = {
.get_ref = fscache_op_get_ref,
.put_ref = fscache_op_put_ref,
.execute = fscache_op_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
.desc = fscache_op_desc,
#endif
};
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9718c22f186..a5d0c56d3eb 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -80,6 +80,7 @@ static void writeseg_end_io(struct bio *bio, int err)
prefetchw(&bvec->bv_page->flags);
end_page_writeback(page);
+ page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
if (atomic_dec_and_test(&super->s_pending_writes))
@@ -97,8 +98,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
int i;
+ if (max_pages > BIO_MAX_PAGES)
+ max_pages = BIO_MAX_PAGES;
bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio); /* FIXME: handle this */
+ BUG_ON(!bio);
for (i = 0; i < nr_pages; i++) {
if (i >= max_pages) {
@@ -191,8 +194,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
int i;
+ if (max_pages > BIO_MAX_PAGES)
+ max_pages = BIO_MAX_PAGES;
bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio); /* FIXME: handle this */
+ BUG_ON(!bio);
for (i = 0; i < nr_pages; i++) {
if (i >= max_pages) {
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 56a8bfbb012..c76b4b5c7ff 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
(filler_t *)logfs_readpage, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
- dd = kmap_atomic(page, KM_USER0);
+ dd = kmap(page);
BUG_ON(dd->namelen == 0);
full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen),
pos, be64_to_cpu(dd->ino), dd->type);
- kunmap_atomic(dd, KM_USER0);
+ kunmap(page);
page_cache_release(page);
if (full)
break;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 6ad30a4c905..d57c7b07b60 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -800,6 +800,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
struct logfs_area *area = super->s_journal_area;
+ struct btree_head32 *head = &super->s_reserved_segments;
u32 segno, ec;
int i, err;
@@ -807,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
/* Drop old segments */
journal_for_each(i)
if (super->s_journal_seg[i]) {
+ btree_remove32(head, super->s_journal_seg[i]);
logfs_set_segment_unreserved(sb,
super->s_journal_seg[i],
super->s_journal_ec[i]);
@@ -819,8 +821,13 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
super->s_journal_seg[i] = segno;
super->s_journal_ec[i] = ec;
logfs_set_segment_reserved(sb, segno);
+ err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+ BUG_ON(err); /* mempool should prevent this */
+ err = logfs_erase_segment(sb, segno, 1);
+ BUG_ON(err); /* FIXME: remount-ro would be nicer */
}
/* Manually move journal_area */
+ freeseg(sb, area->a_segno);
area->a_segno = super->s_journal_seg[0];
area->a_is_open = 0;
area->a_used_bytes = 0;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 12977943137..b84b0eec602 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -587,6 +587,7 @@ void move_page_to_btree(struct page *page);
int logfs_init_mapping(struct super_block *sb);
void logfs_sync_area(struct logfs_area *area);
void logfs_sync_segments(struct super_block *sb);
+void freeseg(struct super_block *sb, u32 segno);
/* area handling */
int logfs_init_areas(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 7a23b3e7c0a..c3a3a6814b8 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1594,7 +1594,6 @@ int logfs_delete(struct inode *inode, pgoff_t index,
return ret;
}
-/* Rewrite cannot mark the inode dirty but has to write it immediatly. */
int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
gc_level_t gc_level, long flags)
{
@@ -1611,6 +1610,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
if (level != 0)
alloc_indirect_block(inode, page, 0);
err = logfs_write_buf(inode, page, flags);
+ if (!err && shrink_level(gc_level) == 0) {
+ /* Rewrite cannot mark the inode dirty but has to
+ * write it immediatly.
+ * Q: Can't we just create an alias for the inode
+ * instead? And if not, why not?
+ */
+ if (inode->i_ino == LOGFS_INO_MASTER)
+ logfs_write_anchor(inode->i_sb);
+ else {
+ err = __logfs_write_inode(inode, flags);
+ }
+ }
}
logfs_put_write_page(page);
return err;
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 1a14f9910d5..0ecd8f07c11 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -93,50 +93,58 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
} while (len);
}
-/*
- * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
- */
-static void pad_wbuf(struct logfs_area *area, int final)
+static void pad_partial_page(struct logfs_area *area)
{
struct super_block *sb = area->a_sb;
- struct logfs_super *super = logfs_super(sb);
struct page *page;
u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
pgoff_t index = ofs >> PAGE_SHIFT;
long offset = ofs & (PAGE_SIZE-1);
u32 len = PAGE_SIZE - offset;
- if (len == PAGE_SIZE) {
- /* The math in this function can surely use some love */
- len = 0;
- }
- if (len) {
- BUG_ON(area->a_used_bytes >= super->s_segsize);
-
- page = get_mapping_page(area->a_sb, index, 0);
+ if (len % PAGE_SIZE) {
+ page = get_mapping_page(sb, index, 0);
BUG_ON(!page); /* FIXME: reserve a pool */
memset(page_address(page) + offset, 0xff, len);
SetPagePrivate(page);
page_cache_release(page);
}
+}
- if (!final)
- return;
+static void pad_full_pages(struct logfs_area *area)
+{
+ struct super_block *sb = area->a_sb;
+ struct logfs_super *super = logfs_super(sb);
+ u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
+ u32 len = super->s_segsize - area->a_used_bytes;
+ pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
+ pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
+ struct page *page;
- area->a_used_bytes += len;
- for ( ; area->a_used_bytes < super->s_segsize;
- area->a_used_bytes += PAGE_SIZE) {
- /* Memset another page */
- index++;
- page = get_mapping_page(area->a_sb, index, 0);
+ while (no_indizes) {
+ page = get_mapping_page(sb, index, 0);
BUG_ON(!page); /* FIXME: reserve a pool */
- memset(page_address(page), 0xff, PAGE_SIZE);
+ SetPageUptodate(page);
+ memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
SetPagePrivate(page);
page_cache_release(page);
+ index++;
+ no_indizes--;
}
}
/*
+ * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
+ * Also make sure we allocate (and memset) all pages for final writeout.
+ */
+static void pad_wbuf(struct logfs_area *area, int final)
+{
+ pad_partial_page(area);
+ if (final)
+ pad_full_pages(area);
+}
+
+/*
* We have to be careful with the alias tree. Since lookup is done by bix,
* it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
* indirect blocks. So always use it through accessor functions.
@@ -683,7 +691,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
return 0;
}
-static void freeseg(struct super_block *sb, u32 segno)
+void freeseg(struct super_block *sb, u32 segno)
{
struct logfs_super *super = logfs_super(sb);
struct address_space *mapping = super->s_mapping_inode->i_mapping;
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c66beab78de..9d856c49afc 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -277,7 +277,7 @@ static int logfs_recover_sb(struct super_block *sb)
}
if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
printk(KERN_INFO"Superblocks don't match - fixing.\n");
- return write_one_sb(sb, super->s_devops->find_last_sb);
+ return logfs_write_sb(sb);
}
/* If neither is valid now, something's wrong. Didn't we properly
* check them before?!? */
@@ -289,6 +289,10 @@ static int logfs_make_writeable(struct super_block *sb)
{
int err;
+ err = logfs_open_segfile(sb);
+ if (err)
+ return err;
+
/* Repair any broken superblock copies */
err = logfs_recover_sb(sb);
if (err)
@@ -299,10 +303,6 @@ static int logfs_make_writeable(struct super_block *sb)
if (err)
return err;
- err = logfs_open_segfile(sb);
- if (err)
- return err;
-
/* Do one GC pass before any data gets dirtied */
logfs_gc_pass(sb);
@@ -328,7 +328,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
sb->s_root = d_alloc_root(rootdir);
if (!sb->s_root)
- goto fail;
+ goto fail2;
super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
if (!super->s_erase_page)
@@ -572,8 +572,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
return 0;
err1:
- up_write(&sb->s_umount);
- deactivate_super(sb);
+ deactivate_locked_super(sb);
return err;
err0:
kfree(super);
diff --git a/fs/namei.c b/fs/namei.c
index 1c0fca6e899..a7dce91a7e4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1610,8 +1610,7 @@ exit:
static struct file *do_last(struct nameidata *nd, struct path *path,
int open_flag, int acc_mode,
- int mode, const char *pathname,
- int *want_dir)
+ int mode, const char *pathname)
{
struct dentry *dir = nd->path.dentry;
struct file *filp;
@@ -1642,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (nd->last.name[nd->last.len]) {
if (open_flag & O_CREAT)
goto exit;
- *want_dir = 1;
+ nd->flags |= LOOKUP_DIRECTORY;
}
/* just plain open? */
@@ -1656,8 +1655,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (path->dentry->d_inode->i_op->follow_link)
return NULL;
error = -ENOTDIR;
- if (*want_dir && !path->dentry->d_inode->i_op->lookup)
- goto exit_dput;
+ if (nd->flags & LOOKUP_DIRECTORY) {
+ if (!path->dentry->d_inode->i_op->lookup)
+ goto exit_dput;
+ }
path_to_nameidata(path, nd);
audit_inode(pathname, nd->path.dentry);
goto ok;
@@ -1766,7 +1767,6 @@ struct file *do_filp_open(int dfd, const char *pathname,
int count = 0;
int flag = open_to_namei_flags(open_flag);
int force_reval = 0;
- int want_dir = open_flag & O_DIRECTORY;
if (!(open_flag & O_CREAT))
mode = 0;
@@ -1828,7 +1828,9 @@ reval:
if (open_flag & O_EXCL)
nd.flags |= LOOKUP_EXCL;
}
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
+ if (open_flag & O_DIRECTORY)
+ nd.flags |= LOOKUP_DIRECTORY;
+ filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path holder;
struct inode *inode = path.dentry->d_inode;
@@ -1866,7 +1868,7 @@ reval:
}
holder = path;
nd.flags &= ~LOOKUP_PARENT;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
+ filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
if (inode->i_op->put_link)
inode->i_op->put_link(holder.dentry, &nd, cookie);
path_put(&holder);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 636eaafd6ea..6129a431aa3 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -323,14 +323,14 @@ int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs)
int nilfs_wait_on_logs(struct list_head *logs)
{
struct nilfs_segment_buffer *segbuf;
- int err;
+ int err, ret = 0;
list_for_each_entry(segbuf, logs, sb_list) {
err = nilfs_segbuf_wait(segbuf);
- if (err)
- return err;
+ if (err && !ret)
+ ret = err;
}
- return 0;
+ return ret;
}
/*
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 69576a95e13..c161d89061b 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1510,6 +1510,12 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
break;
+ nilfs_clear_logs(&sci->sc_segbufs);
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
@@ -1517,12 +1523,6 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
NULL);
WARN_ON(err); /* do not happen */
}
- nilfs_clear_logs(&sci->sc_segbufs);
-
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
@@ -1897,8 +1897,7 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
list_splice_tail_init(&sci->sc_write_logs, &logs);
ret = nilfs_wait_on_logs(&logs);
- if (ret)
- nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret);
+ nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0501974bedd..8ccf0f8c9cc 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -30,6 +30,8 @@
#include "alloc.h"
#include "dlmglue.h"
#include "file.h"
+#include "inode.h"
+#include "journal.h"
#include "ocfs2_fs.h"
#include "xattr.h"
@@ -166,6 +168,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
}
/*
+ * Helper function to set i_mode in memory and disk. Some call paths
+ * will not have di_bh or a journal handle to pass, in which case it
+ * will create it's own.
+ */
+static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
+ handle_t *handle, umode_t new_mode)
+{
+ int ret, commit_handle = 0;
+ struct ocfs2_dinode *di;
+
+ if (di_bh == NULL) {
+ ret = ocfs2_read_inode_block(inode, &di_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ } else
+ get_bh(di_bh);
+
+ if (handle == NULL) {
+ handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
+ OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out_brelse;
+ }
+
+ commit_handle = 1;
+ }
+
+ di = (struct ocfs2_dinode *)di_bh->b_data;
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ inode->i_mode = new_mode;
+ di->i_mode = cpu_to_le16(inode->i_mode);
+
+ ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+ if (commit_handle)
+ ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+out_brelse:
+ brelse(di_bh);
+out:
+ return ret;
+}
+
+/*
* Set the access or default ACL of an inode.
*/
static int ocfs2_set_acl(handle_t *handle,
@@ -193,9 +249,14 @@ static int ocfs2_set_acl(handle_t *handle,
if (ret < 0)
return ret;
else {
- inode->i_mode = mode;
if (ret == 0)
acl = NULL;
+
+ ret = ocfs2_acl_set_mode(inode, di_bh,
+ handle, mode);
+ if (ret)
+ return ret;
+
}
}
break;
@@ -283,6 +344,7 @@ int ocfs2_init_acl(handle_t *handle,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl = NULL;
int ret = 0;
+ mode_t mode;
if (!S_ISLNK(inode->i_mode)) {
if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
@@ -291,12 +353,17 @@ int ocfs2_init_acl(handle_t *handle,
if (IS_ERR(acl))
return PTR_ERR(acl);
}
- if (!acl)
- inode->i_mode &= ~current_umask();
+ if (!acl) {
+ mode = inode->i_mode & ~current_umask();
+ ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
+ }
}
if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
struct posix_acl *clone;
- mode_t mode;
if (S_ISDIR(inode->i_mode)) {
ret = ocfs2_set_acl(handle, inode, di_bh,
@@ -313,7 +380,7 @@ int ocfs2_init_acl(handle_t *handle,
mode = inode->i_mode;
ret = posix_acl_create_masq(clone, &mode);
if (ret >= 0) {
- inode->i_mode = mode;
+ ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
if (ret > 0) {
ret = ocfs2_set_acl(handle, inode,
di_bh, ACL_TYPE_ACCESS,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a659606dcb9..9289b4357d2 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
ok:
spin_unlock(&res->spinlock);
}
- spin_unlock(&dlm->spinlock);
// mlog(0, "woo! got an assert_master from node %u!\n",
// assert->node_idx);
@@ -1926,7 +1925,6 @@ ok:
/* master is known, detach if not already detached.
* ensures that only one assert_master call will happen
* on this mle. */
- spin_lock(&dlm->spinlock);
spin_lock(&dlm->master_lock);
rr = atomic_read(&mle->mle_refs.refcount);
@@ -1959,7 +1957,6 @@ ok:
__dlm_put_mle(mle);
}
spin_unlock(&dlm->master_lock);
- spin_unlock(&dlm->spinlock);
} else if (res) {
if (res->owner != assert->node_idx) {
mlog(0, "assert_master from %u, but current "
@@ -1967,6 +1964,7 @@ ok:
res->owner, namelen, name);
}
}
+ spin_unlock(&dlm->spinlock);
done:
ret = 0;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 278a223aae1..ab207901d32 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -891,6 +891,21 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
/* Do some basic inode verification... */
di = (struct ocfs2_dinode *) di_bh->b_data;
if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+ /*
+ * Inodes in the orphan dir must have ORPHANED_FL. The only
+ * inodes that come back out of the orphan dir are reflink
+ * targets. A reflink target may be moved out of the orphan
+ * dir between the time we scan the directory and the time we
+ * process it. This would lead to HAS_REFCOUNT_FL being set but
+ * ORPHANED_FL not.
+ */
+ if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
+ mlog(0, "Reflinked inode %llu is no longer orphaned. "
+ "it shouldn't be deleted\n",
+ (unsigned long long)oi->ip_blkno);
+ goto bail;
+ }
+
/* for lack of a better error? */
status = -EEXIST;
mlog(ML_ERROR,
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ca992d91f51..c983715d8d8 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
(unsigned long long)la_start_blk,
(unsigned long long)blkno);
- status = ocfs2_free_clusters(handle, main_bm_inode,
- main_bm_bh, blkno, count);
+ status = ocfs2_release_clusters(handle,
+ main_bm_inode,
+ main_bm_bh, blkno,
+ count);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -984,8 +986,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
}
retry_enospc:
- (*ac)->ac_bits_wanted = osb->local_alloc_bits;
-
+ (*ac)->ac_bits_wanted = osb->local_alloc_default_bits;
status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
if (status == -ENOSPC) {
if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
@@ -1061,6 +1062,7 @@ retry_enospc:
OCFS2_LA_DISABLED)
goto bail;
+ ac->ac_bits_wanted = osb->local_alloc_default_bits;
status = ocfs2_claim_clusters(osb, handle, ac,
osb->local_alloc_bits,
&cluster_off,
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 544ac624517..b5cb3ede940 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- if (__mandatory_lock(inode))
+ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
return -ENOLCK;
return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d9cd4e373a5..b1eb50ae409 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
static int ocfs2_orphan_add(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
- struct ocfs2_dinode *fe,
+ struct buffer_head *fe_bh,
char *name,
struct ocfs2_dir_lookup_result *lookup,
struct inode *orphan_dir_inode);
@@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir,
fe = (struct ocfs2_dinode *) fe_bh->b_data;
if (inode_is_unlinkable(inode)) {
- status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
+ status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name,
&orphan_insert, orphan_dir);
if (status < 0) {
mlog_errno(status);
@@ -1300,7 +1300,7 @@ static int ocfs2_rename(struct inode *old_dir,
if (S_ISDIR(new_inode->i_mode) ||
(ocfs2_read_links_count(newfe) == 1)) {
status = ocfs2_orphan_add(osb, handle, new_inode,
- newfe, orphan_name,
+ newfe_bh, orphan_name,
&orphan_insert, orphan_dir);
if (status < 0) {
mlog_errno(status);
@@ -1911,7 +1911,7 @@ leave:
static int ocfs2_orphan_add(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
- struct ocfs2_dinode *fe,
+ struct buffer_head *fe_bh,
char *name,
struct ocfs2_dir_lookup_result *lookup,
struct inode *orphan_dir_inode)
@@ -1919,6 +1919,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
struct buffer_head *orphan_dir_bh = NULL;
int status = 0;
struct ocfs2_dinode *orphan_fe;
+ struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
@@ -1959,6 +1960,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
goto leave;
}
+ /*
+ * We're going to journal the change of i_flags and i_orphaned_slot.
+ * It's safe anyway, though some callers may duplicate the journaling.
+ * Journaling within the func just make the logic look more
+ * straightforward.
+ */
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(inode),
+ fe_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
/* Record which orphan dir our inode now resides
@@ -1966,6 +1982,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
* dir to lock. */
fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
+ ocfs2_journal_dirty(handle, fe_bh);
+
mlog(0, "Inode %llu orphaned in slot %d\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
@@ -2123,7 +2141,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
}
di = (struct ocfs2_dinode *)new_di_bh->b_data;
- status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
+ status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
&orphan_insert, orphan_dir);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1238b491db9..adf5e2ebc2c 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -763,8 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
}
-#define ocfs2_set_bit ext2_set_bit
-#define ocfs2_clear_bit ext2_clear_bit
+static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
+{
+ ext2_set_bit(bit, bitmap);
+}
+#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
+
+static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
+{
+ ext2_clear_bit(bit, bitmap);
+}
+#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
+
#define ocfs2_test_bit ext2_test_bit
#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
#define ocfs2_find_next_bit ext2_find_next_bit
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9e96921dffd..29405f2ff61 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4075,6 +4075,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
spin_unlock(&OCFS2_I(t_inode)->ip_lock);
i_size_write(t_inode, size);
+ t_inode->i_blocks = s_inode->i_blocks;
di->i_xattr_inline_size = s_di->i_xattr_inline_size;
di->i_clusters = s_di->i_clusters;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c3c60bc3e07..19ba00f2854 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
struct buffer_head *group_bh,
unsigned int bit_off,
unsigned int num_bits);
-static inline int ocfs2_block_group_clear_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits);
-
static int ocfs2_relink_block_group(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *fe_bh,
@@ -152,7 +145,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
#define do_error(fmt, ...) \
do{ \
- if (clean_error) \
+ if (resize) \
mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
else \
ocfs2_error(sb, fmt, ##__VA_ARGS__); \
@@ -160,7 +153,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
static int ocfs2_validate_gd_self(struct super_block *sb,
struct buffer_head *bh,
- int clean_error)
+ int resize)
{
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
@@ -211,7 +204,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb,
static int ocfs2_validate_gd_parent(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh,
- int clean_error)
+ int resize)
{
unsigned int max_bits;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
@@ -233,8 +226,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb,
return -EINVAL;
}
- if (le16_to_cpu(gd->bg_chain) >=
- le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+ /* In resize, we may meet the case bg_chain == cl_next_free_rec. */
+ if ((le16_to_cpu(gd->bg_chain) >
+ le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
+ ((le16_to_cpu(gd->bg_chain) ==
+ le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
do_error("Group descriptor #%llu has bad chain %u",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_chain));
@@ -1975,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
bits_wanted, cluster_start, num_clusters);
}
-static inline int ocfs2_block_group_clear_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits)
+static int ocfs2_block_group_clear_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct ocfs2_group_desc *bg,
+ struct buffer_head *group_bh,
+ unsigned int bit_off,
+ unsigned int num_bits,
+ void (*undo_fn)(unsigned int bit,
+ unsigned long *bmap))
{
int status;
unsigned int tmp;
- int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
struct ocfs2_group_desc *undo_bg = NULL;
- int cluster_bitmap = 0;
mlog_entry_void();
@@ -1996,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
- if (ocfs2_is_cluster_bitmap(alloc_inode))
- journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
+ BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
- group_bh, journal_type);
+ group_bh,
+ undo_fn ?
+ OCFS2_JOURNAL_ACCESS_UNDO :
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto bail;
}
- if (ocfs2_is_cluster_bitmap(alloc_inode))
- cluster_bitmap = 1;
-
- if (cluster_bitmap) {
+ if (undo_fn) {
jbd_lock_bh_state(group_bh);
undo_bg = (struct ocfs2_group_desc *)
bh2jh(group_bh)->b_committed_data;
@@ -2020,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
while(tmp--) {
ocfs2_clear_bit((bit_off + tmp),
(unsigned long *) bg->bg_bitmap);
- if (cluster_bitmap)
- ocfs2_set_bit(bit_off + tmp,
- (unsigned long *) undo_bg->bg_bitmap);
+ if (undo_fn)
+ undo_fn(bit_off + tmp,
+ (unsigned long *) undo_bg->bg_bitmap);
}
le16_add_cpu(&bg->bg_free_bits_count, num_bits);
- if (cluster_bitmap)
+ if (undo_fn)
jbd_unlock_bh_state(group_bh);
status = ocfs2_journal_dirty(handle, group_bh);
@@ -2039,12 +2033,14 @@ bail:
/*
* expects the suballoc inode to already be locked.
*/
-int ocfs2_free_suballoc_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct buffer_head *alloc_bh,
- unsigned int start_bit,
- u64 bg_blkno,
- unsigned int count)
+static int _ocfs2_free_suballoc_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *alloc_bh,
+ unsigned int start_bit,
+ u64 bg_blkno,
+ unsigned int count,
+ void (*undo_fn)(unsigned int bit,
+ unsigned long *bitmap))
{
int status = 0;
u32 tmp_used;
@@ -2079,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
group, group_bh,
- start_bit, count);
+ start_bit, count, undo_fn);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2110,6 +2106,17 @@ bail:
return status;
}
+int ocfs2_free_suballoc_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *alloc_bh,
+ unsigned int start_bit,
+ u64 bg_blkno,
+ unsigned int count)
+{
+ return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
+ start_bit, bg_blkno, count, NULL);
+}
+
int ocfs2_free_dinode(handle_t *handle,
struct inode *inode_alloc_inode,
struct buffer_head *inode_alloc_bh,
@@ -2123,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle,
inode_alloc_bh, bit, bg_blkno, 1);
}
-int ocfs2_free_clusters(handle_t *handle,
- struct inode *bitmap_inode,
- struct buffer_head *bitmap_bh,
- u64 start_blk,
- unsigned int num_clusters)
+static int _ocfs2_free_clusters(handle_t *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+ u64 start_blk,
+ unsigned int num_clusters,
+ void (*undo_fn)(unsigned int bit,
+ unsigned long *bitmap))
{
int status;
u16 bg_start_bit;
@@ -2154,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle,
mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
(unsigned long long)bg_blkno, bg_start_bit);
- status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
- bg_start_bit, bg_blkno,
- num_clusters);
+ status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
+ bg_start_bit, bg_blkno,
+ num_clusters, undo_fn);
if (status < 0) {
mlog_errno(status);
goto out;
@@ -2170,6 +2179,32 @@ out:
return status;
}
+int ocfs2_free_clusters(handle_t *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+ u64 start_blk,
+ unsigned int num_clusters)
+{
+ return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
+ start_blk, num_clusters,
+ _ocfs2_set_bit);
+}
+
+/*
+ * Give never-used clusters back to the global bitmap. We don't need
+ * to protect these bits in the undo buffer.
+ */
+int ocfs2_release_clusters(handle_t *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+ u64 start_blk,
+ unsigned int num_clusters)
+{
+ return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
+ start_blk, num_clusters,
+ _ocfs2_clear_bit);
+}
+
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
{
printk("Block Group:\n");
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index fa60723c43e..e0f46df357e 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -127,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle,
struct buffer_head *bitmap_bh,
u64 start_blk,
unsigned int num_clusters);
+int ocfs2_release_clusters(handle_t *handle,
+ struct inode *bitmap_inode,
+ struct buffer_head *bitmap_bh,
+ u64 start_blk,
+ unsigned int num_clusters);
static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
{
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1b0d386f6d..3e7773089b9 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1622,7 +1622,7 @@ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
/* Now tell xh->xh_entries about it */
for (i = 0; i < count; i++) {
offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
- if (offset < namevalue_offset)
+ if (offset <= namevalue_offset)
le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
namevalue_size);
}
@@ -6528,13 +6528,11 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
int indexed)
{
int ret;
- struct ocfs2_alloc_context *meta_ac;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct ocfs2_xattr_set_ctxt ctxt = {
- .meta_ac = meta_ac,
- };
+ struct ocfs2_xattr_set_ctxt ctxt;
- ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
+ memset(&ctxt, 0, sizeof(ctxt));
+ ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
if (ret < 0) {
mlog_errno(ret);
return ret;
@@ -6556,7 +6554,7 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
ocfs2_commit_trans(osb, ctxt.handle);
out:
- ocfs2_free_alloc_context(meta_ac);
+ ocfs2_free_alloc_context(ctxt.meta_ac);
return ret;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a7310841c83..b1f6e62773d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -442,12 +442,13 @@ static const struct file_operations proc_lstats_operations = {
unsigned long badness(struct task_struct *p, unsigned long uptime);
static int proc_oom_score(struct task_struct *task, char *buffer)
{
- unsigned long points;
+ unsigned long points = 0;
struct timespec uptime;
do_posix_clock_monotonic_gettime(&uptime);
read_lock(&tasklist_lock);
- points = badness(task->group_leader, uptime.tv_sec);
+ if (pid_alive(task))
+ points = badness(task, uptime.tv_sec);
read_unlock(&tasklist_lock);
return sprintf(buffer, "%lu\n", points);
}